npm - bloby-bot - Versions diffs - 0.70.12 → 0.71.0 - Mend

bloby-bot 0.70.12 → 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

package/bin/cli.js +234 -48
package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-es6cZJzs.js} +6 -6
package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
package/dist-bloby/assets/{globals-B3cTbITX.js → globals-DN3F0CQE.js} +1 -1
package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-BKgy17OU.js} +1 -1
package/dist-bloby/bloby.html +3 -3
package/dist-bloby/onboard.html +3 -3
package/package.json +3 -4
package/scripts/install +156 -41
package/scripts/install.ps1 +146 -29
package/scripts/install.sh +156 -41
package/shared/config.ts +37 -2
package/shared/relay.ts +3 -1
package/supervisor/channels/manager.ts +84 -44
package/supervisor/channels/telegram.ts +57 -16
package/supervisor/channels/types.ts +4 -1
package/supervisor/channels/whatsapp.ts +57 -10
package/supervisor/chat/OnboardWizard.tsx +0 -15
package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
package/supervisor/chat/src/hooks/useChat.ts +52 -0
package/supervisor/chat/src/lib/authedFile.ts +24 -12
package/supervisor/file-saver.ts +92 -19
package/supervisor/harnesses/attachment-policy.ts +111 -0
package/supervisor/harnesses/claude.ts +62 -15
package/supervisor/harnesses/codex.ts +69 -43
package/supervisor/harnesses/pi/index.ts +367 -112
package/supervisor/harnesses/pi/providers/humanize-error.ts +27 -2
package/supervisor/harnesses/pi/providers/retry.ts +31 -0
package/supervisor/harnesses/pi/providers/stream-anthropic.ts +31 -3
package/supervisor/harnesses/pi/providers/stream-google.ts +26 -3
package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +32 -9
package/supervisor/harnesses/pi/providers/types.ts +29 -1
package/supervisor/harnesses/pi/session.ts +143 -3
package/supervisor/harnesses/pi/test-completion.ts +56 -0
package/supervisor/harnesses/pi/tools/bash.ts +198 -22
package/supervisor/harnesses/pi/tools/glob.ts +79 -0
package/supervisor/harnesses/pi/tools/grep.ts +0 -0
package/supervisor/harnesses/pi/tools/registry.ts +18 -6
package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
package/supervisor/index.ts +93 -18
package/supervisor/widget.js +19 -5
package/worker/db.ts +2 -0
package/worker/index.ts +18 -1
package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
package/worker/prompts/bloby-system-prompt.txt +1 -1
package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
package/workspace/skills/mac/SKILL.md +13 -4
package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1
package/supervisor/public/headphones_spritesheet.webp +0 -0
package/supervisor/public/spritesheet.webp +0 -0

package/supervisor/harnesses/pi/index.ts (changed)

@@ -29,12 +29,20 @@ import type {
 export type { RecentMessage, AgentAttachment };
 import { buildSkillsIndex } from '../skills.js';
+import {
+  routeAttachment,
+  buildSavedFilesNote,
+  normalizeImageMediaType,
+  approxBase64Bytes,
+  MAX_INLINE_IMAGE_BYTES,
+  INLINE_TEXT_PER_FILE_CHARS,
+  INLINE_TEXT_TOTAL_CHARS,
+} from '../attachment-policy.js';
 import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
 import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
-import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
+import { getPiSubProvider, getCatalogModel, type PiApiFlavor } from './sub-providers.js';
 import { readPiAuth } from './auth-storage.js';
-import { streamProvider } from './providers/stream.js';
-import type { PiMessage } from './providers/types.js';
+import type { PiMessage, PiContentBlock } from './providers/types.js';
 import { toolDefsForProvider } from './tools/registry.js';
 import type { PiTaskHost } from './tools/types.js';
@@ -264,6 +272,11 @@ function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error:
       maxTokensField: sub.maxTokensField,
       includeStreamUsage: sub.noStreamUsage ? false : undefined,
       contextWindow,
+      // Text-only models 400 on image blocks AND the stuck image re-fails
+      // every later message (audit C-8) — the session downgrades images to
+      // placeholders when the catalog says no vision. Unknown (dynamic
+      // sub-providers) ⇒ undefined ⇒ assume vision.
+      supportsImages: catalog?.input ? catalog.input.includes('image') : undefined,
     },
   };
 }
@@ -349,7 +362,7 @@ function createTaskHost(conv: LiveConversation, getAuth: () => PiSessionAuth): P
       // Honor the agent config's tool restrictions (claude applies these via
       // the SDK's tools/disallowedTools options — e.g. a future researcher
       // agent with disallowedTools: ['Write','Edit']).
-      let childTools = toolDefsForProvider({ forSubagent: true });
+      let childTools = toolDefsForProvider();
       if (Array.isArray(cfg.tools) && cfg.tools.length > 0) {
         childTools = childTools.filter((t) => cfg.tools.includes(t.name));
       }
@@ -491,27 +504,82 @@ function recentToPiMessages(messages: RecentMessage[] | undefined): PiMessage[]
   }));
 }
-/** Wrap a raw user input into a PiMessage with text + optional image blocks. */
-function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFiles?: SavedFile[]): PiMessage {
-  const content: PiMessage['content'] = [];
+/** Native PDF document blocks reach only the flavors that render them — the
+ *  Anthropic Messages API and Gemini both ingest application/pdf inline
+ *  (base64 document source / inlineData). openai-completions has no document
+ *  type, so a PDF there falls back to the saved-files disk pointer. Matches the
+ *  shared attachment-policy routing rule. */
+function canNativeDocumentForFlavor(flavor: PiApiFlavor): boolean {
+  return flavor === 'anthropic-messages' || flavor === 'google-gemini';
+}
+/** Build a PiContentBlock[] from raw text + attachments, MEDIA-FIRST then the
+ *  prompt text last (parity with claude.ts and the other pi providers). Routing
+ *  is delegated to the shared attachment-policy so all three harnesses ingest
+ *  identically; canNativeDocument is the active provider's PDF capability. */
+function buildAttachmentBlocks(
+  text: string,
+  canNativeDocument: boolean,
+  attachments?: AgentAttachment[],
+  savedFiles?: SavedFile[],
+): PiContentBlock[] {
+  const content: PiContentBlock[] = [];
   if (attachments?.length) {
+    // Running budget so the cross-file inline-text total never exceeds the cap.
+    let inlineTextBudget = INLINE_TEXT_TOTAL_CHARS;
     for (const att of attachments) {
-      if (att.type === 'image') {
-        content.push({ type: 'image', mediaType: att.mediaType, data: att.data });
-      } else {
-        // Documents aren't directly supported across all sub-providers yet.
-        // Surface their existence in the text body instead.
-        content.push({ type: 'text', text: `[Attached document: ${att.name} (${att.mediaType})]` });
+      switch (routeAttachment(att, { canNativeDocument })) {
+        case 'image': {
+          // Drop the inline copy when it would bloat every stateless resend —
+          // the file is on disk and buildSavedFilesNote points the tools at it.
+          if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
+          content.push({ type: 'image', mediaType: normalizeImageMediaType(att.mediaType), data: att.data });
+          break;
+        }
+        case 'native-document': {
+          // PDF on a flavor that renders it natively (anthropic / gemini).
+          content.push({ type: 'document', mediaType: 'application/pdf', data: att.data, name: att.name });
+          break;
+        }
+        case 'inline-text': {
+          if (inlineTextBudget <= 0) break;
+          let decoded = '';
+          try { decoded = Buffer.from(att.data, 'base64').toString('utf-8'); }
+          catch { break; } // undecodable → rely on the saved-files note
+          const cap = Math.min(INLINE_TEXT_PER_FILE_CHARS, inlineTextBudget);
+          const slice = decoded.slice(0, cap);
+          inlineTextBudget -= slice.length;
+          content.push({ type: 'text', text: `--- ${att.name} ---\n${slice}` });
+          break;
+        }
+        case 'reference-only':
+        default:
+          // Binary we can't inline (docx/xlsx/zip/…), a PDF on a flavor without
+          // native documents, or an unexpected route — no provider block; the
+          // saved-files note below carries the disk pointer. Never emit a
+          // malformed block (defensive default, review PI-E).
+          break;
       }
     }
   }
   let prompt = text || '(attached files)';
   if (savedFiles?.length) {
-    const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
-    prompt += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
+    const note = buildSavedFilesNote(savedFiles);
+    if (note) prompt += `\n\n${note}`;
   }
   content.push({ type: 'text', text: prompt });
-  return { role: 'user', content };
+  return content;
+}
+/** Wrap a raw user input into a PiMessage with text + optional media blocks. */
+function buildUserMessage(
+  text: string,
+  canNativeDocument: boolean,
+  attachments?: AgentAttachment[],
+  savedFiles?: SavedFile[],
+): PiMessage {
+  return { role: 'user', content: buildAttachmentBlocks(text, canNativeDocument, attachments, savedFiles) };
 }
 // ── Live Conversation API ──────────────────────────────────────────────────
@@ -582,7 +650,7 @@ export async function startConversation(
   const session = createPiSession({
     getAuth,
     systemPrompt,
-    tools: toolDefsForProvider(),
+    tools: toolDefsForProvider({ withTask: true }),
     cwd: WORKSPACE_DIR,
     abortController,
     taskHost: createTaskHost(conv, getAuth),
@@ -647,8 +715,17 @@ function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
       conv.onMessage('bot:tool', { conversationId: conv.id, name: toolName, input: evt.input, ...syn });
       break;
     }
+    case 'thinking':
+      // Reasoning-model liveness pulse (house standard, codex M1 analog) —
+      // the UI dedups repeated name+running entries, channels get a chunk
+      // flush opportunity. Reasoning TEXT is never forwarded.
+      conv.onMessage('bot:tool', { conversationId: conv.id, name: 'thinking', status: 'running', ...syn });
+      break;
     case 'tool_result':
-      // Not surfaced yet (Phase D: translate to a bot:tool progress pulse).
+      // Progress pulse between tool rounds (audit D1-7): claude punctuates
+      // long tasks with tool_progress events; this is pi's equivalent —
+      // commits dashboard bubbles and flushes channel chunks mid-task.
+      conv.onMessage('bot:tool', { conversationId: conv.id, name: evt.name, status: 'running', ...syn });
       break;
     case 'turn_complete': {
       conv.busy = false;
@@ -723,7 +800,13 @@ export function pushMessage(
   conv.busy = true;
   conv.pendingCount += 1;
   conv.turnOrigins.push('user');
-  conv.inputQueue.push(buildUserMessage(content, attachments, savedFiles));
+  // Resolve the active flavor at push time (the session re-resolves auth every
+  // round, so a wizard provider switch mid-session is honored). Unreadable auth
+  // ⇒ no native documents — the conservative route sends a PDF to the disk
+  // pointer rather than emitting a block the provider can't render.
+  const resolved = resolveAuth();
+  const canNativeDocument = resolved.ok ? canNativeDocumentForFlavor(resolved.auth.flavor) : false;
+  conv.inputQueue.push(buildUserMessage(content, canNativeDocument, attachments, savedFiles));
   conv.onMessage('bot:typing', { conversationId });
   return true;
 }
@@ -795,9 +878,20 @@ export function anyOneShotActive(): boolean {
 }
 /**
- * One-shot text query — used by customer WhatsApp + scheduler. Uses the
- * provider stream directly (no async queue), drains it, emits the same
- * bloby events the live path does.
+ * One-shot agentic query — used by customer WhatsApp + scheduler (pulse/cron).
+ *
+ * Phase C (audit D5-1/D3-1): runs the SAME tool loop as the live path — a
+ * single-message `createPiSession` — so pulse/cron runs can actually edit
+ * files, run Bash, and read skills, and the tool-advertising system prompt is
+ * finally true (a tool-less request under that prompt made Gemini emit
+ * MALFORMED_FUNCTION_CALL — PI-HARNESS.md gotcha #3). No task host: background
+ * sub-agents stay a live-conversation feature (claude parity), so the Task def
+ * is excluded from the tool list and a hallucinated call fails gracefully.
+ *
+ * Guarantees preserved: finally-emitted bot:done, 5-min non-resetting
+ * watchdog, activeQueries registration AFTER the awaited prompt build
+ * (leak-ordering, claude.ts), supportPrompt bypasses the owner prompt +
+ * skills index entirely.
  */
 export async function startBlobyAgentQuery(
   conversationId: string,
@@ -809,7 +903,7 @@ export async function startBlobyAgentQuery(
   names?: { botName: string; humanName: string },
   recentMessages?: RecentMessage[],
   supportPrompt?: string,
-  _maxTurns?: number,
+  maxTurns?: number,
 ): Promise<void> {
   const resolved = resolveAuth();
   if (!resolved.ok) {
@@ -820,7 +914,6 @@ export async function startBlobyAgentQuery(
     onMessage('bot:done', { conversationId, usedFileTools: false });
     return;
   }
-  const auth = resolved.auth;
   // Build the prompt BEFORE registering in activeQueries / arming the watchdog
   // (claude.ts ordering): if anything in here ever rejected after registration,
@@ -830,15 +923,21 @@ export async function startBlobyAgentQuery(
   if (supportPrompt) {
     systemPrompt = supportPrompt;
   } else {
-    systemPrompt = await buildSystemPrompt(names, recentMessages);
+    // History rides ONLY as structured messages (initialMessages below).
+    // Passing it here too duplicated every prior turn into the system prompt
+    // (audit D3-6).
+    systemPrompt = await buildSystemPrompt(names, undefined);
+    // The base prompt routes heavy coding to the Agent tool, which only LIVE
+    // conversations have (one-shots have no task host) — keep the model
+    // honest so it doesn't chase a tool that isn't declared (review PI-C-4).
+    systemPrompt +=
+      '\n\n---\n# One-shot run\nThis is a scheduled/one-shot run: the Agent tool is NOT available here. ' +
+      'Do any heavy work yourself, directly with Read, Write, Edit, and Bash.';
   }
-  const messages: PiMessage[] = recentToPiMessages(recentMessages);
-  messages.push(buildUserMessage(prompt, attachments, savedFiles));
   const abortController = new AbortController();
   activeQueries.set(conversationId, abortController);
-  // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
+  // Hard watchdog — a hung turn would otherwise pin this query forever (finally never
   // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
   const watchdog = setTimeout(() => {
     log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
@@ -847,76 +946,107 @@ export async function startBlobyAgentQuery(
   onMessage('bot:typing', { conversationId });
-  let accumulated = '';
+  let usedFileTools = false;
+  // Track tool names LIVE (not only via turn_complete): an aborted run never
+  // emits turn_complete, and files written in earlier rounds must still flag
+  // usedFileTools on bot:done or the backend serves stale code
+  // (review PI-C-1; mirrors claude.ts:723-760 and runAgentQuery below).
   const usedTools = new Set<string>();
-  // Errors are stashed, not emitted inline — at the end, partial text wins
-  // over the error bubble (audit D3-5/D6-2, claude.ts:730-737 precedence).
-  let errorMsg: string | null = null;
+  let sawResponse = false;
+  let capHit = false;
   const batcher = createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text }));
+  // Re-resolve auth per round, same as the live path — a key/model fix in the
+  // wizard applies to the next round of an in-flight pulse run too.
+  let currentAuth: PiSessionAuth = resolved.auth;
+  const getAuth = (): PiSessionAuth => {
+    const fresh = resolveAuth();
+    if (fresh.ok) currentAuth = fresh.auth;
+    return currentAuth;
+  };
   try {
-    const stream = streamProvider(auth.flavor, {
-      modelId: auth.modelId,
-      baseUrl: auth.baseUrl,
-      apiKey: auth.apiKey,
+    const session = createPiSession({
+      getAuth,
       systemPrompt,
-      messages,
-      maxOutputTokens: auth.maxOutputTokens,
-      maxTokensField: auth.maxTokensField,
-      includeStreamUsage: auth.includeStreamUsage,
-      signal: abortController.signal,
+      initialMessages: recentToPiMessages(recentMessages),
+      tools: toolDefsForProvider(), // no Task — one-shots have no task host
+      cwd: WORKSPACE_DIR,
+      abortController,
+      maxToolRounds: maxTurns ?? 50, // claude one-shot default (claude.ts:677)
+      onEvent: (evt: PiSessionEvent) => {
+        switch (evt.type) {
+          case 'text_delta':
+            batcher.add(evt.delta);
+            break;
+          case 'text_end':
+            // Session precedence (D6-2): emitted even on errored turns when
+            // partial text streamed — the partial reaches the customer/pulse.
+            batcher.flush();
+            sawResponse = true;
+            onMessage('bot:response', { conversationId, content: evt.text });
+            break;
+          case 'tool_use': {
+            batcher.flush();
+            usedTools.add(evt.name);
+            const toolName = evt.name === 'Agent' || evt.name === 'agent' ? 'Task' : evt.name;
+            onMessage('bot:tool', { conversationId, name: toolName, input: evt.input });
+            break;
+          }
+          case 'thinking':
+            batcher.flush();
+            onMessage('bot:tool', { conversationId, name: 'thinking', status: 'running' });
+            break;
+          case 'tool_result':
+            batcher.flush();
+            onMessage('bot:tool', { conversationId, name: evt.name, status: 'running' });
+            break;
+          case 'error':
+            // Fires only when the turn produced no text, or fatally (D6-2).
+            batcher.flush();
+            sawResponse = true; // the caller got a terminal signal for this turn
+            onMessage('bot:error', { conversationId, error: evt.error });
+            break;
+          case 'turn_complete':
+            usedFileTools = usedFileTools || evt.usedFileTools;
+            if (evt.roundCapHit) capHit = true;
+            break;
+        }
+      },
     });
-    for await (const evt of stream) {
-      if (abortController.signal.aborted) break;
-      switch (evt.type) {
-        case 'text_delta':
-          accumulated += evt.delta;
-          batcher.add(evt.delta);
-          break;
-        case 'text_end':
-          batcher.flush();
-          accumulated = evt.text;
-          break;
-        case 'tool_use':
-          batcher.flush();
-          usedTools.add(evt.name);
-          onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
-          break;
-        case 'error':
-          batcher.flush();
-          errorMsg = evt.error;
-          break;
-      }
-    }
-    // Abort guard (audit D3-8): a watchdog-aborted run must not surface a
-    // truncated reply — a stopped pulse could otherwise still fire <Message>
-    // pushes with half-finished content.
-    if (!abortController.signal.aborted) {
+    const queue = createAsyncQueue<PiMessage>();
+    queue.push(buildUserMessage(prompt, canNativeDocumentForFlavor(resolved.auth.flavor), attachments, savedFiles));
+    queue.end();
+    await session.run(queue);
+    // Round-cap exhaustion with no terminal signal: the model was still
+    // mid-task when the budget ran out and no text streamed — without this the
+    // customer/pulse gets dead silence (review PI-C-2; claude surfaces an
+    // error_max_turns result on the same path).
+    if (!abortController.signal.aborted && capHit && !sawResponse) {
       batcher.flush();
-      if (accumulated) {
-        onMessage('bot:response', { conversationId, content: accumulated });
-      } else if (errorMsg) {
-        onMessage('bot:error', { conversationId, error: errorMsg });
-      }
+      onMessage('bot:error', {
+        conversationId,
+        error: `The run hit its ${maxTurns ?? 50}-round tool limit before producing a reply. Try a narrower request.`,
+      });
     }
   } catch (err: any) {
+    // session.run contains per-turn error handling; a throw here is unexpected.
     if (!abortController.signal.aborted) {
       log.warn(`[pi/bloby-agent] one-shot error: ${err?.message || err}`);
       batcher.flush();
-      if (accumulated) {
-        onMessage('bot:response', { conversationId, content: accumulated });
-      } else {
-        onMessage('bot:error', { conversationId, error: err?.message || String(err) });
-      }
+      onMessage('bot:error', { conversationId, error: err?.message || String(err) });
     }
   } finally {
+    // Aborted-run stragglers must not surface (audit D3-8) — discard, never flush.
     batcher.discard();
     clearTimeout(watchdog);
     activeQueries.delete(conversationId);
-    const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
-    const usedFileTools = FILE_TOOL_NAMES.some((t) => usedTools.has(t));
-    onMessage('bot:done', { conversationId, usedFileTools });
+    // Live tool tracking covers aborted runs whose turn_complete never fired —
+    // files already written must still trigger the backend restart (PI-C-1).
+    const fileToolsUsed = usedFileTools || ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
+    onMessage('bot:done', { conversationId, usedFileTools: fileToolsUsed });
   }
 }
@@ -930,45 +1060,126 @@ export function stopBlobyAgentQuery(conversationId: string): void {
 // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
+/** Minimal coding-agent prompt for /api/agent/query when the caller supplies
+ *  none — claude falls back to its native `claude_code` preset; pi's
+ *  equivalent advertises ONLY the tools that actually exist, and never the
+ *  Bloby owner persona (agent-API callers are workspace apps, not the bot). */
+const PI_CODING_AGENT_PROMPT =
+  'You are a coding agent operating non-interactively inside a project workspace. ' +
+  'Complete the request fully using your tools, then reply with a concise summary of what you did. ' +
+  'Tools: Read (file contents), Write (create/overwrite a file), Edit (exact string replacement), ' +
+  'Bash (shell commands; cwd is the workspace root). Paths are relative to the workspace root. ' +
+  'Do the work — never claim to have done something without actually using the tools.';
+/** In-memory session store for the agent API (audit D2-7/D3-3). Process-
+ *  lifetime only — AGENT-API.md documents that sessions die on supervisor
+ *  restart, and claude's resume has the same practical bound. */
+interface StoredAgentSession { messages: PiMessage[]; lastUsed: number }
+const agentSessions = new Map<string, StoredAgentSession>();
+const AGENT_SESSION_CAP = 50;
+const AGENT_SESSION_TTL_MS = 24 * 60 * 60_000;
+const AGENT_SESSION_MAX_MESSAGES = 40;
+function sweepAgentSessions(): void {
+  const now = Date.now();
+  for (const [id, s] of agentSessions) {
+    if (now - s.lastUsed > AGENT_SESSION_TTL_MS) agentSessions.delete(id);
+  }
+  if (agentSessions.size > AGENT_SESSION_CAP) {
+    const byAge = [...agentSessions.entries()].sort((a, b) => a[1].lastUsed - b[1].lastUsed);
+    for (const [id] of byAge.slice(0, agentSessions.size - AGENT_SESSION_CAP)) {
+      agentSessions.delete(id);
+    }
+  }
+}
+/** Trim resumed history at a clean turn boundary: the window must start on a
+ *  REAL user message (not a tool_result carrier) — an orphaned tool_result or
+ *  a leading assistant message makes Anthropic/Gemini reject the request. */
+function trimAgentHistory(messages: PiMessage[]): PiMessage[] {
+  if (messages.length <= AGENT_SESSION_MAX_MESSAGES) return messages;
+  const isRealUser = (m: PiMessage) =>
+    m.role === 'user' && !m.content.some((b) => b.type === 'tool_result');
+  const windowStart = messages.length - AGENT_SESSION_MAX_MESSAGES;
+  for (let i = windowStart; i < messages.length; i++) {
+    if (isRealUser(messages[i])) return messages.slice(i);
+  }
+  // No clean boundary inside the window — a single tool-heavy turn (each round
+  // adds an assistant + a tool_result message) exceeds the cap by itself.
+  // Fall back BACKWARD to that turn's own user message: the window overshoots
+  // the cap (bounded by the turn's size) instead of silently wiping the whole
+  // history to [] (review PI-C-3 — total session amnesia).
+  for (let j = windowStart - 1; j >= 0; j--) {
+    if (isRealUser(messages[j])) return messages.slice(j);
+  }
+  return messages;
+}
+/** Per-sessionId serialization (review PI-C-SESS-2): two concurrent resumes of
+ *  the same session would both read the same stored history and last-write-win
+ *  the store, silently erasing one call's turn. Chaining the second behind the
+ *  first keeps the linear-history contract; each run is bounded by its own
+ *  timeout (≤300s), so the wait is too. */
+const agentSessionLocks = new Map<string, Promise<void>>();
 export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryResult> {
+  if (!req.sessionId) return runAgentQueryInner(req);
+  const id = req.sessionId;
+  const prev = agentSessionLocks.get(id) ?? Promise.resolve();
+  let release!: () => void;
+  const gate = new Promise<void>((r) => { release = r; });
+  const chained = prev.then(() => gate);
+  agentSessionLocks.set(id, chained);
+  await prev;
+  try {
+    return await runAgentQueryInner(req);
+  } finally {
+    release();
+    if (agentSessionLocks.get(id) === chained) agentSessionLocks.delete(id);
+  }
+}
+async function runAgentQueryInner(req: AgentQueryRequest): Promise<AgentQueryResult> {
   const resolved = resolveAuth();
   if (!resolved.ok) return { ok: false, error: resolved.error };
-  const auth = resolved.auth;
   const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
+  // Same clamp as claude.ts:781 — maxTurns maps onto the session's tool-round budget.
+  const maxTurns = Math.min(Math.max(req.maxTurns || 25, 1), 50);
   const abortController = new AbortController();
   const timeoutHandle = setTimeout(() => abortController.abort(), timeout);
-  const systemPrompt = req.systemPrompt ?? '';
-  const messages: PiMessage[] = [{
-    role: 'user',
-    content: [{ type: 'text', text: req.message }],
-  }];
+  const systemPrompt = req.systemPrompt?.trim() ? req.systemPrompt : PI_CODING_AGENT_PROMPT;
+  sweepAgentSessions();
+  const resumed = req.sessionId ? agentSessions.get(req.sessionId) : undefined;
+  const sessionId = resumed ? req.sessionId! : crypto.randomUUID();
+  if (resumed) resumed.lastUsed = Date.now();
   let fullText = '';
   const usedTools = new Set<string>();
   let errored = false;
   let errorMsg = '';
+  let usedFileTools = false;
+  let capHit = false;
-  try {
-    const stream = streamProvider(auth.flavor, {
-      modelId: auth.modelId,
-      baseUrl: auth.baseUrl,
-      apiKey: auth.apiKey,
-      systemPrompt,
-      messages,
-      maxOutputTokens: auth.maxOutputTokens,
-      maxTokensField: auth.maxTokensField,
-      includeStreamUsage: auth.includeStreamUsage,
-      signal: abortController.signal,
-    });
+  let currentAuth: PiSessionAuth = resolved.auth;
+  const getAuth = (): PiSessionAuth => {
+    const fresh = resolveAuth();
+    if (fresh.ok) currentAuth = fresh.auth;
+    return currentAuth;
+  };
-    for await (const evt of stream) {
-      if (abortController.signal.aborted) break;
+  const session = createPiSession({
+    getAuth,
+    systemPrompt,
+    initialMessages: resumed ? trimAgentHistory(resumed.messages) : undefined,
+    tools: toolDefsForProvider(), // no Task — no task host on this path
+    cwd: WORKSPACE_DIR,
+    abortController,
+    maxToolRounds: maxTurns,
+    onEvent: (evt: PiSessionEvent) => {
       switch (evt.type) {
-        case 'text_delta':
-          fullText += evt.delta;
-          break;
         case 'text_end':
           fullText = evt.text;
           break;
@@ -979,22 +1190,66 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
           errored = true;
           errorMsg = evt.error;
           break;
+        case 'turn_complete':
+          usedFileTools = usedFileTools || evt.usedFileTools;
+          // The error EVENT is suppressed when partial text streamed (D6-2) —
+          // read the outcome fields so a failed turn isn't reported clean.
+          if (evt.errored) {
+            errored = true;
+            errorMsg = errorMsg || evt.errorMsg || '';
+          }
+          if (evt.roundCapHit) capHit = true;
+          break;
       }
-    }
+    },
+  });
+  try {
+    log.info(`[pi/agent-api] Query: msg="${req.message.slice(0, 80)}..." maxTurns=${maxTurns} timeout=${timeout}ms resume=${resumed ? sessionId : 'none'}`);
+    const queue = createAsyncQueue<PiMessage>();
+    queue.push({ role: 'user', content: [{ type: 'text', text: req.message }] });
+    queue.end();
+    await session.run(queue);
   } catch (err: any) {
-    if (abortController.signal.aborted) {
-      return { ok: false, error: 'Query timed out.' };
-    }
-    return { ok: false, error: err?.message || String(err) };
+    if (abortController.signal.aborted) return { ok: false, error: 'Query timed out.', sessionId };
+    return { ok: false, error: err?.message || String(err), sessionId };
   } finally {
     clearTimeout(timeoutHandle);
   }
+  if (abortController.signal.aborted) {
+    // Timed-out histories can hold a dangling tool_use (aborted mid-round) —
+    // don't persist them for resume.
+    return { ok: false, error: 'Query timed out.', sessionId };
+  }
+  // Round-cap exhaustion with no answer: the model was still mid-task when the
+  // budget ran out (claude maps the same state to an error_max_turns result —
+  // review PI-C-2; an ok:true empty response reads as a silent blank bubble in
+  // the documented maxTurns:1 aichat pattern). Don't persist the half-done
+  // turn either — a fresh retry beats resuming into unanswered tool results.
+  if (capHit && !fullText) {
+    return {
+      ok: false,
+      error: `Agent hit its turn limit (maxTurns=${maxTurns}) before producing a response — raise maxTurns or narrow the request.`,
+      sessionId,
+      toolsUsed: Array.from(usedTools),
+    };
+  }
+  // Trim at store time too — otherwise a long-lived session's stored history
+  // grows unboundedly across resumes (the resume-side trim only caps what the
+  // provider sees, not what we keep in memory).
+  agentSessions.set(sessionId, { messages: trimAgentHistory(session.getMessages()), lastUsed: Date.now() });
   // Partial-text precedence (claude parity, audit D6-2): if the model streamed
   // anything before failing, return it as a successful (truncated) response —
   // claude's runAgentQuery only reports the error when nothing streamed.
-  if (errored && !fullText) return { ok: false, error: errorMsg || 'Agent query failed' };
+  if (errored && !fullText) {
+    return { ok: false, error: errorMsg || 'Agent query failed', sessionId, toolsUsed: Array.from(usedTools) };
+  }
-  const usedFileTools = ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
-  return { ok: true, response: fullText, toolsUsed: Array.from(usedTools), usedFileTools };
+  const fileToolsUsed = usedFileTools || ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
+  log.info(`[pi/agent-api] Done: ${fullText.length} chars, tools=[${Array.from(usedTools).join(',')}], session=${sessionId}`);
+  return { ok: true, response: fullText, sessionId, toolsUsed: Array.from(usedTools), usedFileTools: fileToolsUsed };
 }