npm - bloby-bot - Versions diffs - 0.70.12 → 0.70.13 - Mend

bloby-bot 0.70.12 → 0.70.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/bin/cli.js +11 -3
package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-CU9KhQdP.js} +4 -4
package/dist-bloby/assets/globals-DlPtwiZL.css +2 -0
package/dist-bloby/assets/{globals-B3cTbITX.js → globals-mGpojCOe.js} +1 -1
package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-D0Tm_wgU.js} +1 -1
package/dist-bloby/assets/mermaid-GHXKKRXX-B95J3s3s.js +1 -0
package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-GfjHF9nm.js} +1 -1
package/dist-bloby/bloby.html +3 -3
package/dist-bloby/onboard.html +3 -3
package/package.json +2 -2
package/scripts/install +15 -7
package/scripts/install.ps1 +35 -14
package/scripts/install.sh +15 -7
package/shared/relay.ts +3 -1
package/supervisor/channels/manager.ts +16 -11
package/supervisor/chat/OnboardWizard.tsx +0 -15
package/supervisor/harnesses/pi/index.ts +320 -100
package/supervisor/harnesses/pi/providers/humanize-error.ts +2 -2
package/supervisor/harnesses/pi/providers/retry.ts +31 -0
package/supervisor/harnesses/pi/providers/stream-anthropic.ts +23 -3
package/supervisor/harnesses/pi/providers/stream-google.ts +21 -3
package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +17 -3
package/supervisor/harnesses/pi/providers/types.ts +11 -0
package/supervisor/harnesses/pi/session.ts +116 -3
package/supervisor/harnesses/pi/test-completion.ts +56 -0
package/supervisor/harnesses/pi/tools/bash.ts +198 -22
package/supervisor/harnesses/pi/tools/glob.ts +79 -0
package/supervisor/harnesses/pi/tools/grep.ts +0 -0
package/supervisor/harnesses/pi/tools/registry.ts +18 -6
package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
package/supervisor/index.ts +36 -2
package/worker/index.ts +18 -1
package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
package/worker/prompts/bloby-system-prompt.txt +1 -1
package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
package/workspace/skills/mac/SKILL.md +13 -4
package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1

package/supervisor/harnesses/pi/providers/stream-anthropic.ts CHANGED

@@ -20,7 +20,7 @@ import type {
   PiStopReason,
   PiUsage,
 } from './types.js';
-import { fetchWithRetry } from './retry.js';
+import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
 import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
 /* ── SSE parser (shares the LF/CRLF-tolerant pattern from the other providers) ── */
@@ -32,7 +32,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
   let buffer = '';
   try {
     while (true) {
-      const { value, done } = await reader.read();
+      const { value, done } = await readWithIdleTimeout(reader, 'Anthropic');
       if (done) break;
       buffer += decoder.decode(value, { stream: true });
       let idx;
@@ -79,6 +79,10 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
   const out: any[] = [];
   for (const b of blocks) {
     if (b.type === 'text') {
+      // The Messages API rejects empty/whitespace-only text blocks ("text
+      // content blocks must be non-empty") — drop them; an all-empty message
+      // is then filtered by the content-length guards in toAnthropicMessages.
+      if (!b.text || !b.text.trim()) continue;
       out.push({ type: 'text', text: b.text });
     } else if (b.type === 'image') {
       out.push({
@@ -105,13 +109,19 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
 }
 function toAnthropicMessages(pi: PiMessage[]): any[] {
-  return pi
+  const msgs = pi
     .filter((m) => m.content.length > 0)
     .map((m) => ({
       role: m.role === 'assistant' ? 'assistant' : 'user',
       content: toAnthropicContent(m.content),
     }))
     .filter((m) => m.content.length > 0);
+  // The Messages API requires the first message to be user-role. Rolling
+  // history windows (customer buffers) are trimmed user-first at the source
+  // (channels/manager.ts trimCustomerBuffer), but defend here too — a leading
+  // assistant message 400s the whole request (audit C-7).
+  while (msgs.length > 0 && msgs[0].role !== 'user') msgs.shift();
+  return msgs;
 }
 function toAnthropicTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
@@ -166,6 +176,9 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
   if (req.tools && req.tools.length > 0) {
     body.tools = toAnthropicTools(req.tools);
     body.tools[body.tools.length - 1].cache_control = { type: 'ephemeral' };
+    // Round-cap wrap-up: forbid further tool calls; tools stay declared so
+    // tool_use/tool_result blocks in history remain valid.
+    if (req.toolChoice === 'none') body.tool_choice = { type: 'none' };
   }
   if (Array.isArray(body.messages) && body.messages.length > 0) {
     const lastContent = body.messages[body.messages.length - 1].content;
@@ -213,6 +226,7 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
   let usage: PiUsage | undefined;
   let chunkCount = 0;
   let firstChunkSummary = '';
+  let thinkingEmitted = false;
   try {
     for await (const evt of parseSse(res)) {
@@ -250,6 +264,12 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
               toolArgsBuf: '',
             });
           } else {
+            // Extended-thinking blocks (not requested today, future-proofed):
+            // one liveness pulse, text never forwarded.
+            if (block.type === 'thinking' && !thinkingEmitted) {
+              thinkingEmitted = true;
+              yield { type: 'thinking' };
+            }
             blocks.set(idx, { kind: 'other' });
           }
           break;

package/supervisor/harnesses/pi/providers/stream-google.ts CHANGED

@@ -18,7 +18,7 @@ import type {
   PiStopReason,
   PiUsage,
 } from './types.js';
-import { fetchWithRetry } from './retry.js';
+import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
 import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
 /** Walk an SSE byte stream and yield each parsed JSON event. */
@@ -30,7 +30,7 @@ async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncItera
   let totalBytes = 0;
   try {
     while (true) {
-      const { value, done } = await reader.read();
+      const { value, done } = await readWithIdleTimeout(reader, 'Google Gemini');
       if (done) break;
       if (value) totalBytes += value.byteLength;
       buffer += decoder.decode(value, { stream: true });
@@ -203,11 +203,25 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
       maxOutputTokens: req.maxOutputTokens ?? 32768,
     },
   };
+  // Thinking-capable families (2.5+/3.x): ask for thought summaries so the
+  // harness can emit a liveness pulse — without this, Gemini 3 burns its
+  // output budget on invisible reasoning and the chat looks hung. Gated by
+  // model id; unknown/dynamic ids skip it (older models reject the field).
+  // The rolling aliases (gemini-flash-latest / gemini-flash-lite-latest)
+  // resolve to 2.5+/3.x thinking models too (review PI-D-2).
+  if (/gemini-(2\.5|[3-9]|flash(-lite)?-latest)/i.test(req.modelId)) {
+    body.generationConfig.thinkingConfig = { includeThoughts: true };
+  }
   if (req.systemPrompt?.trim()) {
     body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
   }
   if (req.tools && req.tools.length > 0) {
     body.tools = toGeminiTools(req.tools);
+    // Round-cap wrap-up: forbid further function calls; tools stay declared so
+    // functionCall/functionResponse parts in history remain valid.
+    if (req.toolChoice === 'none') {
+      body.toolConfig = { functionCallingConfig: { mode: 'NONE' } };
+    }
   }
   let res: Response;
@@ -263,7 +277,11 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
       for (const part of parts) {
         // Thinking models emit reasoning parts with `thought: true`. They
         // shouldn't be shown to the user as part of the visible answer.
-        if (part?.thought) { thoughtPartCount++; continue; }
+        if (part?.thought) {
+          thoughtPartCount++;
+          if (thoughtPartCount === 1) yield { type: 'thinking' };
+          continue;
+        }
         if (part?.functionCall && typeof part.functionCall.name === 'string') {
           // Gemini doesn't surface a tool-call id of its own; bake the tool
           // name into the id so the session can echo it back as a

package/supervisor/harnesses/pi/providers/stream-openai-completions.ts CHANGED

@@ -18,7 +18,7 @@ import type {
   PiStopReason,
   PiUsage,
 } from './types.js';
-import { fetchWithRetry } from './retry.js';
+import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
 import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
 /* ── SSE parser (LF or CRLF tolerant, flushes the trailing event) ── */
@@ -30,7 +30,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
   let buffer = '';
   try {
     while (true) {
-      const { value, done } = await reader.read();
+      const { value, done } = await readWithIdleTimeout(reader, 'OpenAI-compat');
       if (done) break;
       buffer += decoder.decode(value, { stream: true });
       let idx;
@@ -203,7 +203,10 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
   }
   if (req.tools && req.tools.length > 0) {
     body.tools = toOpenAITools(req.tools);
-    body.tool_choice = 'auto';
+    // 'none' = the round-cap wrap-up round: the model must summarize, not
+    // start more work. Tools stay declared so histories containing tool calls
+    // remain valid.
+    body.tool_choice = req.toolChoice === 'none' ? 'none' : 'auto';
   }
   let res: Response;
@@ -243,6 +246,7 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
   const toolCallsByIndex = new Map<number, PartialToolCall>();
   let chunkCount = 0;
   let firstChunkSummary = '';
+  let thinkingEmitted = false;
   // Vendors disagree on where streamed usage lives: spec says a final
   // choice-less chunk's `usage`, Groq defaults to nesting under `x_groq.usage`,
@@ -267,6 +271,16 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
       readUsage(choice?.usage);
       const delta = choice.delta || {};
+      // Reasoning models stream hidden thinking under vendor-specific fields
+      // (DeepSeek/OpenRouter: reasoning_content; others: reasoning /
+      // reasoning_text — upstream pi's field priority). Emit ONE liveness
+      // pulse so the chat doesn't look hung; never forward the text itself.
+      const reasoningDelta = delta.reasoning_content ?? delta.reasoning ?? delta.reasoning_text;
+      if (!thinkingEmitted && typeof reasoningDelta === 'string' && reasoningDelta.length > 0) {
+        thinkingEmitted = true;
+        yield { type: 'thinking' };
+      }
       if (typeof delta.content === 'string' && delta.content.length > 0) {
         accumulated += delta.content;
         yield { type: 'text_delta', delta: delta.content };

package/supervisor/harnesses/pi/providers/types.ts CHANGED

@@ -56,6 +56,13 @@ export interface PiStreamRequest {
    * that 422 on the `stream_options.include_usage` opt-in. Default true.
    */
   includeStreamUsage?: boolean;
+  /**
+   * 'none' forbids tool calls for this request (mapped per flavor: OpenAI
+   * tool_choice:'none', Anthropic {type:'none'}, Gemini functionCallingConfig
+   * mode NONE). Used by the session's round-cap wrap-up round, where the model
+   * must summarize instead of starting more work.
+   */
+  toolChoice?: 'auto' | 'none';
   /** Optional abort signal so the session can interrupt in-flight requests. */
   signal?: AbortSignal;
 }
@@ -72,6 +79,10 @@ export type PiErrorKind = 'auth' | 'context-overflow' | 'rate-limit' | 'billing'
 export type PiStreamEvent =
   | { type: 'text_delta'; delta: string }
   | { type: 'text_end'; text: string }
+  /** Emitted when the model starts (visibly) reasoning — a liveness pulse for
+   *  thinking models so the chat doesn't look hung. Reasoning TEXT is never
+   *  forwarded (it would corrupt the streamed-text == response contract). */
+  | { type: 'thinking' }
   | { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
   | { type: 'done'; stopReason: PiStopReason; usage?: PiUsage }
   | { type: 'error'; error: string; status?: number; kind?: PiErrorKind; retryable?: boolean };

package/supervisor/harnesses/pi/session.ts CHANGED

@@ -44,6 +44,8 @@ export type PiSessionEvent =
   | { type: 'turn_started' }
   | { type: 'text_delta'; delta: string }
   | { type: 'text_end'; text: string }
+  /** Liveness pulse: the model is reasoning (thinking models) — no text attached. */
+  | { type: 'thinking' }
   | { type: 'tool_use'; id: string; name: string; input: any }
   | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
   | {
@@ -78,6 +80,10 @@ export interface PiSessionAuth {
   includeStreamUsage?: boolean;
   /** Model context window from the catalog — reported on turn_complete for the recycler. */
   contextWindow?: number;
+  /** False when the catalog says the model is text-only — image blocks are
+   *  downgraded to placeholders on send so one screenshot can't 400-poison
+   *  the session (audit C-8). Undefined (dynamic models) ⇒ assume vision. */
+  supportsImages?: boolean;
 }
 export interface PiSessionInit {
@@ -117,6 +123,51 @@ export interface PiSession {
   getMessages(): PiMessage[];
 }
+/** Transform-on-send for text-only models (audit C-8): image blocks become
+ *  placeholders in the REQUEST only — the stored history keeps the images, so
+ *  switching to a vision model later restores them. */
+function downgradeImages(messages: PiMessage[]): PiMessage[] {
+  let any = false;
+  const out = messages.map((m) => {
+    if (!m.content.some((b) => b.type === 'image')) return m;
+    any = true;
+    return {
+      ...m,
+      content: m.content.map((b): PiContentBlock =>
+        b.type === 'image'
+          ? { type: 'text', text: '[An image was attached here, but the current model cannot view images. Tell the user to switch to a vision-capable model if the image matters.]' }
+          : b,
+      ),
+    };
+  });
+  return any ? out : messages;
+}
+/** Emergency in-turn context relief (audit D2-6): when occupancy crosses the
+ *  threshold MID-turn (recycling only acts between idle turns), stub out the
+ *  oldest large tool_result payloads — never user/assistant text, never the
+ *  protected tail (the current round's results). Cruder than real compaction,
+ *  but the turn finishes instead of 400ing on the context wall. */
+function trimOldToolResults(messages: PiMessage[], charsToFree: number, protectTail: number): number {
+  let freed = 0;
+  const limit = Math.max(0, messages.length - protectTail);
+  for (let i = 0; i < limit && freed < charsToFree; i++) {
+    const m = messages[i];
+    if (m.role !== 'user') continue;
+    for (const b of m.content) {
+      if (b.type === 'tool_result' && typeof b.content === 'string' && b.content.length > 2048) {
+        freed += b.content.length;
+        b.content = `[tool output trimmed to fit the context window — ~${Math.round(b.content.length / 1024)} KB removed]`;
+        if (freed >= charsToFree) break;
+      }
+    }
+  }
+  return freed;
+}
+const ROUND_CAP_NOTICE =
+  '[System: the tool budget for this turn is exhausted. Stop working now. In 2-3 sentences, summarize what you completed, what remains, and the exact next step.]';
 const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
 const MAX_TOOL_ROUNDS = 25;
 /** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
@@ -142,7 +193,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
     retryable?: boolean;
   }
-  async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
+  async function runOneRound(emitSeparatorFirst: boolean, opts?: { wrapUp?: boolean }): Promise<RoundResult> {
     const result: RoundResult = { text: '', toolUses: [], errored: false };
     let firstDelta = true;
     try {
@@ -153,8 +204,9 @@ export function createPiSession(init: PiSessionInit): PiSession {
         baseUrl: auth.baseUrl,
         apiKey: auth.apiKey,
         systemPrompt: init.systemPrompt,
-        messages,
+        messages: auth.supportsImages === false ? downgradeImages(messages) : messages,
         tools: init.tools,
+        toolChoice: opts?.wrapUp ? 'none' : undefined,
         maxOutputTokens: auth.maxOutputTokens,
         maxTokensField: auth.maxTokensField,
         includeStreamUsage: auth.includeStreamUsage,
@@ -182,6 +234,9 @@ export function createPiSession(init: PiSessionInit): PiSession {
             // at the end of the whole turn so the UI doesn't show half-answers.
             result.text = evt.text;
             break;
+          case 'thinking':
+            init.onEvent({ type: 'thinking' });
+            break;
           case 'tool_use':
             result.toolUses.push({
               id: evt.id,
@@ -189,7 +244,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
               input: evt.input,
               thoughtSignature: evt.thoughtSignature,
             });
-            init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
+            // Wrap-up rounds forbid tools (toolChoice 'none'); if a vendor
+            // ignores that, swallow the phantom call silently — it is never
+            // executed or persisted.
+            if (!opts?.wrapUp) {
+              init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
+            }
             break;
           case 'error':
             result.errored = true;
@@ -331,10 +391,63 @@ export function createPiSession(init: PiSessionInit): PiSession {
         messages.push({ role: 'user', content: toolResultBlocks });
       }
+      // Emergency in-turn context relief (audit D2-6): recycling only acts
+      // between idle turns, so a single heavy tool loop could cross the wall
+      // mid-turn. Above 85% occupancy, stub the oldest large tool outputs to
+      // bring the next request back toward 70%.
+      if (lastContextWindow && lastUsage) {
+        const occupancy =
+          (lastUsage.inputTokens || 0) + (lastUsage.cacheReadTokens || 0) + (lastUsage.cacheCreationTokens || 0);
+        if (occupancy > 0.85 * lastContextWindow) {
+          const charsToFree = (occupancy - Math.floor(0.7 * lastContextWindow)) * 4; // ~4 chars/token
+          const freed = trimOldToolResults(messages, charsToFree, 4);
+          if (freed > 0) {
+            log.info(`[pi/session] context at ${occupancy}/${lastContextWindow} tok mid-turn — trimmed ~${Math.round(freed / 1024)} KB of old tool output`);
+          }
+        }
+      }
       // No tool calls ⇒ the model is done with this turn.
       if (toolUses.length === 0) { roundCapHit = false; break; }
     }
+    // Round-cap wrap-up (audit D5-8): the budget ran out with the model still
+    // mid-task. Run ONE final no-tools round so the turn ends with an honest
+    // status summary instead of silent truncation. roundCapHit stays true on
+    // turn_complete — consumers still know the work is incomplete.
+    if (roundCapHit && !turnErrored && !init.abortController.signal.aborted) {
+      log.info(`[pi/session] tool-round budget (${maxRounds}) exhausted — running a no-tools wrap-up round`);
+      messages.push({ role: 'user', content: [{ type: 'text', text: ROUND_CAP_NOTICE }] });
+      const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
+      const res = await runOneRound(needsSeparator, { wrapUp: true });
+      if (res.text) {
+        if (needsSeparator) accumulatedText += '\n\n';
+        accumulatedText += res.text;
+        messages.push({ role: 'assistant', content: [{ type: 'text', text: res.text }] });
+      } else {
+        // The notice was never answered — pop it so the NEXT turn doesn't
+        // open under a stale "stop working now" instruction (review PI-D-1).
+        const last = messages[messages.length - 1];
+        if (last?.role === 'user' && last.content.length === 1 &&
+            last.content[0].type === 'text' && last.content[0].text === ROUND_CAP_NOTICE) {
+          messages.pop();
+        }
+      }
+      // Fatal wrap-up failures (dead key / context wall) must still tear the
+      // session down, and a cap-hit turn with NO text at all must not end in
+      // total silence — claude surfaces error_max_turns and pi's one-shot
+      // paths guard this state too (PI-C-2). Set the turn-error fields so the
+      // standard emission below handles both (review PI-D-1).
+      if (res.errored && (res.errorKind === 'auth' || res.errorKind === 'context-overflow')) {
+        turnErrored = true;
+        turnErrorMsg = res.errorMsg;
+        turnErrorKind = res.errorKind;
+      } else if (!accumulatedText) {
+        turnErrored = true;
+        turnErrorMsg = `I hit my tool budget for this turn (${maxRounds} rounds) before finishing — say "continue" and I'll pick up where I left off.`;
+      }
+    }
     // Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
     //   1. text_end whenever ANY text streamed — even on errored turns, so the
     //      partial the user watched is committed, persisted, and consumes its

package/supervisor/harnesses/pi/test-completion.ts CHANGED

@@ -11,6 +11,8 @@
  *   - google-gemini       → POST {baseUrl}/models/{modelId}:generateContent
  */
 import { getPiSubProvider, type PiApiFlavor } from './sub-providers.js';
+import { streamProvider } from './providers/stream.js';
+import { toolDefsForProvider } from './tools/registry.js';
 export interface PiTestCompletionInput {
   subProvider: string;
@@ -88,6 +90,60 @@ export async function runPiTestCompletion(input: PiTestCompletionInput): Promise
   }
 }
+/**
+ * Streaming + tools probe (audit C-4). The non-streaming, tool-less test above
+ * validates a contract no real turn uses — free-form model ids (Ollama, LM
+ * Studio, custom, OpenRouter) could pass it and then fail the first actual
+ * message, which streams SSE with the full tool schema attached. This probe
+ * exercises the REAL wire shape in one cheap request: success = any
+ * text/tool-call event arrives before an error does.
+ */
+export async function runPiStreamProbe(input: PiTestCompletionInput): Promise<PiTestCompletionResult> {
+  const provider = getPiSubProvider(input.subProvider);
+  if (!provider) return { ok: false, error: `Unknown sub-provider: ${input.subProvider}` };
+  const baseUrl = pickBaseUrl(input);
+  if (!baseUrl) return { ok: false, error: 'Missing base URL' };
+  const modelId = pickModelId(input);
+  if (!modelId) return { ok: false, error: 'Missing model ID' };
+  const ctl = new AbortController();
+  const timer = setTimeout(() => ctl.abort(), REQUEST_TIMEOUT_MS);
+  try {
+    const stream = streamProvider(provider.flavor, {
+      modelId,
+      baseUrl,
+      apiKey: input.apiKey?.trim() || '',
+      systemPrompt: 'You are a connectivity probe. Reply with the single word OK.',
+      messages: [{ role: 'user', content: [{ type: 'text', text: input.prompt || 'Reply with the single word OK.' }] }],
+      // withTask: the live conversation's schema is the superset every real
+      // turn sends — probe with the same shape (review PI-D-4).
+      tools: toolDefsForProvider({ withTask: true }),
+      // Generous: reasoning models burn output budget on hidden thinking first.
+      maxOutputTokens: 2048,
+      maxTokensField: provider.maxTokensField,
+      includeStreamUsage: provider.noStreamUsage ? false : undefined,
+      signal: ctl.signal,
+    });
+    for await (const evt of stream) {
+      if (evt.type === 'text_delta' || evt.type === 'tool_use') {
+        return { ok: true, text: 'stream OK', modelId, subProvider: provider.id };
+      }
+      if (evt.type === 'error') {
+        return { ok: false, error: evt.error, modelId, subProvider: provider.id };
+      }
+    }
+    if (ctl.signal.aborted) {
+      return { ok: false, error: `Stream probe timed out after ${REQUEST_TIMEOUT_MS / 1000}s.`, modelId, subProvider: provider.id };
+    }
+    return { ok: false, error: 'The stream ended without producing any output.', modelId, subProvider: provider.id };
+  } catch (err: any) {
+    const msg = err?.name === 'AbortError' ? `Stream probe timed out after ${REQUEST_TIMEOUT_MS / 1000}s.` : err?.message || String(err);
+    return { ok: false, error: msg, modelId, subProvider: provider.id };
+  } finally {
+    clearTimeout(timer);
+  }
+}
 interface DispatchArgs {
   baseUrl: string;
   modelId: string;