@geminilight/mindos 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/README_zh.md +9 -9
- package/app/README.md +2 -2
- package/app/app/api/ask/route.ts +191 -19
- package/app/app/api/mcp/install/route.ts +1 -1
- package/app/app/api/mcp/status/route.ts +11 -16
- package/app/app/api/settings/route.ts +3 -1
- package/app/app/api/setup/route.ts +7 -7
- package/app/app/api/sync/route.ts +18 -15
- package/app/components/AskModal.tsx +28 -32
- package/app/components/SettingsModal.tsx +7 -3
- package/app/components/ask/MessageList.tsx +65 -3
- package/app/components/ask/ThinkingBlock.tsx +55 -0
- package/app/components/ask/ToolCallBlock.tsx +97 -0
- package/app/components/settings/AiTab.tsx +76 -2
- package/app/components/settings/types.ts +8 -0
- package/app/components/setup/StepReview.tsx +31 -25
- package/app/components/setup/index.tsx +6 -3
- package/app/lib/agent/context.ts +317 -0
- package/app/lib/agent/index.ts +4 -0
- package/app/lib/agent/prompt.ts +46 -31
- package/app/lib/agent/stream-consumer.ts +212 -0
- package/app/lib/agent/tools.ts +159 -4
- package/app/lib/i18n.ts +28 -0
- package/app/lib/settings.ts +22 -0
- package/app/lib/types.ts +23 -0
- package/app/package.json +2 -3
- package/bin/cli.js +41 -21
- package/bin/lib/build.js +6 -2
- package/bin/lib/gateway.js +24 -3
- package/bin/lib/mcp-install.js +2 -2
- package/bin/lib/mcp-spawn.js +3 -3
- package/bin/lib/stop.js +1 -1
- package/bin/lib/sync.js +81 -40
- package/mcp/README.md +5 -5
- package/mcp/src/index.ts +2 -2
- package/package.json +3 -2
- package/scripts/setup.js +17 -12
- package/scripts/upgrade-prompt.md +6 -6
- package/skills/mindos/SKILL.md +47 -183
- package/skills/mindos-zh/SKILL.md +47 -183
- package/app/package-lock.json +0 -15615
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 3: Context management — token estimation, compaction, tool output truncation.
|
|
3
|
+
*
|
|
4
|
+
* All operations are request-scoped (no persistence to frontend session).
|
|
5
|
+
*/
|
|
6
|
+
import { generateText, type ModelMessage, type ToolResultPart, type ToolModelMessage } from 'ai';
|
|
7
|
+
import type { LanguageModel } from 'ai';
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Token estimation (1 token ≈ 4 chars)
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
/** Rough token count for a single ModelMessage */
|
|
14
|
+
function messageTokens(msg: ModelMessage): number {
|
|
15
|
+
if (typeof msg.content === 'string') return Math.ceil(msg.content.length / 4);
|
|
16
|
+
if (Array.isArray(msg.content)) {
|
|
17
|
+
let chars = 0;
|
|
18
|
+
for (const part of msg.content) {
|
|
19
|
+
if ('text' in part && typeof part.text === 'string') chars += part.text.length;
|
|
20
|
+
if ('value' in part && typeof part.value === 'string') chars += part.value.length;
|
|
21
|
+
if ('input' in part) chars += JSON.stringify(part.input).length;
|
|
22
|
+
}
|
|
23
|
+
return Math.ceil(chars / 4);
|
|
24
|
+
}
|
|
25
|
+
return 0;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Estimate total tokens for a message array */
|
|
29
|
+
export function estimateTokens(messages: ModelMessage[]): number {
|
|
30
|
+
let total = 0;
|
|
31
|
+
for (const m of messages) total += messageTokens(m);
|
|
32
|
+
return total;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/** Estimate tokens for a plain string (e.g. system prompt) */
|
|
36
|
+
export function estimateStringTokens(text: string): number {
|
|
37
|
+
return Math.ceil(text.length / 4);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Context limits by model family
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
const MODEL_LIMITS: Record<string, number> = {
|
|
45
|
+
'claude': 200_000,
|
|
46
|
+
'gpt-4o': 128_000,
|
|
47
|
+
'gpt-4': 128_000,
|
|
48
|
+
'gpt-3.5': 16_000,
|
|
49
|
+
'gpt-5': 200_000,
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
// Sort by prefix length descending so "gpt-4o" matches before "gpt-4"
|
|
53
|
+
const MODEL_LIMIT_ENTRIES = Object.entries(MODEL_LIMITS)
|
|
54
|
+
.sort((a, b) => b[0].length - a[0].length);
|
|
55
|
+
|
|
56
|
+
/** Get context token limit for a model string */
|
|
57
|
+
export function getContextLimit(model: string): number {
|
|
58
|
+
const lower = model.toLowerCase();
|
|
59
|
+
for (const [prefix, limit] of MODEL_LIMIT_ENTRIES) {
|
|
60
|
+
if (lower.includes(prefix)) return limit;
|
|
61
|
+
}
|
|
62
|
+
return 100_000; // conservative default
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/** Check if messages + system prompt exceed threshold of context limit */
|
|
66
|
+
export function needsCompact(
|
|
67
|
+
messages: ModelMessage[],
|
|
68
|
+
systemPrompt: string,
|
|
69
|
+
model: string,
|
|
70
|
+
threshold = 0.7,
|
|
71
|
+
): boolean {
|
|
72
|
+
const total = estimateTokens(messages) + estimateStringTokens(systemPrompt);
|
|
73
|
+
const limit = getContextLimit(model);
|
|
74
|
+
return total > limit * threshold;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
// Tool output truncation (per-tool-type thresholds)
// ---------------------------------------------------------------------------

// Per-tool character budgets applied when truncating historical tool outputs
// (see truncateToolOutputs, which falls back to 500 chars for tools not
// listed here). Budgets reflect how much of an output remains useful after
// the step that produced it has completed.
const TOOL_OUTPUT_LIMITS: Record<string, number> = {
  // List/search tools — only need to know "what was found"
  search: 500,
  list_files: 500,
  get_recent: 500,
  get_backlinks: 500,
  get_history: 500,
  // Read tools — some context value, but not full file
  read_file: 2000,
  get_file_at_version: 2000,
  // Write tools — only need success/failure
  write_file: 200,
  create_file: 200,
  delete_file: 200,
  rename_file: 200,
  move_file: 200,
  append_to_file: 200,
  insert_after_heading: 200,
  update_section: 200,
  append_csv: 200,
};
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Truncate tool outputs in historical messages to save tokens.
|
|
105
|
+
* Only truncates non-last tool messages (the last tool message is kept intact
|
|
106
|
+
* because the model may need its full output for the current step).
|
|
107
|
+
*/
|
|
108
|
+
export function truncateToolOutputs(messages: ModelMessage[]): ModelMessage[] {
|
|
109
|
+
// Find the index of the last 'tool' role message
|
|
110
|
+
let lastToolIdx = -1;
|
|
111
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
112
|
+
if (messages[i].role === 'tool') { lastToolIdx = i; break; }
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
return messages.map((msg, idx) => {
|
|
116
|
+
if (msg.role !== 'tool' || idx === lastToolIdx) return msg;
|
|
117
|
+
|
|
118
|
+
const toolMsg = msg as ToolModelMessage;
|
|
119
|
+
const truncatedContent = toolMsg.content.map(part => {
|
|
120
|
+
if (part.type !== 'tool-result') return part;
|
|
121
|
+
const trp = part as ToolResultPart;
|
|
122
|
+
const toolName = trp.toolName ?? '';
|
|
123
|
+
const limit = TOOL_OUTPUT_LIMITS[toolName] ?? 500;
|
|
124
|
+
if (!trp.output || typeof trp.output !== 'object' || trp.output.type !== 'text') return part;
|
|
125
|
+
if (trp.output.value.length <= limit) return part;
|
|
126
|
+
|
|
127
|
+
return {
|
|
128
|
+
...trp,
|
|
129
|
+
output: {
|
|
130
|
+
...trp.output,
|
|
131
|
+
value: trp.output.value.slice(0, limit) + `\n[...truncated from ${trp.output.value.length} chars]`,
|
|
132
|
+
},
|
|
133
|
+
} satisfies ToolResultPart;
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
return { ...toolMsg, content: truncatedContent } satisfies ToolModelMessage;
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// ---------------------------------------------------------------------------
// Compact: summarize early messages via LLM
// ---------------------------------------------------------------------------

// Instruction prepended to the flattened early-message transcript when asking
// the model (via generateText in compactMessages) to produce a replacement
// summary for the pruned portion of the conversation.
const COMPACT_PROMPT = `Summarize the key points, decisions, and file operations from this conversation in under 500 words. Focus on:
- What the user asked for
- What files were read, created, or modified
- Key decisions and outcomes
- Any unresolved issues

Be concise and factual. Output only the summary, no preamble.`;
|
|
151
|
+
|
|
152
|
+
/** Extract a short text representation from a ModelMessage for summarization */
|
|
153
|
+
function messageToText(m: ModelMessage): string {
|
|
154
|
+
const role = m.role;
|
|
155
|
+
let content = '';
|
|
156
|
+
if (typeof m.content === 'string') {
|
|
157
|
+
content = m.content;
|
|
158
|
+
} else if (Array.isArray(m.content)) {
|
|
159
|
+
const pieces: string[] = [];
|
|
160
|
+
for (const part of m.content) {
|
|
161
|
+
if ('text' in part && typeof (part as { text?: string }).text === 'string') {
|
|
162
|
+
pieces.push((part as { text: string }).text);
|
|
163
|
+
} else if (part.type === 'tool-call' && 'toolName' in part) {
|
|
164
|
+
pieces.push(`[Tool: ${(part as { toolName: string }).toolName}]`);
|
|
165
|
+
} else if (part.type === 'tool-result' && 'output' in part) {
|
|
166
|
+
const trp = part as ToolResultPart;
|
|
167
|
+
const val = trp.output && typeof trp.output === 'object' && trp.output.type === 'text' ? trp.output.value : '';
|
|
168
|
+
pieces.push(`[Result: ${val.slice(0, 200)}]`);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
content = pieces.filter(Boolean).join(' ');
|
|
172
|
+
}
|
|
173
|
+
return `${role}: ${content}`;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/**
 * Compact messages by summarizing early ones with LLM.
 * Returns a new message array with early messages replaced by a summary.
 * Only called when needsCompact() returns true.
 *
 * @param messages - Full request-scoped conversation history.
 * @param model - Model used for the summarization call (currently the same
 *   as the main generation model; see NOTE below).
 * @returns Possibly-compacted messages plus a `compacted` flag. On any
 *   failure (or when there is too little history to split) the original
 *   array is returned unchanged with `compacted: false` — never throws.
 *
 * NOTE: Currently uses the same model as the main generation. A cheaper model
 * (e.g. haiku) would suffice for summarization and avoid competing for rate
 * limits. Deferred until users report rate-limit issues — compact triggers
 * infrequently (>70% context fill).
 */
export async function compactMessages(
  messages: ModelMessage[],
  model: LanguageModel,
): Promise<{ messages: ModelMessage[]; compacted: boolean }> {
  // Too little history to be worth a summarization round-trip.
  if (messages.length < 6) {
    return { messages, compacted: false };
  }

  // Keep the last 6 messages intact, summarize the rest.
  // Adjust split point to avoid cutting between an assistant (with tool calls)
  // and its tool result. Only need to check for orphaned 'tool' messages —
  // an assistant at the split point is safe because its tool results follow it.
  // (Orphaned assistants without results can't exist in history: only completed
  // tool calls are persisted by the frontend.)
  let splitIdx = messages.length - 6;
  while (splitIdx > 0 && messages[splitIdx]?.role === 'tool') {
    splitIdx--;
  }
  // Walking back past tool results may leave almost nothing to summarize.
  if (splitIdx < 2) {
    return { messages, compacted: false };
  }
  const earlyMessages = messages.slice(0, splitIdx);
  const recentMessages = messages.slice(splitIdx);

  // Build a text representation of early messages for summarization
  let earlyText = earlyMessages.map(messageToText).join('\n\n');

  // Truncate if enormous (avoid sending too much to summarizer)
  if (earlyText.length > 30_000) {
    earlyText = earlyText.slice(0, 30_000) + '\n[...truncated]';
  }

  try {
    const { text: summary } = await generateText({
      model,
      prompt: `${COMPACT_PROMPT}\n\n---\n\nConversation to summarize:\n\n${earlyText}`,
    });

    console.log(`[ask] Compacted ${earlyMessages.length} early messages into summary (${summary.length} chars)`);

    const summaryText = `[Summary of earlier conversation]\n\n${summary}`;

    // If first recent message is also 'user', merge summary into it to avoid
    // consecutive user messages (Anthropic rejects user→user sequences).
    if (recentMessages[0]?.role === 'user') {
      const merged = { ...recentMessages[0] };
      if (typeof merged.content === 'string') {
        merged.content = `${summaryText}\n\n---\n\n${merged.content}`;
      } else if (Array.isArray(merged.content)) {
        // Multimodal content (e.g. images) — prepend summary as text part
        merged.content = [{ type: 'text' as const, text: `${summaryText}\n\n---\n\n` }, ...merged.content];
      } else {
        merged.content = summaryText;
      }
      return {
        messages: [merged, ...recentMessages.slice(1)],
        compacted: true,
      };
    }

    // Otherwise prepend as separate user message
    const summaryMessage: ModelMessage = {
      role: 'user',
      content: summaryText,
    };

    return {
      messages: [summaryMessage, ...recentMessages],
      compacted: true,
    };
  } catch (err) {
    // Best-effort: a failed summarization must never break the main request.
    console.error('[ask] Compact failed, using uncompacted messages:', err);
    return { messages, compacted: false };
  }
}
|
|
261
|
+
|
|
262
|
+
// ---------------------------------------------------------------------------
// Hard prune: drop earliest messages as last resort (>90% context)
// ---------------------------------------------------------------------------

/**
 * Hard prune: if still over 90% context after compact, drop earliest messages.
 * Respects assistant-tool pairs: never cuts between an assistant message
 * (containing tool calls) and its following tool result message.
 *
 * @param messages - Conversation history (possibly already compacted).
 * @param systemPrompt - Counted toward the token budget but never pruned.
 * @param model - Model string used to look up the context limit.
 * @returns The input array untouched when under budget; otherwise a slice
 *   starting at the computed cut point, with a synthetic user message
 *   injected if the slice would not start with a 'user' message.
 */
export function hardPrune(
  messages: ModelMessage[],
  systemPrompt: string,
  model: string,
): ModelMessage[] {
  const limit = getContextLimit(model);
  const threshold = limit * 0.9;
  const systemTokens = estimateStringTokens(systemPrompt);

  let total = systemTokens + estimateTokens(messages);
  if (total <= threshold) return messages;

  // Find the cut index: keep messages from cutIdx onward
  // (always retains at least the last 2 messages).
  let cutIdx = 0;
  while (cutIdx < messages.length - 2 && total > threshold) {
    total -= messageTokens(messages[cutIdx]);
    cutIdx++;
  }

  // Ensure we don't cut between an assistant (with tool calls) and its tool result.
  // If cutIdx lands on a 'tool' message, advance past it so the pair stays together
  // or is fully removed.
  while (cutIdx < messages.length - 1 && messages[cutIdx].role === 'tool') {
    total -= messageTokens(messages[cutIdx]);
    cutIdx++;
  }

  // Ensure first message is 'user' (Anthropic requirement)
  while (cutIdx < messages.length - 1 && messages[cutIdx].role !== 'user') {
    total -= messageTokens(messages[cutIdx]);
    cutIdx++;
  }

  // Fallback: if no user message found in remaining messages, inject a synthetic one
  const pruned = cutIdx > 0 ? messages.slice(cutIdx) : messages;
  if (pruned.length > 0 && pruned[0].role !== 'user') {
    console.log(`[ask] Hard pruned ${cutIdx} messages, injecting synthetic user message (${messages.length} → ${pruned.length + 1})`);
    return [{ role: 'user', content: '[Conversation context was pruned due to length. Continuing from here.]' } as ModelMessage, ...pruned];
  }

  if (cutIdx > 0) {
    console.log(`[ask] Hard pruned ${cutIdx} messages (${messages.length} → ${messages.length - cutIdx})`);
    return pruned;
  }

  return messages;
}
|
package/app/lib/agent/index.ts
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
1
|
// Barrel module: the public surface of the agent package.
export { getModel } from './model';
export { knowledgeBaseTools, truncate, assertWritable } from './tools';
export { AGENT_SYSTEM_PROMPT } from './prompt';
// Phase 3 context-management helpers (token estimation, compaction, pruning).
export {
  estimateTokens, estimateStringTokens, getContextLimit, needsCompact,
  truncateToolOutputs, compactMessages, hardPrune,
} from './context';
|
package/app/lib/agent/prompt.ts
CHANGED
|
@@ -1,32 +1,47 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
-
|
|
6
|
-
|
|
7
|
-
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
- INSTRUCTION.md
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
1
|
+
/**
 * Agent system prompt — v3: de-duplicated, persona-driven, with missing instructions added.
 *
 * Design principles:
 * - prompt.ts owns: identity, persona, global behavioral constraints, output format
 * - SKILL.md owns: knowledge-base-specific execution patterns, tool selection, safety rules
 * - Tool descriptions own: per-tool usage instructions (no duplication here)
 *
 * Token budget: ~600 tokens (down from ~900 in v2). Freed space = more room for
 * SKILL.md + bootstrap context within the same context window.
 *
 * NOTE(review): the prompt tells the model that uploaded-file content appears
 * in a "⚠️ USER-UPLOADED FILES" section near the end of the final prompt —
 * confirm the route handler actually appends that section before relying on it.
 */
export const AGENT_SYSTEM_PROMPT = `You are MindOS Agent — a personal knowledge-base operator that reads, writes, and organizes a user's second brain.

Persona: methodical, concise, execution-oriented. You surface what you found (or didn't find) and act on it — no filler, no caveats that add no information.

## What is already loaded

The server auto-loads before each request:
- Bootstrap context: INSTRUCTION.md, README.md, CONFIG files, and directory-local guidance.
- Skill guidance (SKILL.md): detailed knowledge-base rules, tool selection, execution patterns.
- Tool definitions with per-tool usage instructions.

Treat these as your initialization baseline. If the task needs fresher or broader evidence, call tools proactively before concluding.

## Behavioral rules

1. **Read before write.** Never modify a file you haven't read in this request.
2. **Minimal edits.** Prefer section/heading/line-level tools over full file overwrites.
3. **Verify after edit.** Re-read the changed file to confirm correctness.
4. **Cite sources.** When answering from stored knowledge, state the file path so the user can verify.
5. **Fail fast.** If a tool call returns an error or unexpected result, try a different approach or ask the user — do not retry identical arguments.
6. **Be token-aware.** You have a limited step budget (typically 10-30). Batch parallel reads/searches when possible. Do not waste steps on redundant tool calls.
7. **Multilingual content, user-language replies.** Write file content in whatever language the file already uses. Reply to the user in the language they used.

## Uploaded files

Users may upload local files (PDF, txt, csv, etc.) via the chat interface.
- Their content appears in a "⚠️ USER-UPLOADED FILES" section near the end of this prompt.
- Use that content directly — do NOT call read_file or search tools for uploaded files; they are not in the knowledge base.
- If the section is empty or missing, tell the user the upload may have failed.

## Output format

- Answer in the user's language.
- Use Markdown when it improves clarity (headings, lists, tables, code blocks).
- For multi-step tasks: output a brief numbered plan, execute, then summarize outcomes.
- End with concrete next actions when applicable.`;
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import type { Message, MessagePart, ToolCallPart, TextPart, ReasoningPart } from '@/lib/types';
|
|
2
|
+
|
|
3
|
+
/**
 * Parse a UIMessageStream SSE response into structured Message parts.
 * The stream format is Server-Sent Events where each data line is a JSON-encoded UIMessageChunk.
 *
 * @param body - Raw response body stream (SSE framed, UTF-8).
 * @param onUpdate - Called with a fresh immutable Message snapshot at most
 *   once per read() batch (not per SSE line) to bound React re-renders.
 * @param signal - Optional abort signal; when fired, reading stops and any
 *   unfinished tool calls are finalized as errors.
 * @returns The final assembled assistant Message.
 */
export async function consumeUIMessageStream(
  body: ReadableStream<Uint8Array>,
  onUpdate: (message: Message) => void,
  signal?: AbortSignal,
): Promise<Message> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  // Mutable working copies — we deep-clone when emitting to React
  const parts: MessagePart[] = [];
  // Keyed by toolCallId so later chunks (input/output/error) can find the part.
  const toolCalls = new Map<string, ToolCallPart>();
  let currentTextId: string | null = null;
  let currentReasoningPart: ReasoningPart | null = null;

  /** Deep-clone parts into an immutable Message snapshot for React state */
  function buildMessage(): Message {
    const clonedParts: MessagePart[] = parts.map(p => {
      if (p.type === 'text') return { type: 'text' as const, text: p.text };
      if (p.type === 'reasoning') return { type: 'reasoning' as const, text: p.text };
      return { ...p }; // ToolCallPart — shallow copy is safe (all primitive fields + `input` is replaced, not mutated)
    });
    // `content` is the concatenation of all text parts (reasoning/tool excluded).
    const textContent = clonedParts
      .filter((p): p is TextPart => p.type === 'text')
      .map(p => p.text)
      .join('');
    return {
      role: 'assistant',
      content: textContent,
      parts: clonedParts,
    };
  }

  // Returns the in-progress text part for `id` only when it is still the most
  // recent part; otherwise starts a new one (tool calls/reasoning break continuity).
  function findOrCreateTextPart(id: string): TextPart {
    if (currentTextId === id) {
      const last = parts[parts.length - 1];
      if (last && last.type === 'text') return last;
    }
    const part: TextPart = { type: 'text', text: '' };
    parts.push(part);
    currentTextId = id;
    return part;
  }

  // Registers a tool call on first sight (state 'pending'); idempotent for
  // subsequent chunks carrying the same toolCallId.
  function findOrCreateToolCall(toolCallId: string, toolName?: string): ToolCallPart {
    let tc = toolCalls.get(toolCallId);
    if (!tc) {
      tc = {
        type: 'tool-call',
        toolCallId,
        toolName: toolName ?? 'unknown',
        input: undefined,
        state: 'pending',
      };
      toolCalls.set(toolCallId, tc);
      parts.push(tc);
      currentTextId = null; // break text continuity
    }
    return tc;
  }

  try {
    while (true) {
      if (signal?.aborted) break;
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      // Process complete SSE lines
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? ''; // keep incomplete last line

      let changed = false;

      for (const line of lines) {
        const trimmed = line.trim();

        // SSE format: the ai SDK v6 UIMessageStream uses "d:{json}\n"
        // Also handle standard "data:{json}" for robustness
        // NOTE(review): confirm the "d:" framing against the deployed ai SDK
        // version — the documented SSE protocol uses "data:" lines.
        let jsonStr: string | null = null;
        if (trimmed.startsWith('d:')) {
          jsonStr = trimmed.slice(2);
        } else if (trimmed.startsWith('data:')) {
          jsonStr = trimmed.slice(5).trim();
        }

        if (!jsonStr) continue;

        let chunk: Record<string, unknown>;
        try {
          chunk = JSON.parse(jsonStr);
        } catch {
          continue; // skip malformed lines
        }

        const type = chunk.type as string;

        switch (type) {
          case 'text-start': {
            findOrCreateTextPart(chunk.id as string);
            changed = true;
            break;
          }
          case 'text-delta': {
            const part = findOrCreateTextPart(chunk.id as string);
            part.text += chunk.delta as string;
            changed = true;
            break;
          }
          case 'text-end': {
            // Text part is complete — no state change needed
            break;
          }
          case 'tool-input-start': {
            const tc = findOrCreateToolCall(chunk.toolCallId as string, chunk.toolName as string);
            tc.state = 'running';
            changed = true;
            break;
          }
          case 'tool-input-delta': {
            // Streaming input — we wait for input-available for the complete input
            break;
          }
          case 'tool-input-available': {
            const tc = findOrCreateToolCall(chunk.toolCallId as string, chunk.toolName as string);
            tc.input = chunk.input;
            tc.state = 'running';
            changed = true;
            break;
          }
          case 'tool-output-available': {
            const tc = toolCalls.get(chunk.toolCallId as string);
            if (tc) {
              tc.output = typeof chunk.output === 'string' ? chunk.output : JSON.stringify(chunk.output);
              tc.state = 'done';
              changed = true;
            }
            break;
          }
          case 'tool-output-error':
          case 'tool-input-error': {
            const tc = toolCalls.get(chunk.toolCallId as string);
            if (tc) {
              tc.output = (chunk.errorText as string) ?? 'Error';
              tc.state = 'error';
              changed = true;
            }
            break;
          }
          case 'error': {
            // Surface stream-level errors inline as a visible text part.
            const errorText = (chunk.errorText as string) ?? 'Unknown error';
            parts.push({ type: 'text', text: `\n\n**Error:** ${errorText}` });
            currentTextId = null;
            changed = true;
            break;
          }
          // step-start, metadata, finish — ignored for now
          case 'reasoning-start': {
            currentReasoningPart = { type: 'reasoning', text: '' };
            parts.push(currentReasoningPart);
            currentTextId = null;
            changed = true;
            break;
          }
          case 'reasoning-delta': {
            // NOTE(review): deltas are appended to the most recent reasoning
            // part without matching chunk ids — assumes reasoning blocks never
            // interleave; confirm against the stream protocol.
            if (currentReasoningPart) {
              currentReasoningPart.text += chunk.delta as string;
              changed = true;
            }
            break;
          }
          case 'reasoning-end': {
            currentReasoningPart = null;
            break;
          }
          default:
            break;
        }
      }

      // Emit once per reader.read() batch, not per SSE line
      if (changed) {
        onUpdate(buildMessage());
      }
    }
  } finally {
    reader.releaseLock();
  }

  // Any trailing bytes left in `buffer` are discarded here; SSE frames are
  // newline-terminated, so after a clean stream end this is normally empty.

  // Finalize any tool calls still stuck in running/pending state
  // (stream ended before their output arrived — e.g. abort, network error, step limit)
  let finalized = false;
  for (const tc of toolCalls.values()) {
    if (tc.state === 'running' || tc.state === 'pending') {
      tc.state = 'error';
      tc.output = tc.output ?? 'Stream ended before tool completed';
      finalized = true;
    }
  }
  if (finalized) {
    onUpdate(buildMessage());
  }

  return buildMessage();
}
|