npm - @geminilight/mindos - Versions diffs - 0.5.20 → 0.5.22 - Mend

@geminilight/mindos 0.5.20 → 0.5.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

package/app/app/api/ask/route.ts +343 -178
package/app/app/api/monitoring/route.ts +95 -0
package/app/components/SettingsModal.tsx +58 -58
package/app/components/settings/AgentsTab.tsx +240 -0
package/app/components/settings/AiTab.tsx +4 -25
package/app/components/settings/AppearanceTab.tsx +31 -13
package/app/components/settings/KnowledgeTab.tsx +13 -28
package/app/components/settings/McpAgentInstall.tsx +227 -0
package/app/components/settings/McpServerStatus.tsx +172 -0
package/app/components/settings/McpSkillsSection.tsx +583 -0
package/app/components/settings/McpTab.tsx +17 -959
package/app/components/settings/MonitoringTab.tsx +202 -0
package/app/components/settings/PluginsTab.tsx +4 -27
package/app/components/settings/Primitives.tsx +69 -0
package/app/components/settings/ShortcutsTab.tsx +2 -4
package/app/components/settings/SyncTab.tsx +8 -24
package/app/components/settings/types.ts +116 -2
package/app/instrumentation.ts +7 -2
package/app/lib/agent/context.ts +151 -87
package/app/lib/agent/index.ts +5 -3
package/app/lib/agent/log.ts +1 -0
package/app/lib/agent/model.ts +76 -10
package/app/lib/agent/skill-rules.ts +70 -0
package/app/lib/agent/stream-consumer.ts +73 -77
package/app/lib/agent/to-agent-messages.ts +106 -0
package/app/lib/agent/tools.ts +260 -266
package/app/lib/api.ts +12 -3
package/app/lib/core/csv.ts +2 -1
package/app/lib/core/fs-ops.ts +7 -6
package/app/lib/core/index.ts +1 -1
package/app/lib/core/lines.ts +7 -6
package/app/lib/core/search-index.ts +174 -0
package/app/lib/core/search.ts +30 -1
package/app/lib/core/security.ts +6 -3
package/app/lib/errors.ts +108 -0
package/app/lib/fs.ts +6 -3
package/app/lib/i18n-en.ts +523 -0
package/app/lib/i18n-zh.ts +548 -0
package/app/lib/i18n.ts +4 -963
package/app/lib/metrics.ts +81 -0
package/app/next-env.d.ts +1 -1
package/app/next.config.ts +1 -1
package/app/package-lock.json +3258 -3093
package/app/package.json +6 -3
package/bin/cli.js +7 -4
package/package.json +4 -1

package/app/app/api/ask/route.ts CHANGED

@@ -1,107 +1,142 @@
 export const dynamic = 'force-dynamic';
-import { streamText, stepCountIs, type ModelMessage } from 'ai';
+import { Agent, type AgentEvent, type BeforeToolCallContext, type BeforeToolCallResult, type AfterToolCallContext, type AfterToolCallResult } from '@mariozechner/pi-agent-core';
 import { NextRequest, NextResponse } from 'next/server';
 import fs from 'fs';
 import path from 'path';
 import { getFileContent, getMindRoot } from '@/lib/fs';
-import { getModel, knowledgeBaseTools, truncate, AGENT_SYSTEM_PROMPT, estimateTokens, estimateStringTokens, getContextLimit, needsCompact, truncateToolOutputs, compactMessages, hardPrune } from '@/lib/agent';
-import { effectiveAiConfig, readSettings } from '@/lib/settings';
-import type { Message as FrontendMessage, ToolCallPart as FrontendToolCallPart } from '@/lib/types';
-/**
- * Convert frontend Message[] (with parts containing tool calls + results)
- * into AI SDK ModelMessage[] that streamText expects.
- *
- * Frontend format:
- *   { role: 'assistant', content: '...', parts: [TextPart, ToolCallPart(with output/state)] }
- *
- * AI SDK format:
- *   { role: 'assistant', content: [TextPart, ToolCallPart(no output)] }
- *   { role: 'tool', content: [ToolResultPart] }  // one per completed tool call
- */
-function convertToModelMessages(messages: FrontendMessage[]): ModelMessage[] {
-  const result: ModelMessage[] = [];
-  for (const msg of messages) {
-    if (msg.role === 'user') {
-      result.push({ role: 'user', content: msg.content });
-      continue;
-    }
+import { getModelConfig } from '@/lib/agent/model';
+import { knowledgeBaseTools, WRITE_TOOLS, truncate } from '@/lib/agent/tools';
+import { AGENT_SYSTEM_PROMPT } from '@/lib/agent/prompt';
+import { toAgentMessages } from '@/lib/agent/to-agent-messages';
+import {
+  estimateTokens, estimateStringTokens, getContextLimit,
+  createTransformContext,
+} from '@/lib/agent/context';
+import { logAgentOp } from '@/lib/agent/log';
+import { loadSkillRules } from '@/lib/agent/skill-rules';
+import { readSettings } from '@/lib/settings';
+import { MindOSError, apiError, ErrorCodes } from '@/lib/errors';
+import { metrics } from '@/lib/metrics';
+import { assertNotProtected } from '@/lib/core';
+import type { Message as FrontendMessage } from '@/lib/types';
+// ---------------------------------------------------------------------------
+// MindOS SSE format — 6 event types (front-back contract)
+// ---------------------------------------------------------------------------
+type MindOSSSEvent = // Union of the JSON payloads that the stream's send() helper writes as SSE data frames.
+  | { type: 'text_delta'; delta: string } // sent for message_update events whose assistant event is a text_delta
+  | { type: 'thinking_delta'; delta: string } // sent for message_update events whose assistant event is a thinking_delta
+  | { type: 'tool_start'; toolCallId: string; toolName: string; args: unknown } // sent on tool_execution_start
+  | { type: 'tool_end'; toolCallId: string; output: string; isError: boolean } // sent on tool_execution_end; output is the joined text parts of the result
+  | { type: 'done'; usage?: { input: number; output: number } } // sent when agent.prompt() resolves; the handler in this file sends it without usage
+  | { type: 'error'; message: string }; // sent when agent.prompt() rejects; message is the error text
+// ---------------------------------------------------------------------------
+// Event classifiers and field extractors for AgentEvent variants (plain boolean checks; no type narrowing)
+// ---------------------------------------------------------------------------
+function isTextDeltaEvent(e: AgentEvent): boolean { // True when e is a message_update whose assistantMessageEvent.type is text_delta.
+  return e.type === 'message_update' && (e as any).assistantMessageEvent?.type === 'text_delta'; // cast to any: the nested field is read without static checking; returns boolean, so callers get no narrowing
+}
-    // Skip error placeholder messages from frontend
-    if (msg.content.startsWith('__error__')) continue;
+function getTextDelta(e: AgentEvent): string { // Reads assistantMessageEvent.delta from e; does not check the event type itself.
+  return (e as any).assistantMessageEvent?.delta ?? ''; // empty string when the nested event or its delta is null or undefined
+}
-    // Assistant message
-    if (!msg.parts || msg.parts.length === 0) {
-      // Plain text assistant message — no tool calls
-      if (msg.content) {
-        result.push({ role: 'assistant', content: msg.content });
-      }
-      continue;
-    }
+function isThinkingDeltaEvent(e: AgentEvent): boolean { // True when e is a message_update whose assistantMessageEvent.type is thinking_delta.
+  return e.type === 'message_update' && (e as any).assistantMessageEvent?.type === 'thinking_delta'; // cast to any: the nested field is read without static checking
+}
-    // Build assistant message content array (text parts + tool call parts)
-    const assistantContent: Array<
-      { type: 'text'; text: string } |
-      { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown }
-    > = [];
-    const completedToolCalls: FrontendToolCallPart[] = [];
-    for (const part of msg.parts) {
-      if (part.type === 'text') {
-        if (part.text) {
-          assistantContent.push({ type: 'text', text: part.text });
-        }
-      } else if (part.type === 'tool-call') {
-        assistantContent.push({
-          type: 'tool-call',
-          toolCallId: part.toolCallId,
-          toolName: part.toolName,
-          input: part.input ?? {},
-        });
-        // Always emit a tool result for every tool call. Orphaned tool calls
-        // (running/pending from interrupted streams) get an empty result;
-        // without one the API rejects the request.
-        completedToolCalls.push(part);
-      }
-      // 'reasoning' parts are display-only; not sent back to model
-    }
+function getThinkingDelta(e: AgentEvent): string { // Reads assistantMessageEvent.delta from e; same body as getTextDelta.
+  return (e as any).assistantMessageEvent?.delta ?? ''; // empty string when the nested event or its delta is null or undefined
+}
-    if (assistantContent.length > 0) {
-      result.push({ role: 'assistant', content: assistantContent });
-    }
+function isToolExecutionStartEvent(e: AgentEvent): boolean { // True when e.type is tool_execution_start.
+  return e.type === 'tool_execution_start'; // plain boolean result; does not narrow e for the caller
+}
-    // Add tool result messages for completed tool calls
-    if (completedToolCalls.length > 0) {
-      result.push({
-        role: 'tool',
-        content: completedToolCalls.map(tc => ({
-          type: 'tool-result' as const,
-          toolCallId: tc.toolCallId,
-          toolName: tc.toolName,
-          output: { type: 'text' as const, value: tc.output ?? '' },
-        })),
-      });
-    }
-  }
+function getToolExecutionStart(e: AgentEvent): { toolCallId: string; toolName: string; args: unknown } { // Extracts the tool-call fields forwarded in a tool_start SSE event.
+  const evt = e as any; // untyped view: the fields below are read without static checking
+  return {
+    toolCallId: evt.toolCallId ?? '', // empty string when the event has no id
+    toolName: evt.toolName ?? 'unknown', // placeholder name when the event has none
+    args: evt.args ?? {}, // empty object when the event has no args
+  };
+}
+function isToolExecutionEndEvent(e: AgentEvent): boolean { // True when e.type is tool_execution_end.
+  return e.type === 'tool_execution_end'; // plain boolean result; does not narrow e for the caller
+}
-  return result;
+function getToolExecutionEnd(e: AgentEvent): { toolCallId: string; output: string; isError: boolean } { // Extracts the fields forwarded in a tool_end SSE event.
+  const evt = e as any; // untyped view: the fields below are read without static checking
+  const outputText = evt.result?.content // optional chaining: a missing result or content short-circuits the whole chain
+    ?.filter((p: any) => p.type === 'text') // keep only parts whose type is text; other part types are dropped
+    .map((p: any) => p.text)
+    .join('') ?? ''; // parts are concatenated with no separator; empty string when nothing was read
+  return {
+    toolCallId: evt.toolCallId ?? '', // empty string when the event has no id
+    output: outputText,
+    isError: !!evt.isError, // coerced to a strict boolean
+  };
 }
-function readKnowledgeFile(filePath: string): { ok: boolean; content: string; error?: string } {
+function isTurnEndEvent(e: AgentEvent): boolean { // True when e.type is turn_end.
+  return e.type === 'turn_end'; // plain boolean result; does not narrow e for the caller
+}
+function getTurnEndData(e: AgentEvent): { toolResults: Array<{ toolName: string; content: unknown }> } { // Returns the toolResults carried by a turn_end event.
+  return {
+    toolResults: ((e as any).toolResults as any[]) ?? [], // empty array when the field is null or undefined; the element shape is asserted by the cast, not validated
+  };
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function readKnowledgeFile(filePath: string): { ok: boolean; content: string; truncated: boolean; error?: string } { // Reads a file through getFileContent and reports success, truncation and error text instead of throwing.
   try {
-    return { ok: true, content: truncate(getFileContent(filePath)) };
+    const raw = getFileContent(filePath); // any exception thrown here is converted to an ok: false result below
+    if (raw.length > 20_000) { // length is counted in UTF-16 code units; NOTE(review): assumes truncate() caps at or below this size — confirm against tools.ts
+      return {
+        ok: true,
+        content: truncate(raw), // only content above the threshold is passed through truncate
+        truncated: true, // lets the caller add a truncation warning
+        error: undefined,
+      };
+    }
+    return { ok: true, content: raw, truncated: false }; // at or below the threshold the text is returned unchanged
   } catch (err) {
-    return { ok: false, content: '', error: err instanceof Error ? err.message : String(err) };
+    return {
+      ok: false,
+      content: '',
+      truncated: false,
+      error: err instanceof Error ? err.message : String(err), // non-Error throwables are stringified
+    };
   }
 }
-function readAbsoluteFile(absPath: string): { ok: boolean; content: string; error?: string } {
+function readAbsoluteFile(absPath: string): { ok: boolean; content: string; truncated: boolean; error?: string } { // Reads a file synchronously from the given path and reports success, truncation and error text instead of throwing.
   try {
     const raw = fs.readFileSync(absPath, 'utf-8'); // synchronous read, decoded as UTF-8; failures are caught below
-    return { ok: true, content: truncate(raw) };
+    if (raw.length > 20_000) { // length is counted in UTF-16 code units; NOTE(review): assumes truncate() caps at or below this size — confirm against tools.ts
+      return {
+        ok: true,
+        content: truncate(raw), // only content above the threshold is passed through truncate
+        truncated: true, // lets the caller add a truncation warning
+        error: undefined,
+      };
+    }
+    return { ok: true, content: raw, truncated: false }; // at or below the threshold the text is returned unchanged
   } catch (err) {
-    return { ok: false, content: '', error: err instanceof Error ? err.message : String(err) };
+    return {
+      ok: false,
+      content: '',
+      truncated: false,
+      error: err instanceof Error ? err.message : String(err), // non-Error throwables are stringified
+    };
   }
 }
@@ -113,6 +148,10 @@ function dirnameOf(filePath?: string): string | null {
   return normalized.slice(0, idx);
 }
+// ---------------------------------------------------------------------------
+// POST /api/ask
+// ---------------------------------------------------------------------------
 export async function POST(req: NextRequest) {
   let body: {
     messages: FrontendMessage[];
@@ -124,14 +163,12 @@ export async function POST(req: NextRequest) {
   try {
     body = await req.json();
   } catch {
-    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
+    return apiError(ErrorCodes.INVALID_REQUEST, 'Invalid JSON body', 400);
   }
   const { messages, currentFile, attachedFiles, uploadedFiles } = body;
   // Read agent config from settings
-  // NOTE: readSettings() is also called inside getModel() → effectiveAiConfig().
-  // Acceptable duplication — both are sync fs reads with identical results.
   const serverSettings = readSettings();
   const agentConfig = serverSettings.agent ?? {};
   const stepLimit = Number.isFinite(body.maxSteps)
@@ -142,9 +179,18 @@ export async function POST(req: NextRequest) {
   const contextStrategy = agentConfig.contextStrategy ?? 'auto';
   // Auto-load skill + bootstrap context for each request.
-  const skillPath = path.resolve(process.cwd(), 'data/skills/mindos/SKILL.md');
+  // 1. SKILL.md — static trigger + protocol (always loaded)
+  // 2. skill-rules.md — user's knowledge base operating rules (if exists)
+  // 3. user-rules.md — user's personalized rules (if exists)
+  const isZh = serverSettings.disabledSkills?.includes('mindos') ?? false;
+  const skillDirName = isZh ? 'mindos-zh' : 'mindos';
+  const skillPath = path.resolve(process.cwd(), `data/skills/${skillDirName}/SKILL.md`);
   const skill = readAbsoluteFile(skillPath);
+  // Progressive skill loading: read skill-rules + user-rules from knowledge base
+  const mindRoot = getMindRoot();
+  const { skillRules, userRules } = loadSkillRules(mindRoot, skillDirName);
   const targetDir = dirnameOf(currentFile);
   const bootstrap = {
     instruction: readKnowledgeFile('INSTRUCTION.md'),
@@ -157,24 +203,43 @@ export async function POST(req: NextRequest) {
     target_config_md: targetDir ? readKnowledgeFile(`${targetDir}/CONFIG.md`) : null,
   };
-  // Only report failures — when everything loads fine, a single summary line suffices.
+  // Only report failures + truncation warnings
   const initFailures: string[] = [];
+  const truncationWarnings: string[] = [];
   if (!skill.ok) initFailures.push(`skill.mindos: failed (${skill.error})`);
+  if (skill.ok && skill.truncated) truncationWarnings.push('skill.mindos was truncated');
+  if (skillRules.ok && skillRules.truncated) truncationWarnings.push('skill-rules.md was truncated');
+  if (userRules.ok && userRules.truncated) truncationWarnings.push('user-rules.md was truncated');
   if (!bootstrap.instruction.ok) initFailures.push(`bootstrap.instruction: failed (${bootstrap.instruction.error})`);
+  if (bootstrap.instruction.ok && bootstrap.instruction.truncated) truncationWarnings.push('bootstrap.instruction was truncated');
   if (!bootstrap.index.ok) initFailures.push(`bootstrap.index: failed (${bootstrap.index.error})`);
+  if (bootstrap.index.ok && bootstrap.index.truncated) truncationWarnings.push('bootstrap.index was truncated');
   if (!bootstrap.config_json.ok) initFailures.push(`bootstrap.config_json: failed (${bootstrap.config_json.error})`);
+  if (bootstrap.config_json.ok && bootstrap.config_json.truncated) truncationWarnings.push('bootstrap.config_json was truncated');
   if (!bootstrap.config_md.ok) initFailures.push(`bootstrap.config_md: failed (${bootstrap.config_md.error})`);
+  if (bootstrap.config_md.ok && bootstrap.config_md.truncated) truncationWarnings.push('bootstrap.config_md was truncated');
   if (bootstrap.target_readme && !bootstrap.target_readme.ok) initFailures.push(`bootstrap.target_readme: failed (${bootstrap.target_readme.error})`);
+  if (bootstrap.target_readme?.ok && bootstrap.target_readme.truncated) truncationWarnings.push('bootstrap.target_readme was truncated');
   if (bootstrap.target_instruction && !bootstrap.target_instruction.ok) initFailures.push(`bootstrap.target_instruction: failed (${bootstrap.target_instruction.error})`);
+  if (bootstrap.target_instruction?.ok && bootstrap.target_instruction.truncated) truncationWarnings.push('bootstrap.target_instruction was truncated');
   if (bootstrap.target_config_json && !bootstrap.target_config_json.ok) initFailures.push(`bootstrap.target_config_json: failed (${bootstrap.target_config_json.error})`);
+  if (bootstrap.target_config_json?.ok && bootstrap.target_config_json.truncated) truncationWarnings.push('bootstrap.target_config_json was truncated');
   if (bootstrap.target_config_md && !bootstrap.target_config_md.ok) initFailures.push(`bootstrap.target_config_md: failed (${bootstrap.target_config_md.error})`);
+  if (bootstrap.target_config_md?.ok && bootstrap.target_config_md.truncated) truncationWarnings.push('bootstrap.target_config_md was truncated');
   const initStatus = initFailures.length === 0
-    ? `All initialization contexts loaded successfully. mind_root=${getMindRoot()}${targetDir ? `, target_dir=${targetDir}` : ''}`
-    : `Initialization issues:\n${initFailures.join('\n')}\nmind_root=${getMindRoot()}${targetDir ? `, target_dir=${targetDir}` : ''}`;
+    ? `All initialization contexts loaded successfully. mind_root=${getMindRoot()}${targetDir ? `, target_dir=${targetDir}` : ''}${truncationWarnings.length > 0 ? ` ⚠️ ${truncationWarnings.length} files truncated` : ''}`
+    : `Initialization issues:\n${initFailures.join('\n')}\nmind_root=${getMindRoot()}${targetDir ? `, target_dir=${targetDir}` : ''}${truncationWarnings.length > 0 ? `\n⚠️ Warnings:\n${truncationWarnings.join('\n')}` : ''}`;
   const initContextBlocks: string[] = [];
   if (skill.ok) initContextBlocks.push(`## mindos_skill_md\n\n${skill.content}`);
+  // Progressive skill loading: inject skill-rules and user-rules after SKILL.md
+  if (skillRules.ok && !skillRules.empty) {
+    initContextBlocks.push(`## skill_rules\n\nOperating rules loaded from knowledge base (.agents/skills/${skillDirName}/skill-rules.md):\n\n${skillRules.content}`);
+  }
+  if (userRules.ok && !userRules.empty) {
+    initContextBlocks.push(`## user_rules\n\nUser personalization rules (.agents/skills/${skillDirName}/user-rules.md):\n\n${userRules.content}`);
+  }
   if (bootstrap.instruction.ok) initContextBlocks.push(`## bootstrap_instruction\n\n${bootstrap.instruction.content}`);
   if (bootstrap.index.ok) initContextBlocks.push(`## bootstrap_index\n\n${bootstrap.index.content}`);
   if (bootstrap.config_json.ok) initContextBlocks.push(`## bootstrap_config_json\n\n${bootstrap.config_json.content}`);
@@ -190,13 +255,13 @@ export async function POST(req: NextRequest) {
   const hasAttached = Array.isArray(attachedFiles) && attachedFiles.length > 0;
   if (hasAttached) {
-    for (const filePath of attachedFiles) {
+    for (const filePath of attachedFiles!) {
       if (seen.has(filePath)) continue;
       seen.add(filePath);
       try {
         const content = truncate(getFileContent(filePath));
         contextParts.push(`## Attached: ${filePath}\n\n${content}`);
-      } catch {}
+      } catch { /* ignore missing files */ }
     }
   }
@@ -205,11 +270,10 @@ export async function POST(req: NextRequest) {
     try {
       const content = truncate(getFileContent(currentFile));
       contextParts.push(`## Current file: ${currentFile}\n\n${content}`);
-    } catch {}
+    } catch { /* ignore */ }
   }
-  // Uploaded files go into a SEPARATE top-level section so the Agent
-  // treats them with high priority and never tries to look them up via tools.
+  // Uploaded files
   const uploadedParts: string[] = [];
   if (Array.isArray(uploadedFiles) && uploadedFiles.length > 0) {
     for (const f of uploadedFiles.slice(0, 8)) {
@@ -242,102 +306,203 @@ export async function POST(req: NextRequest) {
   const systemPrompt = promptParts.join('\n\n');
   try {
-    const model = getModel();
-    const cfg = effectiveAiConfig();
-    const modelName = cfg.provider === 'openai' ? cfg.openaiModel : cfg.anthropicModel;
-    let modelMessages = convertToModelMessages(messages);
-    // Phase 3: Context management pipeline
-    // 1. Truncate tool outputs in historical messages
-    modelMessages = truncateToolOutputs(modelMessages);
-    const preTokens = estimateTokens(modelMessages);
-    const sysTokens = estimateStringTokens(systemPrompt);
-    const ctxLimit = getContextLimit(modelName);
-    console.log(`[ask] Context: ~${preTokens + sysTokens} tokens (messages=${preTokens}, system=${sysTokens}), limit=${ctxLimit}`);
-    // 2. Compact if >70% context limit (skip if user disabled)
-    if (contextStrategy === 'auto' && needsCompact(modelMessages, systemPrompt, modelName)) {
-      console.log('[ask] Context >70% limit, compacting...');
-      const result = await compactMessages(modelMessages, model);
-      modelMessages = result.messages;
-      if (result.compacted) {
-        const postTokens = estimateTokens(modelMessages);
-        console.log(`[ask] After compact: ~${postTokens + sysTokens} tokens`);
-      } else {
-        console.log('[ask] Compact skipped (too few messages), hard prune will handle overflow if needed');
-      }
-    }
+    const { model, modelName, apiKey, provider } = getModelConfig();
-    // 3. Hard prune if still >90% context limit
-    modelMessages = hardPrune(modelMessages, systemPrompt, modelName);
+    // Convert frontend messages to AgentMessage[]
+    const agentMessages = toAgentMessages(messages);
-    // Phase 2: Step monitoring + loop detection
-    const stepHistory: Array<{ tool: string; input: string }> = [];
-    let loopDetected = false;
-    let loopCooldown = 0; // skip detection for N steps after warning
-    const result = streamText({
-      model,
-      system: systemPrompt,
-      messages: modelMessages,
-      tools: knowledgeBaseTools,
-      stopWhen: stepCountIs(stepLimit),
-      ...(enableThinking && cfg.provider === 'anthropic' ? {
-        providerOptions: {
-          anthropic: {
-            thinking: { type: 'enabled', budgetTokens: thinkingBudget },
-          },
-        },
-      } : {}),
+    // Extract the last user message for agent.prompt()
+    const lastUserContent = messages.length > 0 && messages[messages.length - 1].role === 'user'
+      ? messages[messages.length - 1].content
+      : '';
-      onStepFinish: ({ toolCalls, usage }) => {
-        if (toolCalls) {
-          for (const tc of toolCalls) {
-            stepHistory.push({ tool: tc.toolName, input: JSON.stringify(tc.input) });
-          }
-        }
-        // Loop detection: same tool + same args 3 times in a row
-        // Skip detection during cooldown to avoid repeated warnings
-        if (loopCooldown > 0) {
-          loopCooldown--;
-        } else if (stepHistory.length >= 3) {
-          const last3 = stepHistory.slice(-3);
-          if (last3.every(s => s.tool === last3[0].tool && s.input === last3[0].input)) {
-            loopDetected = true;
+    // History = all messages except the last user message (agent.prompt adds it)
+    const historyMessages = agentMessages.slice(0, -1);
+    // Capture API key for this request — safe since each POST creates a new Agent instance.
+    // Even though JS closures are lexically scoped, being explicit guards against future refactors.
+    const requestApiKey = apiKey;
+    // ── Loop detection state ──
+    const stepHistory: Array<{ tool: string; input: string }> = [];
+    let stepCount = 0;
+    let loopCooldown = 0;
+    // ── Create Agent (per-request lifecycle) ──
+    const agent = new Agent({
+      initialState: {
+        systemPrompt,
+        model,
+        thinkingLevel: (enableThinking && provider === 'anthropic') ? 'medium' : 'off',
+        tools: knowledgeBaseTools,
+        messages: historyMessages,
+      },
+      getApiKey: async () => requestApiKey,
+      toolExecution: 'parallel',
+      // Context management: truncate → compact → prune
+      transformContext: createTransformContext(
+        systemPrompt,
+        modelName,
+        () => model,
+        apiKey,
+        contextStrategy,
+      ),
+      // Write-protection: block writes to protected files
+      beforeToolCall: async (context: BeforeToolCallContext): Promise<BeforeToolCallResult | undefined> => { // Blocks a write tool call when assertNotProtected rejects its target path; otherwise returns undefined.
+        const { toolCall, args } = context;
+        // The tool name is read from toolCall.toolName, falling back to toolCall.name
+        const toolName = (toolCall as any).toolName ?? (toolCall as any).name; // cast to any: both field names are tried without static checking
+        if (toolName && WRITE_TOOLS.has(toolName)) { // tools outside WRITE_TOOLS are never checked
+          const filePath = (args as any).path ?? (args as any).from_path; // NOTE(review): only path and from_path are inspected; a write target passed under another key is not checked here — confirm against the WRITE_TOOLS schemas
+          if (filePath) { // a write tool call with neither key set passes through unchecked
+            try {
+              assertNotProtected(filePath, 'modified by AI agent');
+            } catch (e) {
+              const errorMsg = e instanceof Error ? e.message : String(e); // non-Error throwables are stringified
+              return {
+                block: true, // the thrown message is returned as the block reason rather than rethrown
+                reason: `Write-protection error: ${errorMsg}`,
+              };
+            }
           }
         }
-        console.log(`[ask] Step ${stepHistory.length}/${stepLimit}, tokens=${usage?.totalTokens ?? '?'}`);
+        return undefined; // no block result: the call is not blocked by this hook
       },
-      prepareStep: ({ messages: stepMessages }) => {
-        if (loopDetected) {
-          loopDetected = false;
-          loopCooldown = 3; // suppress re-detection for 3 steps
-          return {
-            messages: [
-              ...stepMessages,
-              {
-                role: 'user' as const,
-                content: '[SYSTEM WARNING] You have called the same tool with identical arguments 3 times in a row. This appears to be a loop. Try a completely different approach or ask the user for clarification.',
-              },
-            ],
-          };
-        }
-        return {}; // no modification
+      // Logging: record all tool executions
+      afterToolCall: async (context: AfterToolCallContext): Promise<AfterToolCallResult | undefined> => { // Records one logAgentOp entry per finished tool call; always returns undefined.
+        const ts = new Date().toISOString(); // timestamp taken when this hook runs, not when the tool started
+        const { toolCall, args, result, isError } = context;
+        const toolName = (toolCall as any).toolName ?? (toolCall as any).name; // cast to any: both field names are tried without static checking
+        const outputText = result?.content // optional chaining: a missing result or content short-circuits the whole chain
+          ?.filter((p: any) => p.type === 'text') // keep only parts whose type is text
+          .map((p: any) => p.text)
+          .join('') ?? ''; // empty string when nothing was read
+        try {
+          logAgentOp({
+            ts,
+            tool: toolName ?? 'unknown', // placeholder when neither name field is set
+            params: args as Record<string, unknown>, // asserted by the cast, not validated
+            result: isError ? 'error' : 'ok',
+            message: outputText.slice(0, 200), // only the first 200 characters of the output are logged
+          });
+        } catch { /* logging must never kill the stream */ }
+        return undefined; // no result object is returned from this hook
       },
-      onError: ({ error }) => {
-        console.error('[ask] Stream error:', error);
+      ...(enableThinking && provider === 'anthropic' ? {
+        thinkingBudgets: { medium: thinkingBudget },
+      } : {}),
+    });
+    // ── SSE Stream ──
+    const encoder = new TextEncoder();
+    const requestStartTime = Date.now();
+    const stream = new ReadableStream({
+      start(controller) { // Subscribes to agent events, forwards them as SSE frames, then starts the prompt without awaiting it. NOTE(review): this underlying source defines only start(), so nothing here calls agent.abort() when the client disconnects — confirm intended.
+        function send(event: MindOSSSEvent) { // Serializes one event as a data frame terminated by a blank line.
+          try {
+            controller.enqueue(encoder.encode(`data:${JSON.stringify(event)}\n\n`));
+          } catch { /* controller may be closed */ }
+        }
+        agent.subscribe((event: AgentEvent) => { // events matching none of the branches below are ignored
+          if (isTextDeltaEvent(event)) {
+            send({ type: 'text_delta', delta: getTextDelta(event) });
+          } else if (isThinkingDeltaEvent(event)) {
+            send({ type: 'thinking_delta', delta: getThinkingDelta(event) });
+          } else if (isToolExecutionStartEvent(event)) {
+            const { toolCallId, toolName, args } = getToolExecutionStart(event);
+            send({
+              type: 'tool_start',
+              toolCallId,
+              toolName,
+              args,
+            });
+          } else if (isToolExecutionEndEvent(event)) {
+            const { toolCallId, output, isError } = getToolExecutionEnd(event);
+            metrics.recordToolExecution(); // called once per tool_execution_end, for failed calls too
+            send({
+              type: 'tool_end',
+              toolCallId,
+              output,
+              isError,
+            });
+          } else if (isTurnEndEvent(event)) {
+            stepCount++; // one step per turn_end event
+            // Record token usage if available from the turn
+            const turnUsage = (event as any).usage; // NOTE(review): assumes turn_end carries usage with inputTokens and outputTokens — confirm against the agent library event type
+            if (turnUsage && typeof turnUsage.inputTokens === 'number') {
+              metrics.recordTokens(turnUsage.inputTokens, turnUsage.outputTokens ?? 0);
+            }
+            // Append this turn's tool results to the history used by loop detection.
+            // NOTE(review): the compared input is built from tr.content, which by its name is the tool result rather than the call arguments — confirm intent.
+            const { toolResults } = getTurnEndData(event);
+            if (Array.isArray(toolResults) && toolResults.length > 0) {
+              const newEntries = toolResults.map(tr => ({
+                tool: tr.toolName ?? 'unknown',
+                input: JSON.stringify(tr.content, null, 0), // compact form; an indent of 0 gives the same output as omitting the argument
+              }));
+              stepHistory.push(...newEntries);
+            }
+            // Loop detection: the last 3 history entries share the same tool and the same serialized input.
+            // Only trigger if we have 3+ history entries (prevent false positives on first turn).
+            const LOOP_DETECTION_THRESHOLD = 3;
+            if (loopCooldown > 0) {
+              loopCooldown--; // while cooling down, detection is skipped for this turn
+            } else if (stepHistory.length >= LOOP_DETECTION_THRESHOLD) {
+              const lastN = stepHistory.slice(-LOOP_DETECTION_THRESHOLD);
+              if (lastN.every(s => s.tool === lastN[0].tool && s.input === lastN[0].input)) {
+                loopCooldown = 3; // skip detection for the next 3 turn_end events
+                // TODO (metrics): Track loop detection rate — metrics.increment('agent.loop_detected', { model: modelName })
+                agent.steer({
+                  role: 'user',
+                  content: '[SYSTEM WARNING] You have called the same tool with identical arguments 3 times in a row. This appears to be a loop. Try a completely different approach or ask the user for clarification.',
+                  timestamp: Date.now(),
+                } as any); // cast to any: the message shape is not checked against the steer parameter type
+              }
+            }
+            // Step limit enforcement
+            if (stepCount >= stepLimit) {
+              agent.abort(); // NOTE(review): whether prompt() then resolves or rejects decides if the client sees done or error — confirm against the agent library
+            }
+            console.log(`[ask] Step ${stepCount}/${stepLimit}`);
+          }
+        });
+        agent.prompt(lastUserContent).then(() => { // not awaited: start() returns while the agent is still running
+          metrics.recordRequest(Date.now() - requestStartTime);
+          send({ type: 'done' }); // sent without the optional usage field
+          controller.close();
+        }).catch((err) => {
+          metrics.recordRequest(Date.now() - requestStartTime); // duration is recorded for failed requests too
+          metrics.recordError();
+          send({ type: 'error', message: err instanceof Error ? err.message : String(err) });
+          controller.close(); // NOTE(review): an exception thrown inside the then-handler above also lands here, so close() may run on an already closed controller — confirm it cannot throw unhandled
+        });
       },
     });
-    return result.toUIMessageStreamResponse();
+    return new Response(stream, {
+      headers: {
+        'Content-Type': 'text/event-stream',
+        'Cache-Control': 'no-cache, no-transform',
+        'Connection': 'keep-alive',
+        'X-Accel-Buffering': 'no',
+      },
+    });
   } catch (err) {
     console.error('[ask] Failed to initialize model:', err);
-    return NextResponse.json(
-      { error: err instanceof Error ? err.message : 'Failed to initialize AI model' },
-      { status: 500 },
-    );
+    if (err instanceof MindOSError) {
+      return apiError(err.code, err.message);
+    }
+    return apiError(ErrorCodes.MODEL_INIT_FAILED, err instanceof Error ? err.message : 'Failed to initialize AI model', 500);
   }
 }