npm - jinzd-ai-cli - Versions diffs - 0.4.185 → 0.4.187 - Mend

jinzd-ai-cli 0.4.185 → 0.4.187

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (23) hide show

package/dist/index.js CHANGED Viewed

@@ -15,7 +15,7 @@ import {
   saveDevState,
   sessionHasMeaningfulContent,
   setupProxy
-} from "./chunk-U7P5A3MJ.js";
+} from "./chunk-ZTBUTA24.js";
 import {
   ToolExecutor,
   ToolRegistry,
@@ -34,10 +34,10 @@ import {
   spawnAgentContext,
   theme,
   undoStack
-} from "./chunk-GOS4DWW5.js";
+} from "./chunk-Q6BSUIDV.js";
 import "./chunk-HDSKW7Q3.js";
 import "./chunk-ZWVIDFGY.js";
-import "./chunk-GSRXKHZ7.js";
+import "./chunk-4SZ6X47A.js";
 import {
   SessionManager,
   getContentText
@@ -54,47 +54,35 @@ import {
   getConfigDirUsage,
   listRecentCrashes,
   writeCrashLog
-} from "./chunk-ZO4LKUDM.js";
+} from "./chunk-43T4MY5B.js";
 import {
-  BudgetWarner,
   CONTENT_ONLY_STREAM_REMINDER,
-  ContextPressureMonitor,
-  EmptyResponseGuard,
-  FreeRoundTracker,
-  HALLUCINATION_CORRECTION_MESSAGE,
   ProviderRegistry,
   TEE_FINAL_USER_NUDGE,
   TOOL_CALL_REMINDER,
   ThinkTagFilter,
   accumulateUsage,
-  buildPhantomCorrectionMessage,
   buildRoundBudgetHint,
-  buildRoundsExhaustedPrompt,
-  buildUserStopMessage,
   buildWriteRoundReminder,
   consumeToolCallStream,
   detectMetaNarration,
   detectPseudoToolCalls,
-  detectsHallucinatedFileOp,
-  extractBashCommands,
   extractWrittenFilePaths,
-  findPhantomClaims,
-  hadPreviousWriteToolCalls,
   looksLikeDocumentBody,
+  runAgentLoop,
   stripPseudoToolCalls,
-  stripToolCallReminder,
-  summarizeRecentTools
-} from "./chunk-5LK7H45B.js";
+  stripToolCallReminder
+} from "./chunk-IQ7JE43O.js";
 import {
   getStatsSnapshot,
   getTopFailingTools,
   getTopUsedTools,
   installFlushOnExit
-} from "./chunk-GH32XE5K.js";
+} from "./chunk-MVK25WZW.js";
 import "./chunk-HIU2SH4V.js";
 import {
   ConfigManager
-} from "./chunk-FHZ2LKM5.js";
+} from "./chunk-UAJKGLRV.js";
 import {
   AuthError,
   ProviderError,
@@ -121,7 +109,7 @@ import {
   SKILLS_DIR_NAME,
   VERSION,
   buildUserIdentityPrompt
-} from "./chunk-PMZCQAJL.js";
+} from "./chunk-SOWBY545.js";
 import {
   formatGitContextForPrompt,
   getGitContext,
@@ -1836,7 +1824,7 @@ No tools match "${filter}".
           const { join: join5 } = await import("path");
           const { existsSync: existsSync5 } = await import("fs");
           const { getGitRoot: getGitRoot2 } = await import("./git-context-7KIP4X2V.js");
-          const { MCP_PROJECT_CONFIG_NAME: MCP_PROJECT_CONFIG_NAME2 } = await import("./constants-RJDN7GOH.js");
+          const { MCP_PROJECT_CONFIG_NAME: MCP_PROJECT_CONFIG_NAME2 } = await import("./constants-XEL5347E.js");
           const { approveProject, hashMcpFile } = await import("./project-trust-IFM7FXEV.js");
           const cwd = process.cwd();
           const projectRoot = getGitRoot2(cwd) ?? cwd;
@@ -2897,7 +2885,7 @@ ${hint}` : "")
       usage: "/test [command|filter]",
       async execute(args, ctx) {
         try {
-          const { executeTests } = await import("./run-tests-37RHYYD4.js");
+          const { executeTests } = await import("./run-tests-Z7IGVS2W.js");
           const argStr = args.join(" ").trim();
           let testArgs = {};
           if (argStr) {
@@ -6391,7 +6379,6 @@ Session '${this.resumeSessionId}' not found.
       toolDefs = toolDefs.filter((t) => !this.blockedTools.has(t.name));
     }
     const apiMessages = [...messages];
-    const extraMessages = [];
     const maxToolRounds = this.maxToolRoundsOverride ?? this.config.get("maxToolRounds") ?? DEFAULT_MAX_TOOL_ROUNDS;
     const autoPauseIntervalRaw = this.config.get("autoPauseInterval");
     const autoPauseInterval = typeof autoPauseIntervalRaw === "number" ? autoPauseIntervalRaw : DEFAULT_AUTO_PAUSE_INTERVAL;
@@ -6404,55 +6391,175 @@ ${mcpBudgetNote}` : "");
     const modelParams = this.getModelParams(effectiveModel);
     const useStreaming = this.config.get("ui").streaming;
     const spinner = this.renderer.showSpinner("Thinking...");
-    const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
+    const usage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
     const supportsStreamingTools = useStreaming && typeof provider.chatWithToolsStream === "function";
     let lastToolCallSignature = "";
     let repeatedToolCallCount = 0;
-    const roundToolHistory = [];
-    const budgetWarner = new BudgetWarner(maxToolRounds);
-    const emptyGuard = new EmptyResponseGuard();
-    const ctxMonitor = new ContextPressureMonitor();
-    const freeRounds = new FreeRoundTracker();
     this.setupInterjectionListener();
     try {
-      for (let round = 0; round < maxToolRounds; round++) {
-        this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
-        if (this.toolExecutor.pendingSlashCommand) {
-          const cmd = this.toolExecutor.pendingSlashCommand;
-          this.toolExecutor.pendingSlashCommand = null;
-          if (cmd === "/exit" || cmd === "/quit" || cmd === "/q") {
-            spinner.stop();
-            process.stdout.write(theme.warning(`\u26A1 ${cmd} \u2014 stopping agentic loop
-`));
-            this.teardownInterjectionListener();
-            return;
+      const loopResult = await runAgentLoop({
+        maxToolRounds,
+        autoPauseInterval,
+        planMode: this.planMode,
+        providerId: this.currentProvider,
+        toolDefs,
+        usage,
+        // ── 模型调用：流式/非流式 + retry/fallback 包装（disabled-by-default，v0.4.144+）──
+        callModel: async (_round, extraMessages) => {
+          const chatRequest = {
+            messages: apiMessages,
+            model: effectiveModel,
+            systemPrompt,
+            systemPromptVolatile,
+            stream: false,
+            temperature: modelParams.temperature,
+            maxTokens: modelParams.maxTokens,
+            timeout: modelParams.timeout,
+            thinking: modelParams.thinking,
+            thinkingBudget: modelParams.thinkingBudget,
+            ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
+          };
+          const fallbackOpts = this.getFallbackOptions(spinner);
+          if (supportsStreamingTools) {
+            const streamAc = this.setupStreamInterrupt();
+            try {
+              const streamGen = withFallbackStream(
+                this.currentProvider,
+                effectiveModel,
+                this.providers,
+                fallbackOpts,
+                (p, m) => {
+                  const tc = p;
+                  if (typeof tc.chatWithToolsStream !== "function") {
+                    throw new Error(`provider ${p.info.id} does not support streaming tool calls`);
+                  }
+                  return tc.chatWithToolsStream({ ...chatRequest, model: m, signal: streamAc.signal }, toolDefs);
+                }
+              );
+              const streamResult = await this.consumeToolStream(streamGen, spinner);
+              if (streamResult.toolCalls.length > 0) {
+                return {
+                  toolCalls: streamResult.toolCalls,
+                  usage: streamResult.usage,
+                  reasoningContent: streamResult.reasoningContent
+                };
+              }
+              return {
+                content: streamResult.textContent,
+                usage: streamResult.usage,
+                reasoningContent: streamResult.reasoningContent,
+                finishReason: streamResult.finishReason,
+                alreadyRendered: true
+              };
+            } finally {
+              this.teardownStreamInterrupt();
+            }
           }
-          process.stdout.write(theme.warning(`\u26A1 Command "${cmd}" \u2014 injected as message
-`));
-          extraMessages.push({ role: "user", content: cmd });
-        }
-        const budgetWarning = budgetWarner.check(maxToolRounds - round);
-        if (budgetWarning) {
-          extraMessages.push({ role: "user", content: budgetWarning.injectMessage });
-          if (budgetWarning.displayMessage) {
-            const paint = budgetWarning.level === "critical" ? theme.error : theme.warning;
-            process.stdout.write(paint(`  ${budgetWarning.displayMessage}
-`));
+          const result = await withFallback(
+            this.currentProvider,
+            effectiveModel,
+            this.providers,
+            fallbackOpts,
+            (p, m) => p.chatWithTools({ ...chatRequest, model: m }, toolDefs)
+          );
+          return result;
+        },
+        // ── 轮次耗尽总结：空工具列表强制纯文本 ──
+        callSummary: async (summaryExtra) => {
+          spinner.stop();
+          spinner.start("Generating summary...");
+          try {
+            const summaryResult = await provider.chatWithTools(
+              {
+                messages: apiMessages,
+                model: effectiveModel,
+                systemPrompt,
+                systemPromptVolatile,
+                stream: false,
+                temperature: modelParams.temperature,
+                maxTokens: modelParams.maxTokens,
+                timeout: modelParams.timeout,
+                thinking: modelParams.thinking,
+                thinkingBudget: modelParams.thinkingBudget,
+                _extraMessages: summaryExtra
+              },
+              []
+              // 不提供任何工具，强制 AI 返回纯文本
+            );
+            spinner.stop();
+            return "content" in summaryResult ? { content: summaryResult.content, usage: summaryResult.usage } : { usage: summaryResult.usage };
+          } catch (err) {
+            spinner.stop();
+            throw err;
           }
-        }
-        if (this._userInterjection) {
+        },
+        executeTools: async (toolCalls, extraMessages) => {
+          spinner.stop();
+          askUserContext.rl = this.rl;
+          googleSearchContext.configManager = this.config;
+          streamToFileContext.provider = provider;
+          streamToFileContext.model = effectiveModel;
+          streamToFileContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
+---
+${systemPromptVolatile}` : systemPrompt;
+          streamToFileContext.messages = apiMessages;
+          streamToFileContext.extraMessages = extraMessages;
+          streamToFileContext.temperature = modelParams.temperature;
+          streamToFileContext.timeout = modelParams.timeout;
+          spawnAgentContext.provider = provider;
+          spawnAgentContext.model = effectiveModel;
+          spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
+---
+${systemPromptVolatile}` : systemPrompt;
+          spawnAgentContext.modelParams = modelParams;
+          spawnAgentContext.configManager = this.config;
+          ToolExecutor.currentMessageIndex = session.messages.length;
+          return this.toolExecutor.executeAll(toolCalls);
+        },
+        buildToolResultMessages: (toolCalls, results, reasoningContent) => provider.buildToolResultMessages(toolCalls, results, reasoningContent),
+        getContextWindow: () => this.getContextWindowSize(),
+        estimateRequestTokens: (extraMessages) => this.estimateRequestTokens(systemPrompt, extraMessages),
+        isInterrupted: () => isInterrupted(),
+        pollInterjection: () => {
+          if (!this._userInterjection) return null;
           const msg = this._userInterjection;
           this._userInterjection = null;
           process.stdout.write(theme.warning(`\u26A1 Interjection: "${msg}"
 `));
-          extraMessages.push({ role: "user", content: msg });
-        }
-        const ctxWindow = this.getContextWindowSize();
-        if (ctxWindow > 0) {
-          const reqTokens = this.estimateRequestTokens(systemPrompt, extraMessages);
-          const pressure = ctxMonitor.check(reqTokens, ctxWindow);
+          return msg;
+        },
+        onRoundStart: (round) => {
+          this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
+        },
+        beforeRound: (_round, extraMessages) => {
+          if (this.toolExecutor.pendingSlashCommand) {
+            const cmd = this.toolExecutor.pendingSlashCommand;
+            this.toolExecutor.pendingSlashCommand = null;
+            if (cmd === "/exit" || cmd === "/quit" || cmd === "/q") {
+              spinner.stop();
+              process.stdout.write(theme.warning(`\u26A1 ${cmd} \u2014 stopping agentic loop
+`));
+              return "stop";
+            }
+            process.stdout.write(theme.warning(`\u26A1 Command "${cmd}" \u2014 injected as message
+`));
+            extraMessages.push({ role: "user", content: cmd });
+          }
+        },
+        onBudgetWarning: (warning) => {
+          if (warning.displayMessage) {
+            const paint = warning.level === "critical" ? theme.error : theme.warning;
+            process.stdout.write(paint(`  ${warning.displayMessage}
+`));
+          }
+        },
+        onContextPressure: (pressure, ctxWindow, round) => {
+          spinner.stop();
           if (pressure.action === "abort") {
-            spinner.stop();
             process.stderr.write(
               theme.error(
                 `
@@ -6468,16 +6575,7 @@ ${mcpBudgetNote}` : "");
 `
               )
             );
-            if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-              this.addSessionUsage(roundUsage, effectiveModel);
-              session.addTokenUsage(roundUsage);
-              if (this.shouldShowTokens()) {
-                this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
-              }
-            }
-            return;
-          } else if (pressure.action === "warn") {
-            spinner.stop();
+          } else {
             process.stdout.write(
               theme.warning(
                 `
@@ -6485,492 +6583,142 @@ ${mcpBudgetNote}` : "");
 `
               )
             );
-            extraMessages.push({ role: "user", content: pressure.injectMessage });
             spinner.start(`Thinking... (round ${round + 1}/${maxToolRounds})`);
           }
-        }
-        let result;
-        let alreadyRendered = false;
-        const chatRequest = {
-          messages: apiMessages,
-          model: effectiveModel,
-          systemPrompt,
-          systemPromptVolatile,
-          stream: false,
-          temperature: modelParams.temperature,
-          maxTokens: modelParams.maxTokens,
-          timeout: modelParams.timeout,
-          thinking: modelParams.thinking,
-          thinkingBudget: modelParams.thinkingBudget,
-          ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
-        };
-        if (supportsStreamingTools) {
-          const streamAc = this.setupStreamInterrupt();
-          try {
-            const fallbackOpts = this.getFallbackOptions(spinner);
-            const streamGen = withFallbackStream(
-              this.currentProvider,
-              effectiveModel,
-              this.providers,
-              fallbackOpts,
-              (p, m) => {
-                const tc = p;
-                if (typeof tc.chatWithToolsStream !== "function") {
-                  throw new Error(`provider ${p.info.id} does not support streaming tool calls`);
-                }
-                return tc.chatWithToolsStream({ ...chatRequest, model: m, signal: streamAc.signal }, toolDefs);
-              }
-            );
-            const streamResult = await this.consumeToolStream(streamGen, spinner);
-            if (streamResult.toolCalls.length > 0) {
-              const toolCalls = streamResult.toolCalls;
-              if (streamResult.rawContent) {
-                toolCalls._rawContent = streamResult.rawContent;
-              }
-              if (streamResult.textContent) {
-                toolCalls._streamedText = streamResult.textContent;
-              }
-              result = {
-                toolCalls,
-                usage: streamResult.usage,
-                ...streamResult.reasoningContent ? { reasoningContent: streamResult.reasoningContent } : {}
-              };
-            } else {
-              result = {
-                content: streamResult.textContent,
-                usage: streamResult.usage,
-                ...streamResult.reasoningContent ? { reasoningContent: streamResult.reasoningContent } : {},
-                ...streamResult.finishReason ? { finishReason: streamResult.finishReason } : {}
-              };
-              alreadyRendered = true;
-            }
-          } finally {
-            this.teardownStreamInterrupt();
-          }
-        } else {
-          const fallbackOpts = this.getFallbackOptions(spinner);
-          result = await withFallback(
-            this.currentProvider,
-            effectiveModel,
-            this.providers,
-            fallbackOpts,
-            (p, m) => p.chatWithTools({ ...chatRequest, model: m }, toolDefs)
-          );
-        }
-        accumulateUsage(roundUsage, result.usage);
-        if ("content" in result) {
-          const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
-          const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
-          const coarseHallucination = !this.planMode && hasWriteTools && !alreadyWrote && !!result.content && detectsHallucinatedFileOp(result.content);
-          const phantomPaths = (coarseHallucination || alreadyWrote) && !this.planMode && hasWriteTools && result.content ? findPhantomClaims(result.content, extraMessages) : [];
-          const bashRanThisTurn = extractBashCommands(extraMessages).length > 0;
-          const coarseShouldFire = coarseHallucination && !bashRanThisTurn;
-          if ((phantomPaths.length > 0 || coarseShouldFire) && round < maxToolRounds - 1) {
-            const providerName = this.currentProvider;
-            const detail = phantomPaths.length > 0 ? ` phantom files: ${phantomPaths.join(", ")}` : "";
-            process.stderr.write(
-              `[${providerName}] \u26A0 Hallucinated completion detected (AI claimed file was written but no tool was called), forcing retry...${detail}
+        },
+        onHallucinationRetry: ({ phantomPaths, round, alreadyRendered }) => {
+          const detail = phantomPaths.length > 0 ? ` phantom files: ${phantomPaths.join(", ")}` : "";
+          process.stderr.write(
+            `[${this.currentProvider}] \u26A0 Hallucinated completion detected (AI claimed file was written but no tool was called), forcing retry...${detail}
 `
-            );
-            if (alreadyRendered) {
-              process.stdout.write("\n");
-            }
-            const correctionMsg = phantomPaths.length > 0 ? buildPhantomCorrectionMessage(phantomPaths) : HALLUCINATION_CORRECTION_MESSAGE;
-            const reasoningField = "reasoningContent" in result && result.reasoningContent ? { reasoning_content: result.reasoningContent } : this.currentProvider === "deepseek" ? { reasoning_content: "" } : {};
-            extraMessages.push(
-              { role: "assistant", content: result.content, ...reasoningField },
-              { role: "user", content: correctionMsg }
-            );
-            spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
-            continue;
+          );
+          if (alreadyRendered) {
+            process.stdout.write("\n");
           }
-          if (!result.content || result.content.trim() === "") {
-            const fr = "finishReason" in result ? result.finishReason : void 0;
-            const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, fr);
-            if (decision.action === "nudge") {
-              spinner.stop();
-              if (alreadyRendered) process.stdout.write("\n");
-              process.stderr.write(theme.warning(`${decision.displayMessage}
+          spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
+        },
+        onEmptyResponse: (decision, { alreadyRendered, round }) => {
+          spinner.stop();
+          if (alreadyRendered) process.stdout.write("\n");
+          if (decision.action === "nudge") {
+            process.stderr.write(theme.warning(`${decision.displayMessage}
 `));
-              extraMessages.push({ role: "user", content: decision.injectMessage });
-              spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
-              continue;
-            }
-            spinner.stop();
-            if (alreadyRendered) process.stdout.write("\n");
-            process.stderr.write(
-              theme.error(`
+            spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
+          } else {
+            process.stderr.write(theme.error(`
 ${decision.displayMessage}
-`)
-            );
+`));
             process.stderr.write(
               theme.dim(`  ${decision.hint}
   Try: /compact, /clear, or /model to switch.
 `)
             );
-            if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-              this.addSessionUsage(roundUsage, effectiveModel);
-              session.addTokenUsage(roundUsage);
-              if (this.shouldShowTokens()) {
-                this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
-              }
-            }
-            return;
           }
-          emptyGuard.onNonEmpty();
+        },
+        onFinalContent: async (content, { reasoningContent, alreadyRendered }) => {
           spinner.stop();
-          const finalContent = result.content;
           if (!alreadyRendered) {
             if (useStreaming) {
               const streamAc = this.setupStreamInterrupt();
               try {
-                await this.renderer.renderContentAsStream(finalContent, { signal: streamAc.signal });
+                await this.renderer.renderContentAsStream(content, { signal: streamAc.signal });
               } finally {
                 this.teardownStreamInterrupt();
               }
             } else {
-              this.renderer.renderResponse(finalContent);
-            }
-          } else {
-            if (finalContent.trim()) {
-              process.stdout.write("\n\n");
+              this.renderer.renderResponse(content);
             }
+          } else if (content.trim()) {
+            process.stdout.write("\n\n");
           }
-          lastResponseStore.content = finalContent;
-          const finalReasoning = "reasoningContent" in result ? result.reasoningContent : void 0;
+          lastResponseStore.content = content;
           session.addMessage({
             role: "assistant",
-            content: finalContent,
+            content,
             timestamp: /* @__PURE__ */ new Date(),
-            ...finalReasoning ? { reasoningContent: finalReasoning } : {}
+            ...reasoningContent ? { reasoningContent } : {}
           });
-          this.events.emit("message.after", { content: finalContent });
-          if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-            this.addSessionUsage(roundUsage, effectiveModel);
-            session.addTokenUsage(roundUsage);
-            if (this.shouldShowTokens()) {
-              this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
-            }
-          }
-          return;
-        }
-        spinner.stop();
-        const saveLastResponseCall = result.toolCalls.find((tc) => tc.name === "save_last_response");
-        if (saveLastResponseCall) {
-          const saveToFile = String(saveLastResponseCall.arguments["path"] ?? "");
-          if (!saveToFile) {
-          } else {
-            const teeAc = this.setupStreamInterrupt();
-            try {
-              const teeSystemPrompt = stripToolCallReminder(systemPrompt ?? "") + CONTENT_ONLY_STREAM_REMINDER;
-              const teeExtraMessages = extraMessages.length > 0 ? [...extraMessages, { role: "user", content: TEE_FINAL_USER_NUDGE }] : [{ role: "user", content: TEE_FINAL_USER_NUDGE }];
-              const genStream = provider.chatStream({
-                messages: apiMessages,
-                model: effectiveModel,
-                systemPrompt: teeSystemPrompt,
-                systemPromptVolatile,
-                stream: true,
-                temperature: modelParams.temperature,
-                maxTokens: modelParams.maxTokens,
-                timeout: modelParams.timeout,
-                thinking: modelParams.thinking,
-                thinkingBudget: modelParams.thinkingBudget,
-                signal: teeAc.signal,
-                _extraMessages: teeExtraMessages
-              });
-              const teeShowTokens = this.shouldShowTokens();
-              let genContent;
-              let genUsage;
-              let teeTokShown = false;
-              try {
-                const teeResult = await this.renderer.renderStream(
-                  genStream,
-                  { saveToFile, showTokens: teeShowTokens, sessionTotal: teeShowTokens ? { ...this.sessionTokenUsage } : void 0, signal: teeAc.signal }
-                );
-                genContent = teeResult.content;
-                genUsage = teeResult.usage;
-                teeTokShown = teeResult.tokensShown;
-              } catch (teeErr) {
-                try {
-                  unlinkSync2(saveToFile);
-                } catch {
-                }
-                const errMsg = teeErr instanceof Error ? teeErr.message : String(teeErr);
-                process.stdout.write(theme.error(
-                  `
-  \u2717 tee stream failed: ${errMsg}
-  ${saveToFile} (partial) was deleted. Asking model to retry.
-`
-                ));
-                const errorResults = result.toolCalls.map((tc) => ({
-                  callId: tc.id,
-                  content: tc.name === "save_last_response" ? `[save_last_response failed] streaming was interrupted: ${errMsg}. ${saveToFile} was NOT saved. Retry \u2014 and consider producing a more compact output (split very large reports across multiple save_last_response calls if the previous attempt timed out).` : `[skipped: save_last_response failed]`,
-                  isError: tc.name === "save_last_response"
-                }));
-                const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
-                const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
-                extraMessages.push(...newMsgs3);
-                continue;
-              }
-              const metaMatch = detectMetaNarration(genContent);
-              if (metaMatch) {
-                try {
-                  unlinkSync2(saveToFile);
-                } catch {
-                }
-                process.stdout.write(theme.error(
-                  `
-  \u2717 Rejected save: response was meta-narration / leaked reasoning, not document body (matched: ${metaMatch})
-  ${saveToFile} was deleted; asking model to retry.
-`
-                ));
-                const errorResults = result.toolCalls.map((tc) => ({
-                  callId: tc.id,
-                  content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was internal reasoning / meta-narration about the task (e.g. "Let me re-read\u2026", "the user is asking me to\u2026") instead of the requested document body. ${saveToFile} was NOT saved.
-This fresh stream has NO tools. Produce ONLY the document body: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report" and write the full content. Do NOT narrate that you will produce the document \u2014 produce it.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
-                  isError: tc.name === "save_last_response"
-                }));
-                const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
-                const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
-                extraMessages.push(...newMsgs3);
-                if (genUsage) {
-                  roundUsage.inputTokens += genUsage.inputTokens;
-                  roundUsage.outputTokens += genUsage.outputTokens;
-                  roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
-                  roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
-                }
-                continue;
-              }
-              const pseudoMatch = detectPseudoToolCalls(genContent);
-              if (pseudoMatch) {
-                const cleaned = stripPseudoToolCalls(genContent);
-                if (looksLikeDocumentBody(cleaned)) {
-                  try {
-                    writeFileSync2(saveToFile, cleaned, "utf-8");
-                    process.stdout.write(theme.warning(
-                      `
-  \u26A0 Salvaged save: stripped pseudo-tool-call markup (matched: ${pseudoMatch})
-  ${saveToFile} now contains the cleaned document (${cleaned.length} chars; was ${genContent.length}).
-`
-                    ));
-                    lastResponseStore.content = cleaned;
-                    if (genUsage) {
-                      roundUsage.inputTokens += genUsage.inputTokens;
-                      roundUsage.outputTokens += genUsage.outputTokens;
-                      roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
-                      roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
-                    }
-                    session.addMessage({ role: "assistant", content: cleaned, timestamp: /* @__PURE__ */ new Date() });
-                    this.events.emit("message.after", { content: cleaned });
-                    const lines2 = cleaned.split("\n").length;
-                    const bytes2 = Buffer.byteLength(cleaned, "utf-8");
-                    const okResults = result.toolCalls.map((tc) => ({
-                      callId: tc.id,
-                      content: tc.name === "save_last_response" ? `File saved (with cleanup): ${saveToFile} (${lines2} lines, ${bytes2} bytes; pseudo-tool-call markup was stripped before save)` : `[skipped: file already saved by tee streaming]`,
-                      isError: false
-                    }));
-                    const reasoningContent4 = "reasoningContent" in result ? result.reasoningContent : void 0;
-                    const newMsgs4 = provider.buildToolResultMessages(result.toolCalls, okResults, reasoningContent4);
-                    extraMessages.push(...newMsgs4);
-                    if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-                      this.addSessionUsage(roundUsage, effectiveModel);
-                      session.addTokenUsage(roundUsage);
-                      if (teeShowTokens && !teeTokShown) {
-                        this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
-                      }
-                    }
-                    return;
-                  } catch (writeErr) {
-                    process.stderr.write(`[tee] salvage write failed: ${writeErr.message ?? writeErr}
-`);
-                  }
-                }
-                try {
-                  unlinkSync2(saveToFile);
-                } catch {
-                }
-                process.stdout.write(theme.error(
-                  `
-  \u2717 Rejected save: response was pseudo-tool-call markup with no usable document body (matched: ${pseudoMatch})
-  ${saveToFile} was deleted; asking model to retry.
-`
-                ));
-                const errorResults = result.toolCalls.map((tc) => ({
-                  callId: tc.id,
-                  content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was tool-call XML/JSON with no document body. ${saveToFile} was NOT saved.
-This fresh stream has NO tools \u2014 output is captured verbatim. STOP emitting <tool_call>, <function_calls>, <invoke>, <think>, or JSON tool blocks. Produce the document body NOW: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" and write the full report.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
-                  isError: tc.name === "save_last_response"
-                }));
-                const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
-                const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
-                extraMessages.push(...newMsgs3);
-                if (genUsage) {
-                  roundUsage.inputTokens += genUsage.inputTokens;
-                  roundUsage.outputTokens += genUsage.outputTokens;
-                  roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
-                  roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
-                }
-                continue;
-              }
-              lastResponseStore.content = genContent;
-              if (genUsage) {
-                roundUsage.inputTokens += genUsage.inputTokens;
-                roundUsage.outputTokens += genUsage.outputTokens;
-                roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
-                roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
-              }
-              session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
-              this.events.emit("message.after", { content: genContent });
-              const lines = genContent.split("\n").length;
-              const bytes = Buffer.byteLength(genContent, "utf-8");
-              const syntheticResults = result.toolCalls.map((tc) => ({
-                callId: tc.id,
-                content: tc.name === "save_last_response" ? `File saved: ${saveToFile} (${lines} lines, ${bytes} bytes)` : `[skipped: file already saved by tee streaming]`,
-                isError: false
-              }));
-              const reasoningContent2 = "reasoningContent" in result ? result.reasoningContent : void 0;
-              const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, syntheticResults, reasoningContent2);
-              extraMessages.push(...newMsgs2);
-              if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-                this.addSessionUsage(roundUsage, effectiveModel);
-                session.addTokenUsage(roundUsage);
-                if (teeShowTokens && !teeTokShown) {
-                  this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
-                }
+          this.events.emit("message.after", { content });
+        },
+        onInterrupted: () => {
+          spinner.stop();
+          process.stdout.write(theme.warning("\n\u26A1 Interrupted by user (Ctrl+C) \u2014 agentic loop stopped.\n"));
+        },
+        persistRound: (toolCalls, results, info) => {
+          persistToolRound(session, toolCalls, results, info);
+        },
+        onMcpToolUsed: (name) => usedMcpToolNames.add(name),
+        onToolsExecuted: (toolCalls, _results, extraMessages) => {
+          const readFileCalls = toolCalls.filter((tc) => tc.name === "read_file");
+          for (const rfc of readFileCalls) {
+            const filePath = rfc.arguments?.path;
+            if (filePath) {
+              const fileReadCount = extraMessages.filter((msg) => {
+                const m = msg;
+                if (m.role !== "assistant") return false;
+                const tcs = m.tool_calls;
+                if (!Array.isArray(tcs)) return false;
+                return tcs.some((tc) => {
+                  const fn = tc.function;
+                  return fn?.name === "read_file" && JSON.stringify(fn?.arguments ?? "").includes(filePath);
+                });
+              }).length;
+              if (fileReadCount >= 2) {
+                extraMessages.push({
+                  role: "user",
+                  content: `\u26A0\uFE0F You have read the file "${filePath}" ${fileReadCount + 1} times already. The content hasn't changed \u2014 do NOT read it again. Use the information you already have.`
+                });
               }
-            } finally {
-              this.teardownStreamInterrupt();
             }
-            return;
           }
-        }
-        askUserContext.rl = this.rl;
-        googleSearchContext.configManager = this.config;
-        streamToFileContext.provider = provider;
-        streamToFileContext.model = effectiveModel;
-        streamToFileContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
----
-${systemPromptVolatile}` : systemPrompt;
-        streamToFileContext.messages = apiMessages;
-        streamToFileContext.extraMessages = extraMessages;
-        streamToFileContext.temperature = modelParams.temperature;
-        streamToFileContext.timeout = modelParams.timeout;
-        spawnAgentContext.provider = provider;
-        spawnAgentContext.model = effectiveModel;
-        spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
----
-${systemPromptVolatile}` : systemPrompt;
-        spawnAgentContext.modelParams = modelParams;
-        spawnAgentContext.configManager = this.config;
-        ToolExecutor.currentMessageIndex = session.messages.length;
-        const toolResults = await this.toolExecutor.executeAll(result.toolCalls);
-        if (isInterrupted()) {
-          spinner.stop();
-          process.stdout.write(theme.warning("\n\u26A1 Interrupted by user (Ctrl+C) \u2014 agentic loop stopped.\n"));
-          this.teardownInterjectionListener();
-          return;
-        }
-        const thisRoundTools = result.toolCalls.map((tc) => tc.name);
-        roundToolHistory.push({ round: round + 1, tools: thisRoundTools });
-        const readFileCalls = result.toolCalls.filter((tc) => tc.name === "read_file");
-        for (const rfc of readFileCalls) {
-          const filePath = rfc.arguments?.path;
-          if (filePath) {
-            const fileReadCount = extraMessages.filter((msg) => {
-              const m = msg;
-              if (m.role !== "assistant") return false;
-              const tcs = m.tool_calls;
-              if (!Array.isArray(tcs)) return false;
-              return tcs.some((tc) => {
-                const fn = tc.function;
-                return fn?.name === "read_file" && JSON.stringify(fn?.arguments ?? "").includes(filePath);
-              });
-            }).length;
-            if (fileReadCount >= 2) {
+          process.stdin.resume();
+        },
+        afterToolRoundPersist: (toolCalls, _results, extraMessages) => {
+          const thisRoundHadWrite = toolCalls.some(
+            (tc) => tc.name === "write_file" || tc.name === "edit_file"
+          );
+          if (thisRoundHadWrite) {
+            const totalWritten = extractWrittenFilePaths(extraMessages).length;
+            if (totalWritten > 0) {
               extraMessages.push({
                 role: "user",
-                content: `\u26A0\uFE0F You have read the file "${filePath}" ${fileReadCount + 1} times already. The content hasn't changed \u2014 do NOT read it again. Use the information you already have.`
+                content: buildWriteRoundReminder(totalWritten)
               });
             }
           }
-        }
-        process.stdin.resume();
-        const reasoningContent = "reasoningContent" in result ? result.reasoningContent : void 0;
-        const newMsgs = provider.buildToolResultMessages(result.toolCalls, toolResults, reasoningContent);
-        extraMessages.push(...newMsgs);
-        for (const tc of result.toolCalls) {
-          if (tc.name.startsWith("mcp__")) usedMcpToolNames.add(tc.name);
-        }
-        const streamedContent = "content" in result ? result.content : void 0;
-        persistToolRound(session, result.toolCalls, toolResults, {
-          assistantContent: streamedContent,
-          reasoningContent
-        });
-        const thisRoundHadWrite = result.toolCalls.some(
-          (tc) => tc.name === "write_file" || tc.name === "edit_file"
-        );
-        if (thisRoundHadWrite) {
-          const totalWritten = extractWrittenFilePaths(extraMessages).length;
-          if (totalWritten > 0) {
-            extraMessages.push({
-              role: "user",
-              content: buildWriteRoundReminder(totalWritten)
-            });
-          }
-        }
-        if (freeRounds.apply(result.toolCalls.map((tc) => tc.name))) {
-          round--;
-        }
-        const currentSignature = result.toolCalls.map((tc) => `${tc.name}:${JSON.stringify(tc.arguments)}`).join("|");
-        if (currentSignature === lastToolCallSignature) {
-          repeatedToolCallCount++;
-          if (repeatedToolCallCount >= MAX_REPEATED_TOOL_CALLS) {
-            spinner.stop();
-            process.stderr.write(
-              theme.warning(`
+        },
+        checkLoopHealth: (toolCalls, extraMessages) => {
+          const currentSignature = toolCalls.map((tc) => `${tc.name}:${JSON.stringify(tc.arguments)}`).join("|");
+          if (currentSignature === lastToolCallSignature) {
+            repeatedToolCallCount++;
+            if (repeatedToolCallCount >= MAX_REPEATED_TOOL_CALLS) {
+              spinner.stop();
+              process.stderr.write(
+                theme.warning(`
 \u26A0 Detected ${repeatedToolCallCount + 1} identical consecutive tool calls \u2014 breaking loop.
 `)
-            );
-            extraMessages.push({
-              role: "user",
-              content: "You are stuck in a loop calling the same tool with the same arguments repeatedly. Stop calling tools and give the user a final text response summarizing what you found and what needs to be done next. " + (this.planMode ? "If you need to execute commands (bash, psql, etc.), tell the user to type `/plan execute` first." : "")
-            });
+              );
+              extraMessages.push({
+                role: "user",
+                content: "You are stuck in a loop calling the same tool with the same arguments repeatedly. Stop calling tools and give the user a final text response summarizing what you found and what needs to be done next. " + (this.planMode ? "If you need to execute commands (bash, psql, etc.), tell the user to type `/plan execute` first." : "")
+              });
+              repeatedToolCallCount = 0;
+              lastToolCallSignature = "";
+              return "skip-checkpoint";
+            }
+          } else {
+            lastToolCallSignature = currentSignature;
             repeatedToolCallCount = 0;
-            lastToolCallSignature = "";
-            continue;
           }
-        } else {
-          lastToolCallSignature = currentSignature;
-          repeatedToolCallCount = 0;
-        }
-        if (this._userInterjection) {
-          const msg = this._userInterjection;
-          this._userInterjection = null;
-          process.stdout.write(theme.warning(`\u26A1 Interjection: "${msg}"
-`));
-          extraMessages.push({ role: "user", content: msg });
-        }
-        const effectiveRound = round + 1;
-        const remaining = maxToolRounds - effectiveRound;
-        if (autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0) {
+        },
+        requestAutoPause: async ({ effectiveRound, maxToolRounds: totalRounds, remaining, toolSummary }) => {
           spinner.stop();
           process.stdout.write("\n");
-          process.stdout.write(theme.warning(`\u23F8  Auto-pause: ${effectiveRound}/${maxToolRounds} rounds used, ${remaining} remaining
+          process.stdout.write(theme.warning(`\u23F8  Auto-pause: ${effectiveRound}/${totalRounds} rounds used, ${remaining} remaining
 `));
-          const summary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
-          if (summary) {
-            process.stdout.write(theme.dim(`  Tools used: ${summary}
+          if (toolSummary) {
+            process.stdout.write(theme.dim(`  Tools used: ${toolSummary}
 `));
           }
           process.stdout.write(theme.dim("  Press ") + theme.info("y") + theme.dim(" to continue, or ") + theme.info("type a message") + theme.dim(" to redirect AI:\n"));
@@ -6988,74 +6736,222 @@ ${systemPromptVolatile}` : systemPrompt;
           this.setupInterjectionListener();
           if (pauseResponse === "n" || pauseResponse === "N" || pauseResponse === "\x1B") {
             process.stdout.write(theme.warning("\u26A1 Stopped by user at auto-pause checkpoint\n"));
-            extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
-            break;
-          } else if (pauseResponse && pauseResponse !== "y" && pauseResponse !== "Y" && pauseResponse !== "") {
+            return { action: "stop" };
+          }
+          if (pauseResponse && pauseResponse !== "y" && pauseResponse !== "Y") {
             process.stdout.write(theme.warning(`\u26A1 Redirect: "${pauseResponse}"
 `));
-            extraMessages.push({ role: "user", content: pauseResponse });
+            process.stdout.write(theme.success(`\u25B6 Continuing... (${remaining} rounds left)
+`));
+            return { action: "redirect", message: pauseResponse };
           }
           process.stdout.write(theme.success(`\u25B6 Continuing... (${remaining} rounds left)
 `));
-        }
-        const nextRound = round + 2;
-        spinner.start(
-          nextRound <= maxToolRounds ? `Thinking... (round ${nextRound}/${maxToolRounds})` : "Thinking..."
-        );
-      }
-      spinner.stop();
-      try {
-        spinner.start("Generating summary...");
-        const summaryExtra = [
-          ...extraMessages,
-          { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
-        ];
-        const summaryResult = await provider.chatWithTools(
-          {
-            messages: apiMessages,
-            model: effectiveModel,
-            systemPrompt,
-            systemPromptVolatile,
-            stream: false,
-            temperature: modelParams.temperature,
-            maxTokens: modelParams.maxTokens,
-            timeout: modelParams.timeout,
-            thinking: modelParams.thinking,
-            thinkingBudget: modelParams.thinkingBudget,
-            _extraMessages: summaryExtra
-          },
-          []
-          // Pass no tools at all, forcing the AI to reply with plain text
-        );
-        spinner.stop();
-        if ("content" in summaryResult) {
-          this.renderer.renderError(`Reached maximum tool call rounds (${maxToolRounds}). Here is a summary:`);
-          this.renderer.renderResponse(summaryResult.content);
-          lastResponseStore.content = summaryResult.content;
-          session.addMessage({ role: "assistant", content: summaryResult.content, timestamp: /* @__PURE__ */ new Date() });
-          if (summaryResult.usage) {
-            roundUsage.inputTokens += summaryResult.usage.inputTokens;
-            roundUsage.outputTokens += summaryResult.usage.outputTokens;
-            roundUsage.cacheCreationTokens += summaryResult.usage.cacheCreationTokens ?? 0;
-            roundUsage.cacheReadTokens += summaryResult.usage.cacheReadTokens ?? 0;
-          }
-        } else {
-          this.renderer.renderError(
-            `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
-Tip: You can continue the conversation by asking the AI to proceed.`
+          return { action: "continue" };
+        },
+        onRoundEnd: (round) => {
+          const nextRound = round + 2;
+          spinner.start(
+            nextRound <= maxToolRounds ? `Thinking... (round ${nextRound}/${maxToolRounds})` : "Thinking..."
           );
-        }
-      } catch {
-        this.renderer.renderError(
-          `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
+        },
+        onRoundsExhausted: (summaryContent) => {
+          if (summaryContent !== null) {
+            this.renderer.renderError(`Reached maximum tool call rounds (${maxToolRounds}). Here is a summary:`);
+            this.renderer.renderResponse(summaryContent);
+            lastResponseStore.content = summaryContent;
+            session.addMessage({ role: "assistant", content: summaryContent, timestamp: /* @__PURE__ */ new Date() });
+          } else {
+            this.renderer.renderError(
+              `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
 Tip: You can continue the conversation by asking the AI to proceed.`
-        );
-      }
-      if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
-        this.addSessionUsage(roundUsage, effectiveModel);
-        session.addTokenUsage(roundUsage);
+            );
+          }
+        },
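+        // ─── save_last_response special handling (tee streaming to disk) ──────────
+        // Architecture:
+        //   During a tool-call round the AI calls save_last_response(path), meaning "generate content + save it".
+        //   Old approach: AI emits the content first → buffered in lastResponseStore → the tool reads the store and writes to disk.
+        //     Problems: the Kimi API truncates tool_call arguments (~2KB), so content cannot be passed via arguments;
+        //               on the first call the store holds only the previous turn's content, not what is being generated now.
+        //   New approach (tee streaming):
+        //     1. core detects the save_last_response call and extracts the target file path from its arguments
+        //     2. this hook issues a streaming request; renderStream writes to the terminal and to disk at once (tee mode)
+        //     3. synthetic tool-success results are injected into extraMessages (actual tool execution is skipped)
+        //     4. on success it returns 'stop' to end immediately (usage is already recorded inside the hook); on failure/rejection it returns
+        //        'continue' so the model retries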
+        runSaveLastResponseTee: async ({ toolCalls, saveToFile, extraMessages, reasoningContent }) => {
+          spinner.stop();
+          const teeAc = this.setupStreamInterrupt();
+          try {
+            const teeSystemPrompt = stripToolCallReminder(systemPrompt ?? "") + CONTENT_ONLY_STREAM_REMINDER;
+            const teeExtraMessages = extraMessages.length > 0 ? [...extraMessages, { role: "user", content: TEE_FINAL_USER_NUDGE }] : [{ role: "user", content: TEE_FINAL_USER_NUDGE }];
+            const genStream = provider.chatStream({
+              messages: apiMessages,
+              model: effectiveModel,
+              systemPrompt: teeSystemPrompt,
+              systemPromptVolatile,
+              stream: true,
+              temperature: modelParams.temperature,
+              maxTokens: modelParams.maxTokens,
+              timeout: modelParams.timeout,
+              thinking: modelParams.thinking,
+              thinkingBudget: modelParams.thinkingBudget,
+              signal: teeAc.signal,
+              _extraMessages: teeExtraMessages
+            });
+            const teeShowTokens = this.shouldShowTokens();
+            let genContent;
+            let genUsage;
+            let teeTokShown = false;
+            try {
+              const teeResult = await this.renderer.renderStream(
+                genStream,
+                { saveToFile, showTokens: teeShowTokens, sessionTotal: teeShowTokens ? { ...this.sessionTokenUsage } : void 0, signal: teeAc.signal }
+              );
+              genContent = teeResult.content;
+              genUsage = teeResult.usage;
+              teeTokShown = teeResult.tokensShown;
+            } catch (teeErr) {
+              try {
+                unlinkSync2(saveToFile);
+              } catch {
+              }
+              const errMsg = teeErr instanceof Error ? teeErr.message : String(teeErr);
+              process.stdout.write(theme.error(
+                `
+  \u2717 tee stream failed: ${errMsg}
+  ${saveToFile} (partial) was deleted. Asking model to retry.
+`
+              ));
+              const errorResults = toolCalls.map((tc) => ({
+                callId: tc.id,
+                content: tc.name === "save_last_response" ? `[save_last_response failed] streaming was interrupted: ${errMsg}. ${saveToFile} was NOT saved. Retry \u2014 and consider producing a more compact output (split very large reports across multiple save_last_response calls if the previous attempt timed out).` : `[skipped: save_last_response failed]`,
+                isError: tc.name === "save_last_response"
+              }));
+              const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
+              extraMessages.push(...newMsgs2);
+              return "continue";
+            }
+            const metaMatch = detectMetaNarration(genContent);
+            if (metaMatch) {
+              try {
+                unlinkSync2(saveToFile);
+              } catch {
+              }
+              process.stdout.write(theme.error(
+                `
+  \u2717 Rejected save: response was meta-narration / leaked reasoning, not document body (matched: ${metaMatch})
+  ${saveToFile} was deleted; asking model to retry.
+`
+              ));
+              const errorResults = toolCalls.map((tc) => ({
+                callId: tc.id,
+                content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was internal reasoning / meta-narration about the task (e.g. "Let me re-read\u2026", "the user is asking me to\u2026") instead of the requested document body. ${saveToFile} was NOT saved.
+This fresh stream has NO tools. Produce ONLY the document body: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report" and write the full content. Do NOT narrate that you will produce the document \u2014 produce it.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
+                isError: tc.name === "save_last_response"
+              }));
+              const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
+              extraMessages.push(...newMsgs2);
+              if (genUsage) accumulateUsage(usage, genUsage);
+              return "continue";
+            }
+            const pseudoMatch = detectPseudoToolCalls(genContent);
+            if (pseudoMatch) {
+              const cleaned = stripPseudoToolCalls(genContent);
+              if (looksLikeDocumentBody(cleaned)) {
+                try {
+                  writeFileSync2(saveToFile, cleaned, "utf-8");
+                  process.stdout.write(theme.warning(
+                    `
+  \u26A0 Salvaged save: stripped pseudo-tool-call markup (matched: ${pseudoMatch})
+  ${saveToFile} now contains the cleaned document (${cleaned.length} chars; was ${genContent.length}).
+`
+                  ));
+                  lastResponseStore.content = cleaned;
+                  if (genUsage) accumulateUsage(usage, genUsage);
+                  session.addMessage({ role: "assistant", content: cleaned, timestamp: /* @__PURE__ */ new Date() });
+                  this.events.emit("message.after", { content: cleaned });
+                  const lines2 = cleaned.split("\n").length;
+                  const bytes2 = Buffer.byteLength(cleaned, "utf-8");
+                  const okResults = toolCalls.map((tc) => ({
+                    callId: tc.id,
+                    content: tc.name === "save_last_response" ? `File saved (with cleanup): ${saveToFile} (${lines2} lines, ${bytes2} bytes; pseudo-tool-call markup was stripped before save)` : `[skipped: file already saved by tee streaming]`,
+                    isError: false
+                  }));
+                  const newMsgs3 = provider.buildToolResultMessages(toolCalls, okResults, reasoningContent);
+                  extraMessages.push(...newMsgs3);
+                  if (usage.inputTokens > 0 || usage.outputTokens > 0) {
+                    this.addSessionUsage(usage, effectiveModel);
+                    session.addTokenUsage(usage);
+                    if (teeShowTokens && !teeTokShown) {
+                      this.renderer.renderUsage(usage, this.sessionTokenUsage);
+                    }
+                  }
+                  return "stop";
+                } catch (writeErr) {
+                  process.stderr.write(`[tee] salvage write failed: ${writeErr.message ?? writeErr}
+`);
+                }
+              }
+              try {
+                unlinkSync2(saveToFile);
+              } catch {
+              }
+              process.stdout.write(theme.error(
+                `
+  \u2717 Rejected save: response was pseudo-tool-call markup with no usable document body (matched: ${pseudoMatch})
+  ${saveToFile} was deleted; asking model to retry.
+`
+              ));
+              const errorResults = toolCalls.map((tc) => ({
+                callId: tc.id,
+                content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was tool-call XML/JSON with no document body. ${saveToFile} was NOT saved.
+This fresh stream has NO tools \u2014 output is captured verbatim. STOP emitting <tool_call>, <function_calls>, <invoke>, <think>, or JSON tool blocks. Produce the document body NOW: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" and write the full report.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
+                isError: tc.name === "save_last_response"
+              }));
+              const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
+              extraMessages.push(...newMsgs2);
+              if (genUsage) accumulateUsage(usage, genUsage);
+              return "continue";
+            }
+            lastResponseStore.content = genContent;
+            if (genUsage) accumulateUsage(usage, genUsage);
+            session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
+            this.events.emit("message.after", { content: genContent });
+            const lines = genContent.split("\n").length;
+            const bytes = Buffer.byteLength(genContent, "utf-8");
+            const syntheticResults = toolCalls.map((tc) => ({
+              callId: tc.id,
+              content: tc.name === "save_last_response" ? `File saved: ${saveToFile} (${lines} lines, ${bytes} bytes)` : `[skipped: file already saved by tee streaming]`,
+              isError: false
+            }));
+            const newMsgs = provider.buildToolResultMessages(toolCalls, syntheticResults, reasoningContent);
+            extraMessages.push(...newMsgs);
+            if (usage.inputTokens > 0 || usage.outputTokens > 0) {
+              this.addSessionUsage(usage, effectiveModel);
+              session.addTokenUsage(usage);
+              if (teeShowTokens && !teeTokShown) {
+                this.renderer.renderUsage(usage, this.sessionTokenUsage);
+              }
+            }
+            return "stop";
+          } finally {
+            this.teardownStreamInterrupt();
+          }
+        }
+      });
+      if (loopResult.reason !== "tee-stop" && (usage.inputTokens > 0 || usage.outputTokens > 0)) {
+        this.addSessionUsage(usage, effectiveModel);
+        session.addTokenUsage(usage);
         if (this.shouldShowTokens()) {
-          this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
+          this.renderer.renderUsage(usage, this.sessionTokenUsage);
         }
       }
     } finally {
@@ -7360,7 +7256,7 @@ program.command("web").description("Start Web UI server with browser-based chat
     console.error("Error: Invalid port number. Must be between 1 and 65535.");
     process.exit(1);
   }
-  const { startWebServer } = await import("./server-UT6PLLZC.js");
+  const { startWebServer } = await import("./server-2B5JDVJS.js");
   await startWebServer({ port, host: options.host });
 });
 program.command("user [action] [username]").description("Manage Web UI users (list | create <name> | delete <name> | reset-password <name> | logout-all <name> | migrate <name>)").action(async (action, username) => {
@@ -7527,16 +7423,16 @@ program.command("sessions").description("List recent conversation sessions").opt
   console.log(footer + "\n");
 });
 program.command("usage").description("Show token + cost usage grouped by provider/model (cross-session)").option("--days <n>", "Only the last N days (inclusive of today)").option("--month <ym>", "Only a specific month, format YYYY-MM (e.g. 2026-06)").option("--json", "Output as JSON (for scripting)").action(async (options) => {
-  const { runUsageCli } = await import("./usage-5KBD4UBB.js");
+  const { runUsageCli } = await import("./usage-ZVKFH7BM.js");
   await runUsageCli(options);
 });
 program.command("doctor").description("Health check: API keys, config, MCP, recent crashes, tool usage, disk usage").option("--json", "Output as JSON (for scripting)").option("--reset-stats", "Reset accumulated tool usage statistics").action(async (options) => {
-  const { runDoctorCli } = await import("./doctor-cli-X6MOE3YE.js");
+  const { runDoctorCli } = await import("./doctor-cli-MYJFAWKV.js");
   await runDoctorCli({ json: !!options.json, resetStats: !!options.resetStats });
 });
 program.command("batch <action> [arg] [arg2]").description("Anthropic Message Batches: submit | list | status <id> | results <id> [out] | cancel <id>").option("--dry-run", "Parse and validate input without submitting (submit only)").action(async (action, arg, arg2, options) => {
   try {
-    const batch = await import("./batch-ILD2EPEO.js");
+    const batch = await import("./batch-UTP6NYVX.js");
     switch (action) {
       case "submit":
         if (!arg) {
@@ -7579,7 +7475,7 @@ program.command("batch <action> [arg] [arg2]").description("Anthropic Message Ba
   }
 });
 program.command("mcp-serve").description("Start an MCP server over STDIO, exposing aicli's built-in tools to Claude Desktop / Cursor / other MCP clients").option("--allow-destructive", "Allow bash / run_interactive / task_create (always destructive in MCP mode)").option("--allow-outside-cwd", "Allow tool path arguments to escape the sandbox root \u2014 disabled by default").option("--tools <list>", "Comma-separated whitelist of tools to expose (default: all eligible tools)").option("--cwd <path>", "Working directory AND sandbox root (default: current directory)").action(async (options) => {
-  const { startMcpServer } = await import("./server-H3KIFOLK.js");
+  const { startMcpServer } = await import("./server-25WVH5YX.js");
   await startMcpServer({
     allowDestructive: !!options.allowDestructive,
     allowOutsideCwd: !!options.allowOutsideCwd,
@@ -7588,7 +7484,7 @@ program.command("mcp-serve").description("Start an MCP server over STDIO, exposi
   });
 });
 program.command("ci").description("Headless PR review (code + security) \u2014 reads git/gh diff, optionally posts to PR. Designed for GitHub Actions.").option("--pr <num>", "PR number; diff fetched via `gh pr diff <num>`", (v) => parseInt(v, 10)).option("--base <ref>", "Base ref for `git diff <ref>...HEAD` (ignored when --pr set)").option("--post", "Post review as a PR comment (requires gh CLI + GH_TOKEN, needs --pr)").option("--no-update", "Always create a new comment instead of updating the previous aicli review").option("--skip-code", "Skip the code review section").option("--skip-security", "Skip the security review section").option("--detailed", "Use the detailed code-review prompt").option("--max-diff <n>", "Max diff chars sent to the model (default 30000)", (v) => parseInt(v, 10)).option("--provider <id>", "Override provider (default: config.defaultProvider)").option("--model <id>", "Override model").option("--dry-run", "Print result to stdout instead of posting (overrides --post)").action(async (options) => {
-  const { runCi } = await import("./ci-7YWXFKGE.js");
+  const { runCi } = await import("./ci-2WFKSG2J.js");
   const result = await runCi({
     pr: options.pr,
     base: options.base,
@@ -7734,7 +7630,7 @@ program.command("hub [topic]").description("Start multi-agent hub (discuss / bra
     }),
     config.get("customProviders")
   );
-  const { startHub } = await import("./hub-SFMWUEUW.js");
+  const { startHub } = await import("./hub-CHE7JDIH.js");
   await startHub(
     {
       topic: topic ?? "",