jinzd-ai-cli 0.4.185 → 0.4.187

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,7 +15,7 @@ import {
15
15
  saveDevState,
16
16
  sessionHasMeaningfulContent,
17
17
  setupProxy
18
- } from "./chunk-U7P5A3MJ.js";
18
+ } from "./chunk-ZTBUTA24.js";
19
19
  import {
20
20
  ToolExecutor,
21
21
  ToolRegistry,
@@ -34,10 +34,10 @@ import {
34
34
  spawnAgentContext,
35
35
  theme,
36
36
  undoStack
37
- } from "./chunk-GOS4DWW5.js";
37
+ } from "./chunk-Q6BSUIDV.js";
38
38
  import "./chunk-HDSKW7Q3.js";
39
39
  import "./chunk-ZWVIDFGY.js";
40
- import "./chunk-GSRXKHZ7.js";
40
+ import "./chunk-4SZ6X47A.js";
41
41
  import {
42
42
  SessionManager,
43
43
  getContentText
@@ -54,47 +54,35 @@ import {
54
54
  getConfigDirUsage,
55
55
  listRecentCrashes,
56
56
  writeCrashLog
57
- } from "./chunk-ZO4LKUDM.js";
57
+ } from "./chunk-43T4MY5B.js";
58
58
  import {
59
- BudgetWarner,
60
59
  CONTENT_ONLY_STREAM_REMINDER,
61
- ContextPressureMonitor,
62
- EmptyResponseGuard,
63
- FreeRoundTracker,
64
- HALLUCINATION_CORRECTION_MESSAGE,
65
60
  ProviderRegistry,
66
61
  TEE_FINAL_USER_NUDGE,
67
62
  TOOL_CALL_REMINDER,
68
63
  ThinkTagFilter,
69
64
  accumulateUsage,
70
- buildPhantomCorrectionMessage,
71
65
  buildRoundBudgetHint,
72
- buildRoundsExhaustedPrompt,
73
- buildUserStopMessage,
74
66
  buildWriteRoundReminder,
75
67
  consumeToolCallStream,
76
68
  detectMetaNarration,
77
69
  detectPseudoToolCalls,
78
- detectsHallucinatedFileOp,
79
- extractBashCommands,
80
70
  extractWrittenFilePaths,
81
- findPhantomClaims,
82
- hadPreviousWriteToolCalls,
83
71
  looksLikeDocumentBody,
72
+ runAgentLoop,
84
73
  stripPseudoToolCalls,
85
- stripToolCallReminder,
86
- summarizeRecentTools
87
- } from "./chunk-5LK7H45B.js";
74
+ stripToolCallReminder
75
+ } from "./chunk-IQ7JE43O.js";
88
76
  import {
89
77
  getStatsSnapshot,
90
78
  getTopFailingTools,
91
79
  getTopUsedTools,
92
80
  installFlushOnExit
93
- } from "./chunk-GH32XE5K.js";
81
+ } from "./chunk-MVK25WZW.js";
94
82
  import "./chunk-HIU2SH4V.js";
95
83
  import {
96
84
  ConfigManager
97
- } from "./chunk-FHZ2LKM5.js";
85
+ } from "./chunk-UAJKGLRV.js";
98
86
  import {
99
87
  AuthError,
100
88
  ProviderError,
@@ -121,7 +109,7 @@ import {
121
109
  SKILLS_DIR_NAME,
122
110
  VERSION,
123
111
  buildUserIdentityPrompt
124
- } from "./chunk-PMZCQAJL.js";
112
+ } from "./chunk-SOWBY545.js";
125
113
  import {
126
114
  formatGitContextForPrompt,
127
115
  getGitContext,
@@ -1836,7 +1824,7 @@ No tools match "${filter}".
1836
1824
  const { join: join5 } = await import("path");
1837
1825
  const { existsSync: existsSync5 } = await import("fs");
1838
1826
  const { getGitRoot: getGitRoot2 } = await import("./git-context-7KIP4X2V.js");
1839
- const { MCP_PROJECT_CONFIG_NAME: MCP_PROJECT_CONFIG_NAME2 } = await import("./constants-RJDN7GOH.js");
1827
+ const { MCP_PROJECT_CONFIG_NAME: MCP_PROJECT_CONFIG_NAME2 } = await import("./constants-XEL5347E.js");
1840
1828
  const { approveProject, hashMcpFile } = await import("./project-trust-IFM7FXEV.js");
1841
1829
  const cwd = process.cwd();
1842
1830
  const projectRoot = getGitRoot2(cwd) ?? cwd;
@@ -2897,7 +2885,7 @@ ${hint}` : "")
2897
2885
  usage: "/test [command|filter]",
2898
2886
  async execute(args, ctx) {
2899
2887
  try {
2900
- const { executeTests } = await import("./run-tests-37RHYYD4.js");
2888
+ const { executeTests } = await import("./run-tests-Z7IGVS2W.js");
2901
2889
  const argStr = args.join(" ").trim();
2902
2890
  let testArgs = {};
2903
2891
  if (argStr) {
@@ -6391,7 +6379,6 @@ Session '${this.resumeSessionId}' not found.
6391
6379
  toolDefs = toolDefs.filter((t) => !this.blockedTools.has(t.name));
6392
6380
  }
6393
6381
  const apiMessages = [...messages];
6394
- const extraMessages = [];
6395
6382
  const maxToolRounds = this.maxToolRoundsOverride ?? this.config.get("maxToolRounds") ?? DEFAULT_MAX_TOOL_ROUNDS;
6396
6383
  const autoPauseIntervalRaw = this.config.get("autoPauseInterval");
6397
6384
  const autoPauseInterval = typeof autoPauseIntervalRaw === "number" ? autoPauseIntervalRaw : DEFAULT_AUTO_PAUSE_INTERVAL;
@@ -6404,55 +6391,175 @@ ${mcpBudgetNote}` : "");
6404
6391
  const modelParams = this.getModelParams(effectiveModel);
6405
6392
  const useStreaming = this.config.get("ui").streaming;
6406
6393
  const spinner = this.renderer.showSpinner("Thinking...");
6407
- const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
6394
+ const usage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
6408
6395
  const supportsStreamingTools = useStreaming && typeof provider.chatWithToolsStream === "function";
6409
6396
  let lastToolCallSignature = "";
6410
6397
  let repeatedToolCallCount = 0;
6411
- const roundToolHistory = [];
6412
- const budgetWarner = new BudgetWarner(maxToolRounds);
6413
- const emptyGuard = new EmptyResponseGuard();
6414
- const ctxMonitor = new ContextPressureMonitor();
6415
- const freeRounds = new FreeRoundTracker();
6416
6398
  this.setupInterjectionListener();
6417
6399
  try {
6418
- for (let round = 0; round < maxToolRounds; round++) {
6419
- this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
6420
- if (this.toolExecutor.pendingSlashCommand) {
6421
- const cmd = this.toolExecutor.pendingSlashCommand;
6422
- this.toolExecutor.pendingSlashCommand = null;
6423
- if (cmd === "/exit" || cmd === "/quit" || cmd === "/q") {
6424
- spinner.stop();
6425
- process.stdout.write(theme.warning(`\u26A1 ${cmd} \u2014 stopping agentic loop
6426
- `));
6427
- this.teardownInterjectionListener();
6428
- return;
6400
+ const loopResult = await runAgentLoop({
6401
+ maxToolRounds,
6402
+ autoPauseInterval,
6403
+ planMode: this.planMode,
6404
+ providerId: this.currentProvider,
6405
+ toolDefs,
6406
+ usage,
6407
+ // ── 模型调用:流式/非流式 + retry/fallback 包装(disabled-by-default,v0.4.144+)──
6408
+ callModel: async (_round, extraMessages) => {
6409
+ const chatRequest = {
6410
+ messages: apiMessages,
6411
+ model: effectiveModel,
6412
+ systemPrompt,
6413
+ systemPromptVolatile,
6414
+ stream: false,
6415
+ temperature: modelParams.temperature,
6416
+ maxTokens: modelParams.maxTokens,
6417
+ timeout: modelParams.timeout,
6418
+ thinking: modelParams.thinking,
6419
+ thinkingBudget: modelParams.thinkingBudget,
6420
+ ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
6421
+ };
6422
+ const fallbackOpts = this.getFallbackOptions(spinner);
6423
+ if (supportsStreamingTools) {
6424
+ const streamAc = this.setupStreamInterrupt();
6425
+ try {
6426
+ const streamGen = withFallbackStream(
6427
+ this.currentProvider,
6428
+ effectiveModel,
6429
+ this.providers,
6430
+ fallbackOpts,
6431
+ (p, m) => {
6432
+ const tc = p;
6433
+ if (typeof tc.chatWithToolsStream !== "function") {
6434
+ throw new Error(`provider ${p.info.id} does not support streaming tool calls`);
6435
+ }
6436
+ return tc.chatWithToolsStream({ ...chatRequest, model: m, signal: streamAc.signal }, toolDefs);
6437
+ }
6438
+ );
6439
+ const streamResult = await this.consumeToolStream(streamGen, spinner);
6440
+ if (streamResult.toolCalls.length > 0) {
6441
+ return {
6442
+ toolCalls: streamResult.toolCalls,
6443
+ usage: streamResult.usage,
6444
+ reasoningContent: streamResult.reasoningContent
6445
+ };
6446
+ }
6447
+ return {
6448
+ content: streamResult.textContent,
6449
+ usage: streamResult.usage,
6450
+ reasoningContent: streamResult.reasoningContent,
6451
+ finishReason: streamResult.finishReason,
6452
+ alreadyRendered: true
6453
+ };
6454
+ } finally {
6455
+ this.teardownStreamInterrupt();
6456
+ }
6429
6457
  }
6430
- process.stdout.write(theme.warning(`\u26A1 Command "${cmd}" \u2014 injected as message
6431
- `));
6432
- extraMessages.push({ role: "user", content: cmd });
6433
- }
6434
- const budgetWarning = budgetWarner.check(maxToolRounds - round);
6435
- if (budgetWarning) {
6436
- extraMessages.push({ role: "user", content: budgetWarning.injectMessage });
6437
- if (budgetWarning.displayMessage) {
6438
- const paint = budgetWarning.level === "critical" ? theme.error : theme.warning;
6439
- process.stdout.write(paint(` ${budgetWarning.displayMessage}
6440
- `));
6458
+ const result = await withFallback(
6459
+ this.currentProvider,
6460
+ effectiveModel,
6461
+ this.providers,
6462
+ fallbackOpts,
6463
+ (p, m) => p.chatWithTools({ ...chatRequest, model: m }, toolDefs)
6464
+ );
6465
+ return result;
6466
+ },
6467
+ // ── 轮次耗尽总结:空工具列表强制纯文本 ──
6468
+ callSummary: async (summaryExtra) => {
6469
+ spinner.stop();
6470
+ spinner.start("Generating summary...");
6471
+ try {
6472
+ const summaryResult = await provider.chatWithTools(
6473
+ {
6474
+ messages: apiMessages,
6475
+ model: effectiveModel,
6476
+ systemPrompt,
6477
+ systemPromptVolatile,
6478
+ stream: false,
6479
+ temperature: modelParams.temperature,
6480
+ maxTokens: modelParams.maxTokens,
6481
+ timeout: modelParams.timeout,
6482
+ thinking: modelParams.thinking,
6483
+ thinkingBudget: modelParams.thinkingBudget,
6484
+ _extraMessages: summaryExtra
6485
+ },
6486
+ []
6487
+ // 不提供任何工具,强制 AI 返回纯文本
6488
+ );
6489
+ spinner.stop();
6490
+ return "content" in summaryResult ? { content: summaryResult.content, usage: summaryResult.usage } : { usage: summaryResult.usage };
6491
+ } catch (err) {
6492
+ spinner.stop();
6493
+ throw err;
6441
6494
  }
6442
- }
6443
- if (this._userInterjection) {
6495
+ },
6496
+ executeTools: async (toolCalls, extraMessages) => {
6497
+ spinner.stop();
6498
+ askUserContext.rl = this.rl;
6499
+ googleSearchContext.configManager = this.config;
6500
+ streamToFileContext.provider = provider;
6501
+ streamToFileContext.model = effectiveModel;
6502
+ streamToFileContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
6503
+
6504
+ ---
6505
+
6506
+ ${systemPromptVolatile}` : systemPrompt;
6507
+ streamToFileContext.messages = apiMessages;
6508
+ streamToFileContext.extraMessages = extraMessages;
6509
+ streamToFileContext.temperature = modelParams.temperature;
6510
+ streamToFileContext.timeout = modelParams.timeout;
6511
+ spawnAgentContext.provider = provider;
6512
+ spawnAgentContext.model = effectiveModel;
6513
+ spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
6514
+
6515
+ ---
6516
+
6517
+ ${systemPromptVolatile}` : systemPrompt;
6518
+ spawnAgentContext.modelParams = modelParams;
6519
+ spawnAgentContext.configManager = this.config;
6520
+ ToolExecutor.currentMessageIndex = session.messages.length;
6521
+ return this.toolExecutor.executeAll(toolCalls);
6522
+ },
6523
+ buildToolResultMessages: (toolCalls, results, reasoningContent) => provider.buildToolResultMessages(toolCalls, results, reasoningContent),
6524
+ getContextWindow: () => this.getContextWindowSize(),
6525
+ estimateRequestTokens: (extraMessages) => this.estimateRequestTokens(systemPrompt, extraMessages),
6526
+ isInterrupted: () => isInterrupted(),
6527
+ pollInterjection: () => {
6528
+ if (!this._userInterjection) return null;
6444
6529
  const msg = this._userInterjection;
6445
6530
  this._userInterjection = null;
6446
6531
  process.stdout.write(theme.warning(`\u26A1 Interjection: "${msg}"
6447
6532
  `));
6448
- extraMessages.push({ role: "user", content: msg });
6449
- }
6450
- const ctxWindow = this.getContextWindowSize();
6451
- if (ctxWindow > 0) {
6452
- const reqTokens = this.estimateRequestTokens(systemPrompt, extraMessages);
6453
- const pressure = ctxMonitor.check(reqTokens, ctxWindow);
6533
+ return msg;
6534
+ },
6535
+ onRoundStart: (round) => {
6536
+ this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
6537
+ },
6538
+ beforeRound: (_round, extraMessages) => {
6539
+ if (this.toolExecutor.pendingSlashCommand) {
6540
+ const cmd = this.toolExecutor.pendingSlashCommand;
6541
+ this.toolExecutor.pendingSlashCommand = null;
6542
+ if (cmd === "/exit" || cmd === "/quit" || cmd === "/q") {
6543
+ spinner.stop();
6544
+ process.stdout.write(theme.warning(`\u26A1 ${cmd} \u2014 stopping agentic loop
6545
+ `));
6546
+ return "stop";
6547
+ }
6548
+ process.stdout.write(theme.warning(`\u26A1 Command "${cmd}" \u2014 injected as message
6549
+ `));
6550
+ extraMessages.push({ role: "user", content: cmd });
6551
+ }
6552
+ },
6553
+ onBudgetWarning: (warning) => {
6554
+ if (warning.displayMessage) {
6555
+ const paint = warning.level === "critical" ? theme.error : theme.warning;
6556
+ process.stdout.write(paint(` ${warning.displayMessage}
6557
+ `));
6558
+ }
6559
+ },
6560
+ onContextPressure: (pressure, ctxWindow, round) => {
6561
+ spinner.stop();
6454
6562
  if (pressure.action === "abort") {
6455
- spinner.stop();
6456
6563
  process.stderr.write(
6457
6564
  theme.error(
6458
6565
  `
@@ -6468,16 +6575,7 @@ ${mcpBudgetNote}` : "");
6468
6575
  `
6469
6576
  )
6470
6577
  );
6471
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
6472
- this.addSessionUsage(roundUsage, effectiveModel);
6473
- session.addTokenUsage(roundUsage);
6474
- if (this.shouldShowTokens()) {
6475
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6476
- }
6477
- }
6478
- return;
6479
- } else if (pressure.action === "warn") {
6480
- spinner.stop();
6578
+ } else {
6481
6579
  process.stdout.write(
6482
6580
  theme.warning(
6483
6581
  `
@@ -6485,492 +6583,142 @@ ${mcpBudgetNote}` : "");
6485
6583
  `
6486
6584
  )
6487
6585
  );
6488
- extraMessages.push({ role: "user", content: pressure.injectMessage });
6489
6586
  spinner.start(`Thinking... (round ${round + 1}/${maxToolRounds})`);
6490
6587
  }
6491
- }
6492
- let result;
6493
- let alreadyRendered = false;
6494
- const chatRequest = {
6495
- messages: apiMessages,
6496
- model: effectiveModel,
6497
- systemPrompt,
6498
- systemPromptVolatile,
6499
- stream: false,
6500
- temperature: modelParams.temperature,
6501
- maxTokens: modelParams.maxTokens,
6502
- timeout: modelParams.timeout,
6503
- thinking: modelParams.thinking,
6504
- thinkingBudget: modelParams.thinkingBudget,
6505
- ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
6506
- };
6507
- if (supportsStreamingTools) {
6508
- const streamAc = this.setupStreamInterrupt();
6509
- try {
6510
- const fallbackOpts = this.getFallbackOptions(spinner);
6511
- const streamGen = withFallbackStream(
6512
- this.currentProvider,
6513
- effectiveModel,
6514
- this.providers,
6515
- fallbackOpts,
6516
- (p, m) => {
6517
- const tc = p;
6518
- if (typeof tc.chatWithToolsStream !== "function") {
6519
- throw new Error(`provider ${p.info.id} does not support streaming tool calls`);
6520
- }
6521
- return tc.chatWithToolsStream({ ...chatRequest, model: m, signal: streamAc.signal }, toolDefs);
6522
- }
6523
- );
6524
- const streamResult = await this.consumeToolStream(streamGen, spinner);
6525
- if (streamResult.toolCalls.length > 0) {
6526
- const toolCalls = streamResult.toolCalls;
6527
- if (streamResult.rawContent) {
6528
- toolCalls._rawContent = streamResult.rawContent;
6529
- }
6530
- if (streamResult.textContent) {
6531
- toolCalls._streamedText = streamResult.textContent;
6532
- }
6533
- result = {
6534
- toolCalls,
6535
- usage: streamResult.usage,
6536
- ...streamResult.reasoningContent ? { reasoningContent: streamResult.reasoningContent } : {}
6537
- };
6538
- } else {
6539
- result = {
6540
- content: streamResult.textContent,
6541
- usage: streamResult.usage,
6542
- ...streamResult.reasoningContent ? { reasoningContent: streamResult.reasoningContent } : {},
6543
- ...streamResult.finishReason ? { finishReason: streamResult.finishReason } : {}
6544
- };
6545
- alreadyRendered = true;
6546
- }
6547
- } finally {
6548
- this.teardownStreamInterrupt();
6549
- }
6550
- } else {
6551
- const fallbackOpts = this.getFallbackOptions(spinner);
6552
- result = await withFallback(
6553
- this.currentProvider,
6554
- effectiveModel,
6555
- this.providers,
6556
- fallbackOpts,
6557
- (p, m) => p.chatWithTools({ ...chatRequest, model: m }, toolDefs)
6558
- );
6559
- }
6560
- accumulateUsage(roundUsage, result.usage);
6561
- if ("content" in result) {
6562
- const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
6563
- const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
6564
- const coarseHallucination = !this.planMode && hasWriteTools && !alreadyWrote && !!result.content && detectsHallucinatedFileOp(result.content);
6565
- const phantomPaths = (coarseHallucination || alreadyWrote) && !this.planMode && hasWriteTools && result.content ? findPhantomClaims(result.content, extraMessages) : [];
6566
- const bashRanThisTurn = extractBashCommands(extraMessages).length > 0;
6567
- const coarseShouldFire = coarseHallucination && !bashRanThisTurn;
6568
- if ((phantomPaths.length > 0 || coarseShouldFire) && round < maxToolRounds - 1) {
6569
- const providerName = this.currentProvider;
6570
- const detail = phantomPaths.length > 0 ? ` phantom files: ${phantomPaths.join(", ")}` : "";
6571
- process.stderr.write(
6572
- `[${providerName}] \u26A0 Hallucinated completion detected (AI claimed file was written but no tool was called), forcing retry...${detail}
6588
+ },
6589
+ onHallucinationRetry: ({ phantomPaths, round, alreadyRendered }) => {
6590
+ const detail = phantomPaths.length > 0 ? ` phantom files: ${phantomPaths.join(", ")}` : "";
6591
+ process.stderr.write(
6592
+ `[${this.currentProvider}] \u26A0 Hallucinated completion detected (AI claimed file was written but no tool was called), forcing retry...${detail}
6573
6593
  `
6574
- );
6575
- if (alreadyRendered) {
6576
- process.stdout.write("\n");
6577
- }
6578
- const correctionMsg = phantomPaths.length > 0 ? buildPhantomCorrectionMessage(phantomPaths) : HALLUCINATION_CORRECTION_MESSAGE;
6579
- const reasoningField = "reasoningContent" in result && result.reasoningContent ? { reasoning_content: result.reasoningContent } : this.currentProvider === "deepseek" ? { reasoning_content: "" } : {};
6580
- extraMessages.push(
6581
- { role: "assistant", content: result.content, ...reasoningField },
6582
- { role: "user", content: correctionMsg }
6583
- );
6584
- spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
6585
- continue;
6594
+ );
6595
+ if (alreadyRendered) {
6596
+ process.stdout.write("\n");
6586
6597
  }
6587
- if (!result.content || result.content.trim() === "") {
6588
- const fr = "finishReason" in result ? result.finishReason : void 0;
6589
- const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, fr);
6590
- if (decision.action === "nudge") {
6591
- spinner.stop();
6592
- if (alreadyRendered) process.stdout.write("\n");
6593
- process.stderr.write(theme.warning(`${decision.displayMessage}
6598
+ spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
6599
+ },
6600
+ onEmptyResponse: (decision, { alreadyRendered, round }) => {
6601
+ spinner.stop();
6602
+ if (alreadyRendered) process.stdout.write("\n");
6603
+ if (decision.action === "nudge") {
6604
+ process.stderr.write(theme.warning(`${decision.displayMessage}
6594
6605
  `));
6595
- extraMessages.push({ role: "user", content: decision.injectMessage });
6596
- spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
6597
- continue;
6598
- }
6599
- spinner.stop();
6600
- if (alreadyRendered) process.stdout.write("\n");
6601
- process.stderr.write(
6602
- theme.error(`
6606
+ spinner.start(`Retrying... (round ${round + 2}/${maxToolRounds})`);
6607
+ } else {
6608
+ process.stderr.write(theme.error(`
6603
6609
  ${decision.displayMessage}
6604
- `)
6605
- );
6610
+ `));
6606
6611
  process.stderr.write(
6607
6612
  theme.dim(` ${decision.hint}
6608
6613
  Try: /compact, /clear, or /model to switch.
6609
6614
 
6610
6615
  `)
6611
6616
  );
6612
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
6613
- this.addSessionUsage(roundUsage, effectiveModel);
6614
- session.addTokenUsage(roundUsage);
6615
- if (this.shouldShowTokens()) {
6616
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6617
- }
6618
- }
6619
- return;
6620
6617
  }
6621
- emptyGuard.onNonEmpty();
6618
+ },
6619
+ onFinalContent: async (content, { reasoningContent, alreadyRendered }) => {
6622
6620
  spinner.stop();
6623
- const finalContent = result.content;
6624
6621
  if (!alreadyRendered) {
6625
6622
  if (useStreaming) {
6626
6623
  const streamAc = this.setupStreamInterrupt();
6627
6624
  try {
6628
- await this.renderer.renderContentAsStream(finalContent, { signal: streamAc.signal });
6625
+ await this.renderer.renderContentAsStream(content, { signal: streamAc.signal });
6629
6626
  } finally {
6630
6627
  this.teardownStreamInterrupt();
6631
6628
  }
6632
6629
  } else {
6633
- this.renderer.renderResponse(finalContent);
6634
- }
6635
- } else {
6636
- if (finalContent.trim()) {
6637
- process.stdout.write("\n\n");
6630
+ this.renderer.renderResponse(content);
6638
6631
  }
6632
+ } else if (content.trim()) {
6633
+ process.stdout.write("\n\n");
6639
6634
  }
6640
- lastResponseStore.content = finalContent;
6641
- const finalReasoning = "reasoningContent" in result ? result.reasoningContent : void 0;
6635
+ lastResponseStore.content = content;
6642
6636
  session.addMessage({
6643
6637
  role: "assistant",
6644
- content: finalContent,
6638
+ content,
6645
6639
  timestamp: /* @__PURE__ */ new Date(),
6646
- ...finalReasoning ? { reasoningContent: finalReasoning } : {}
6640
+ ...reasoningContent ? { reasoningContent } : {}
6647
6641
  });
6648
- this.events.emit("message.after", { content: finalContent });
6649
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
6650
- this.addSessionUsage(roundUsage, effectiveModel);
6651
- session.addTokenUsage(roundUsage);
6652
- if (this.shouldShowTokens()) {
6653
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6654
- }
6655
- }
6656
- return;
6657
- }
6658
- spinner.stop();
6659
- const saveLastResponseCall = result.toolCalls.find((tc) => tc.name === "save_last_response");
6660
- if (saveLastResponseCall) {
6661
- const saveToFile = String(saveLastResponseCall.arguments["path"] ?? "");
6662
- if (!saveToFile) {
6663
- } else {
6664
- const teeAc = this.setupStreamInterrupt();
6665
- try {
6666
- const teeSystemPrompt = stripToolCallReminder(systemPrompt ?? "") + CONTENT_ONLY_STREAM_REMINDER;
6667
- const teeExtraMessages = extraMessages.length > 0 ? [...extraMessages, { role: "user", content: TEE_FINAL_USER_NUDGE }] : [{ role: "user", content: TEE_FINAL_USER_NUDGE }];
6668
- const genStream = provider.chatStream({
6669
- messages: apiMessages,
6670
- model: effectiveModel,
6671
- systemPrompt: teeSystemPrompt,
6672
- systemPromptVolatile,
6673
- stream: true,
6674
- temperature: modelParams.temperature,
6675
- maxTokens: modelParams.maxTokens,
6676
- timeout: modelParams.timeout,
6677
- thinking: modelParams.thinking,
6678
- thinkingBudget: modelParams.thinkingBudget,
6679
- signal: teeAc.signal,
6680
- _extraMessages: teeExtraMessages
6681
- });
6682
- const teeShowTokens = this.shouldShowTokens();
6683
- let genContent;
6684
- let genUsage;
6685
- let teeTokShown = false;
6686
- try {
6687
- const teeResult = await this.renderer.renderStream(
6688
- genStream,
6689
- { saveToFile, showTokens: teeShowTokens, sessionTotal: teeShowTokens ? { ...this.sessionTokenUsage } : void 0, signal: teeAc.signal }
6690
- );
6691
- genContent = teeResult.content;
6692
- genUsage = teeResult.usage;
6693
- teeTokShown = teeResult.tokensShown;
6694
- } catch (teeErr) {
6695
- try {
6696
- unlinkSync2(saveToFile);
6697
- } catch {
6698
- }
6699
- const errMsg = teeErr instanceof Error ? teeErr.message : String(teeErr);
6700
- process.stdout.write(theme.error(
6701
- `
6702
- \u2717 tee stream failed: ${errMsg}
6703
- ${saveToFile} (partial) was deleted. Asking model to retry.
6704
-
6705
- `
6706
- ));
6707
- const errorResults = result.toolCalls.map((tc) => ({
6708
- callId: tc.id,
6709
- content: tc.name === "save_last_response" ? `[save_last_response failed] streaming was interrupted: ${errMsg}. ${saveToFile} was NOT saved. Retry \u2014 and consider producing a more compact output (split very large reports across multiple save_last_response calls if the previous attempt timed out).` : `[skipped: save_last_response failed]`,
6710
- isError: tc.name === "save_last_response"
6711
- }));
6712
- const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
6713
- const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
6714
- extraMessages.push(...newMsgs3);
6715
- continue;
6716
- }
6717
- const metaMatch = detectMetaNarration(genContent);
6718
- if (metaMatch) {
6719
- try {
6720
- unlinkSync2(saveToFile);
6721
- } catch {
6722
- }
6723
- process.stdout.write(theme.error(
6724
- `
6725
- \u2717 Rejected save: response was meta-narration / leaked reasoning, not document body (matched: ${metaMatch})
6726
- ${saveToFile} was deleted; asking model to retry.
6727
-
6728
- `
6729
- ));
6730
- const errorResults = result.toolCalls.map((tc) => ({
6731
- callId: tc.id,
6732
- content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was internal reasoning / meta-narration about the task (e.g. "Let me re-read\u2026", "the user is asking me to\u2026") instead of the requested document body. ${saveToFile} was NOT saved.
6733
-
6734
- This fresh stream has NO tools. Produce ONLY the document body: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report" and write the full content. Do NOT narrate that you will produce the document \u2014 produce it.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
6735
- isError: tc.name === "save_last_response"
6736
- }));
6737
- const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
6738
- const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
6739
- extraMessages.push(...newMsgs3);
6740
- if (genUsage) {
6741
- roundUsage.inputTokens += genUsage.inputTokens;
6742
- roundUsage.outputTokens += genUsage.outputTokens;
6743
- roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
6744
- roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
6745
- }
6746
- continue;
6747
- }
6748
- const pseudoMatch = detectPseudoToolCalls(genContent);
6749
- if (pseudoMatch) {
6750
- const cleaned = stripPseudoToolCalls(genContent);
6751
- if (looksLikeDocumentBody(cleaned)) {
6752
- try {
6753
- writeFileSync2(saveToFile, cleaned, "utf-8");
6754
- process.stdout.write(theme.warning(
6755
- `
6756
- \u26A0 Salvaged save: stripped pseudo-tool-call markup (matched: ${pseudoMatch})
6757
- ${saveToFile} now contains the cleaned document (${cleaned.length} chars; was ${genContent.length}).
6758
-
6759
- `
6760
- ));
6761
- lastResponseStore.content = cleaned;
6762
- if (genUsage) {
6763
- roundUsage.inputTokens += genUsage.inputTokens;
6764
- roundUsage.outputTokens += genUsage.outputTokens;
6765
- roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
6766
- roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
6767
- }
6768
- session.addMessage({ role: "assistant", content: cleaned, timestamp: /* @__PURE__ */ new Date() });
6769
- this.events.emit("message.after", { content: cleaned });
6770
- const lines2 = cleaned.split("\n").length;
6771
- const bytes2 = Buffer.byteLength(cleaned, "utf-8");
6772
- const okResults = result.toolCalls.map((tc) => ({
6773
- callId: tc.id,
6774
- content: tc.name === "save_last_response" ? `File saved (with cleanup): ${saveToFile} (${lines2} lines, ${bytes2} bytes; pseudo-tool-call markup was stripped before save)` : `[skipped: file already saved by tee streaming]`,
6775
- isError: false
6776
- }));
6777
- const reasoningContent4 = "reasoningContent" in result ? result.reasoningContent : void 0;
6778
- const newMsgs4 = provider.buildToolResultMessages(result.toolCalls, okResults, reasoningContent4);
6779
- extraMessages.push(...newMsgs4);
6780
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
6781
- this.addSessionUsage(roundUsage, effectiveModel);
6782
- session.addTokenUsage(roundUsage);
6783
- if (teeShowTokens && !teeTokShown) {
6784
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6785
- }
6786
- }
6787
- return;
6788
- } catch (writeErr) {
6789
- process.stderr.write(`[tee] salvage write failed: ${writeErr.message ?? writeErr}
6790
- `);
6791
- }
6792
- }
6793
- try {
6794
- unlinkSync2(saveToFile);
6795
- } catch {
6796
- }
6797
- process.stdout.write(theme.error(
6798
- `
6799
- \u2717 Rejected save: response was pseudo-tool-call markup with no usable document body (matched: ${pseudoMatch})
6800
- ${saveToFile} was deleted; asking model to retry.
6801
-
6802
- `
6803
- ));
6804
- const errorResults = result.toolCalls.map((tc) => ({
6805
- callId: tc.id,
6806
- content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was tool-call XML/JSON with no document body. ${saveToFile} was NOT saved.
6807
-
6808
- This fresh stream has NO tools \u2014 output is captured verbatim. STOP emitting <tool_call>, <function_calls>, <invoke>, <think>, or JSON tool blocks. Produce the document body NOW: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" and write the full report.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
6809
- isError: tc.name === "save_last_response"
6810
- }));
6811
- const reasoningContent3 = "reasoningContent" in result ? result.reasoningContent : void 0;
6812
- const newMsgs3 = provider.buildToolResultMessages(result.toolCalls, errorResults, reasoningContent3);
6813
- extraMessages.push(...newMsgs3);
6814
- if (genUsage) {
6815
- roundUsage.inputTokens += genUsage.inputTokens;
6816
- roundUsage.outputTokens += genUsage.outputTokens;
6817
- roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
6818
- roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
6819
- }
6820
- continue;
6821
- }
6822
- lastResponseStore.content = genContent;
6823
- if (genUsage) {
6824
- roundUsage.inputTokens += genUsage.inputTokens;
6825
- roundUsage.outputTokens += genUsage.outputTokens;
6826
- roundUsage.cacheCreationTokens += genUsage.cacheCreationTokens ?? 0;
6827
- roundUsage.cacheReadTokens += genUsage.cacheReadTokens ?? 0;
6828
- }
6829
- session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
6830
- this.events.emit("message.after", { content: genContent });
6831
- const lines = genContent.split("\n").length;
6832
- const bytes = Buffer.byteLength(genContent, "utf-8");
6833
- const syntheticResults = result.toolCalls.map((tc) => ({
6834
- callId: tc.id,
6835
- content: tc.name === "save_last_response" ? `File saved: ${saveToFile} (${lines} lines, ${bytes} bytes)` : `[skipped: file already saved by tee streaming]`,
6836
- isError: false
6837
- }));
6838
- const reasoningContent2 = "reasoningContent" in result ? result.reasoningContent : void 0;
6839
- const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, syntheticResults, reasoningContent2);
6840
- extraMessages.push(...newMsgs2);
6841
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
6842
- this.addSessionUsage(roundUsage, effectiveModel);
6843
- session.addTokenUsage(roundUsage);
6844
- if (teeShowTokens && !teeTokShown) {
6845
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6846
- }
6642
+ this.events.emit("message.after", { content });
6643
+ },
6644
+ onInterrupted: () => {
6645
+ spinner.stop();
6646
+ process.stdout.write(theme.warning("\n\u26A1 Interrupted by user (Ctrl+C) \u2014 agentic loop stopped.\n"));
6647
+ },
6648
+ persistRound: (toolCalls, results, info) => {
6649
+ persistToolRound(session, toolCalls, results, info);
6650
+ },
6651
+ onMcpToolUsed: (name) => usedMcpToolNames.add(name),
6652
+ onToolsExecuted: (toolCalls, _results, extraMessages) => {
6653
+ const readFileCalls = toolCalls.filter((tc) => tc.name === "read_file");
6654
+ for (const rfc of readFileCalls) {
6655
+ const filePath = rfc.arguments?.path;
6656
+ if (filePath) {
6657
+ const fileReadCount = extraMessages.filter((msg) => {
6658
+ const m = msg;
6659
+ if (m.role !== "assistant") return false;
6660
+ const tcs = m.tool_calls;
6661
+ if (!Array.isArray(tcs)) return false;
6662
+ return tcs.some((tc) => {
6663
+ const fn = tc.function;
6664
+ return fn?.name === "read_file" && JSON.stringify(fn?.arguments ?? "").includes(filePath);
6665
+ });
6666
+ }).length;
6667
+ if (fileReadCount >= 2) {
6668
+ extraMessages.push({
6669
+ role: "user",
6670
+ content: `\u26A0\uFE0F You have read the file "${filePath}" ${fileReadCount + 1} times already. The content hasn't changed \u2014 do NOT read it again. Use the information you already have.`
6671
+ });
6847
6672
  }
6848
- } finally {
6849
- this.teardownStreamInterrupt();
6850
6673
  }
6851
- return;
6852
6674
  }
6853
- }
6854
- askUserContext.rl = this.rl;
6855
- googleSearchContext.configManager = this.config;
6856
- streamToFileContext.provider = provider;
6857
- streamToFileContext.model = effectiveModel;
6858
- streamToFileContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
6859
-
6860
- ---
6861
-
6862
- ${systemPromptVolatile}` : systemPrompt;
6863
- streamToFileContext.messages = apiMessages;
6864
- streamToFileContext.extraMessages = extraMessages;
6865
- streamToFileContext.temperature = modelParams.temperature;
6866
- streamToFileContext.timeout = modelParams.timeout;
6867
- spawnAgentContext.provider = provider;
6868
- spawnAgentContext.model = effectiveModel;
6869
- spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
6870
-
6871
- ---
6872
-
6873
- ${systemPromptVolatile}` : systemPrompt;
6874
- spawnAgentContext.modelParams = modelParams;
6875
- spawnAgentContext.configManager = this.config;
6876
- ToolExecutor.currentMessageIndex = session.messages.length;
6877
- const toolResults = await this.toolExecutor.executeAll(result.toolCalls);
6878
- if (isInterrupted()) {
6879
- spinner.stop();
6880
- process.stdout.write(theme.warning("\n\u26A1 Interrupted by user (Ctrl+C) \u2014 agentic loop stopped.\n"));
6881
- this.teardownInterjectionListener();
6882
- return;
6883
- }
6884
- const thisRoundTools = result.toolCalls.map((tc) => tc.name);
6885
- roundToolHistory.push({ round: round + 1, tools: thisRoundTools });
6886
- const readFileCalls = result.toolCalls.filter((tc) => tc.name === "read_file");
6887
- for (const rfc of readFileCalls) {
6888
- const filePath = rfc.arguments?.path;
6889
- if (filePath) {
6890
- const fileReadCount = extraMessages.filter((msg) => {
6891
- const m = msg;
6892
- if (m.role !== "assistant") return false;
6893
- const tcs = m.tool_calls;
6894
- if (!Array.isArray(tcs)) return false;
6895
- return tcs.some((tc) => {
6896
- const fn = tc.function;
6897
- return fn?.name === "read_file" && JSON.stringify(fn?.arguments ?? "").includes(filePath);
6898
- });
6899
- }).length;
6900
- if (fileReadCount >= 2) {
6675
+ process.stdin.resume();
6676
+ },
6677
+ afterToolRoundPersist: (toolCalls, _results, extraMessages) => {
6678
+ const thisRoundHadWrite = toolCalls.some(
6679
+ (tc) => tc.name === "write_file" || tc.name === "edit_file"
6680
+ );
6681
+ if (thisRoundHadWrite) {
6682
+ const totalWritten = extractWrittenFilePaths(extraMessages).length;
6683
+ if (totalWritten > 0) {
6901
6684
  extraMessages.push({
6902
6685
  role: "user",
6903
- content: `\u26A0\uFE0F You have read the file "${filePath}" ${fileReadCount + 1} times already. The content hasn't changed \u2014 do NOT read it again. Use the information you already have.`
6686
+ content: buildWriteRoundReminder(totalWritten)
6904
6687
  });
6905
6688
  }
6906
6689
  }
6907
- }
6908
- process.stdin.resume();
6909
- const reasoningContent = "reasoningContent" in result ? result.reasoningContent : void 0;
6910
- const newMsgs = provider.buildToolResultMessages(result.toolCalls, toolResults, reasoningContent);
6911
- extraMessages.push(...newMsgs);
6912
- for (const tc of result.toolCalls) {
6913
- if (tc.name.startsWith("mcp__")) usedMcpToolNames.add(tc.name);
6914
- }
6915
- const streamedContent = "content" in result ? result.content : void 0;
6916
- persistToolRound(session, result.toolCalls, toolResults, {
6917
- assistantContent: streamedContent,
6918
- reasoningContent
6919
- });
6920
- const thisRoundHadWrite = result.toolCalls.some(
6921
- (tc) => tc.name === "write_file" || tc.name === "edit_file"
6922
- );
6923
- if (thisRoundHadWrite) {
6924
- const totalWritten = extractWrittenFilePaths(extraMessages).length;
6925
- if (totalWritten > 0) {
6926
- extraMessages.push({
6927
- role: "user",
6928
- content: buildWriteRoundReminder(totalWritten)
6929
- });
6930
- }
6931
- }
6932
- if (freeRounds.apply(result.toolCalls.map((tc) => tc.name))) {
6933
- round--;
6934
- }
6935
- const currentSignature = result.toolCalls.map((tc) => `${tc.name}:${JSON.stringify(tc.arguments)}`).join("|");
6936
- if (currentSignature === lastToolCallSignature) {
6937
- repeatedToolCallCount++;
6938
- if (repeatedToolCallCount >= MAX_REPEATED_TOOL_CALLS) {
6939
- spinner.stop();
6940
- process.stderr.write(
6941
- theme.warning(`
6690
+ },
6691
+ checkLoopHealth: (toolCalls, extraMessages) => {
6692
+ const currentSignature = toolCalls.map((tc) => `${tc.name}:${JSON.stringify(tc.arguments)}`).join("|");
6693
+ if (currentSignature === lastToolCallSignature) {
6694
+ repeatedToolCallCount++;
6695
+ if (repeatedToolCallCount >= MAX_REPEATED_TOOL_CALLS) {
6696
+ spinner.stop();
6697
+ process.stderr.write(
6698
+ theme.warning(`
6942
6699
  \u26A0 Detected ${repeatedToolCallCount + 1} identical consecutive tool calls \u2014 breaking loop.
6943
6700
  `)
6944
- );
6945
- extraMessages.push({
6946
- role: "user",
6947
- content: "You are stuck in a loop calling the same tool with the same arguments repeatedly. Stop calling tools and give the user a final text response summarizing what you found and what needs to be done next. " + (this.planMode ? "If you need to execute commands (bash, psql, etc.), tell the user to type `/plan execute` first." : "")
6948
- });
6701
+ );
6702
+ extraMessages.push({
6703
+ role: "user",
6704
+ content: "You are stuck in a loop calling the same tool with the same arguments repeatedly. Stop calling tools and give the user a final text response summarizing what you found and what needs to be done next. " + (this.planMode ? "If you need to execute commands (bash, psql, etc.), tell the user to type `/plan execute` first." : "")
6705
+ });
6706
+ repeatedToolCallCount = 0;
6707
+ lastToolCallSignature = "";
6708
+ return "skip-checkpoint";
6709
+ }
6710
+ } else {
6711
+ lastToolCallSignature = currentSignature;
6949
6712
  repeatedToolCallCount = 0;
6950
- lastToolCallSignature = "";
6951
- continue;
6952
6713
  }
6953
- } else {
6954
- lastToolCallSignature = currentSignature;
6955
- repeatedToolCallCount = 0;
6956
- }
6957
- if (this._userInterjection) {
6958
- const msg = this._userInterjection;
6959
- this._userInterjection = null;
6960
- process.stdout.write(theme.warning(`\u26A1 Interjection: "${msg}"
6961
- `));
6962
- extraMessages.push({ role: "user", content: msg });
6963
- }
6964
- const effectiveRound = round + 1;
6965
- const remaining = maxToolRounds - effectiveRound;
6966
- if (autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0) {
6714
+ },
6715
+ requestAutoPause: async ({ effectiveRound, maxToolRounds: totalRounds, remaining, toolSummary }) => {
6967
6716
  spinner.stop();
6968
6717
  process.stdout.write("\n");
6969
- process.stdout.write(theme.warning(`\u23F8 Auto-pause: ${effectiveRound}/${maxToolRounds} rounds used, ${remaining} remaining
6718
+ process.stdout.write(theme.warning(`\u23F8 Auto-pause: ${effectiveRound}/${totalRounds} rounds used, ${remaining} remaining
6970
6719
  `));
6971
- const summary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
6972
- if (summary) {
6973
- process.stdout.write(theme.dim(` Tools used: ${summary}
6720
+ if (toolSummary) {
6721
+ process.stdout.write(theme.dim(` Tools used: ${toolSummary}
6974
6722
  `));
6975
6723
  }
6976
6724
  process.stdout.write(theme.dim(" Press ") + theme.info("y") + theme.dim(" to continue, or ") + theme.info("type a message") + theme.dim(" to redirect AI:\n"));
@@ -6988,74 +6736,222 @@ ${systemPromptVolatile}` : systemPrompt;
6988
6736
  this.setupInterjectionListener();
6989
6737
  if (pauseResponse === "n" || pauseResponse === "N" || pauseResponse === "\x1B") {
6990
6738
  process.stdout.write(theme.warning("\u26A1 Stopped by user at auto-pause checkpoint\n"));
6991
- extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
6992
- break;
6993
- } else if (pauseResponse && pauseResponse !== "y" && pauseResponse !== "Y" && pauseResponse !== "") {
6739
+ return { action: "stop" };
6740
+ }
6741
+ if (pauseResponse && pauseResponse !== "y" && pauseResponse !== "Y") {
6994
6742
  process.stdout.write(theme.warning(`\u26A1 Redirect: "${pauseResponse}"
6995
6743
  `));
6996
- extraMessages.push({ role: "user", content: pauseResponse });
6744
+ process.stdout.write(theme.success(`\u25B6 Continuing... (${remaining} rounds left)
6745
+ `));
6746
+ return { action: "redirect", message: pauseResponse };
6997
6747
  }
6998
6748
  process.stdout.write(theme.success(`\u25B6 Continuing... (${remaining} rounds left)
6999
6749
  `));
7000
- }
7001
- const nextRound = round + 2;
7002
- spinner.start(
7003
- nextRound <= maxToolRounds ? `Thinking... (round ${nextRound}/${maxToolRounds})` : "Thinking..."
7004
- );
7005
- }
7006
- spinner.stop();
7007
- try {
7008
- spinner.start("Generating summary...");
7009
- const summaryExtra = [
7010
- ...extraMessages,
7011
- { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
7012
- ];
7013
- const summaryResult = await provider.chatWithTools(
7014
- {
7015
- messages: apiMessages,
7016
- model: effectiveModel,
7017
- systemPrompt,
7018
- systemPromptVolatile,
7019
- stream: false,
7020
- temperature: modelParams.temperature,
7021
- maxTokens: modelParams.maxTokens,
7022
- timeout: modelParams.timeout,
7023
- thinking: modelParams.thinking,
7024
- thinkingBudget: modelParams.thinkingBudget,
7025
- _extraMessages: summaryExtra
7026
- },
7027
- []
7028
- // 不提供任何工具,强制 AI 返回纯文本
7029
- );
7030
- spinner.stop();
7031
- if ("content" in summaryResult) {
7032
- this.renderer.renderError(`Reached maximum tool call rounds (${maxToolRounds}). Here is a summary:`);
7033
- this.renderer.renderResponse(summaryResult.content);
7034
- lastResponseStore.content = summaryResult.content;
7035
- session.addMessage({ role: "assistant", content: summaryResult.content, timestamp: /* @__PURE__ */ new Date() });
7036
- if (summaryResult.usage) {
7037
- roundUsage.inputTokens += summaryResult.usage.inputTokens;
7038
- roundUsage.outputTokens += summaryResult.usage.outputTokens;
7039
- roundUsage.cacheCreationTokens += summaryResult.usage.cacheCreationTokens ?? 0;
7040
- roundUsage.cacheReadTokens += summaryResult.usage.cacheReadTokens ?? 0;
7041
- }
7042
- } else {
7043
- this.renderer.renderError(
7044
- `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
7045
- Tip: You can continue the conversation by asking the AI to proceed.`
6750
+ return { action: "continue" };
6751
+ },
6752
+ onRoundEnd: (round) => {
6753
+ const nextRound = round + 2;
6754
+ spinner.start(
6755
+ nextRound <= maxToolRounds ? `Thinking... (round ${nextRound}/${maxToolRounds})` : "Thinking..."
7046
6756
  );
7047
- }
7048
- } catch {
7049
- this.renderer.renderError(
7050
- `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
6757
+ },
6758
+ onRoundsExhausted: (summaryContent) => {
6759
+ if (summaryContent !== null) {
6760
+ this.renderer.renderError(`Reached maximum tool call rounds (${maxToolRounds}). Here is a summary:`);
6761
+ this.renderer.renderResponse(summaryContent);
6762
+ lastResponseStore.content = summaryContent;
6763
+ session.addMessage({ role: "assistant", content: summaryContent, timestamp: /* @__PURE__ */ new Date() });
6764
+ } else {
6765
+ this.renderer.renderError(
6766
+ `Reached maximum tool call rounds (${maxToolRounds}). Stopping.
7051
6767
  Tip: You can continue the conversation by asking the AI to proceed.`
7052
- );
7053
- }
7054
- if (roundUsage.inputTokens > 0 || roundUsage.outputTokens > 0) {
7055
- this.addSessionUsage(roundUsage, effectiveModel);
7056
- session.addTokenUsage(roundUsage);
6768
+ );
6769
+ }
6770
+ },
6771
+ // ─── save_last_response 特殊处理(tee 流式写盘)────────────────────────
6772
+ // 架构设计:
6773
+ // AI 在工具调用轮次中调用 save_last_response(path),意图是"生成内容 + 保存"。
6774
+ // 旧方案:AI 先输出内容 → lastResponseStore 暂存 → 工具读 store 写盘
6775
+ // 问题:Kimi API 会截断 tool_call arguments (~2KB),内容无法通过参数传递;
6776
+ // 第一轮调用时 store 里只有上一轮旧内容,根本没有本次生成的内容。
6777
+ // 新方案(tee streaming):
6778
+ // 1. core 检测到 save_last_response 调用并从参数中提取目标文件路径
6779
+ // 2. 此钩子发起流式请求,renderStream 同时写入终端 + 磁盘(tee 模式)
6780
+ // 3. 注入合成的工具成功结果到 extraMessages(跳过实际工具执行)
6781
+ // 4. 成功返回 'stop' 直接结束(usage 已在钩子内入账);失败/拒绝返回
6782
+ // 'continue' 让模型重试
6783
+ runSaveLastResponseTee: async ({ toolCalls, saveToFile, extraMessages, reasoningContent }) => {
6784
+ spinner.stop();
6785
+ const teeAc = this.setupStreamInterrupt();
6786
+ try {
6787
+ const teeSystemPrompt = stripToolCallReminder(systemPrompt ?? "") + CONTENT_ONLY_STREAM_REMINDER;
6788
+ const teeExtraMessages = extraMessages.length > 0 ? [...extraMessages, { role: "user", content: TEE_FINAL_USER_NUDGE }] : [{ role: "user", content: TEE_FINAL_USER_NUDGE }];
6789
+ const genStream = provider.chatStream({
6790
+ messages: apiMessages,
6791
+ model: effectiveModel,
6792
+ systemPrompt: teeSystemPrompt,
6793
+ systemPromptVolatile,
6794
+ stream: true,
6795
+ temperature: modelParams.temperature,
6796
+ maxTokens: modelParams.maxTokens,
6797
+ timeout: modelParams.timeout,
6798
+ thinking: modelParams.thinking,
6799
+ thinkingBudget: modelParams.thinkingBudget,
6800
+ signal: teeAc.signal,
6801
+ _extraMessages: teeExtraMessages
6802
+ });
6803
+ const teeShowTokens = this.shouldShowTokens();
6804
+ let genContent;
6805
+ let genUsage;
6806
+ let teeTokShown = false;
6807
+ try {
6808
+ const teeResult = await this.renderer.renderStream(
6809
+ genStream,
6810
+ { saveToFile, showTokens: teeShowTokens, sessionTotal: teeShowTokens ? { ...this.sessionTokenUsage } : void 0, signal: teeAc.signal }
6811
+ );
6812
+ genContent = teeResult.content;
6813
+ genUsage = teeResult.usage;
6814
+ teeTokShown = teeResult.tokensShown;
6815
+ } catch (teeErr) {
6816
+ try {
6817
+ unlinkSync2(saveToFile);
6818
+ } catch {
6819
+ }
6820
+ const errMsg = teeErr instanceof Error ? teeErr.message : String(teeErr);
6821
+ process.stdout.write(theme.error(
6822
+ `
6823
+ \u2717 tee stream failed: ${errMsg}
6824
+ ${saveToFile} (partial) was deleted. Asking model to retry.
6825
+
6826
+ `
6827
+ ));
6828
+ const errorResults = toolCalls.map((tc) => ({
6829
+ callId: tc.id,
6830
+ content: tc.name === "save_last_response" ? `[save_last_response failed] streaming was interrupted: ${errMsg}. ${saveToFile} was NOT saved. Retry \u2014 and consider producing a more compact output (split very large reports across multiple save_last_response calls if the previous attempt timed out).` : `[skipped: save_last_response failed]`,
6831
+ isError: tc.name === "save_last_response"
6832
+ }));
6833
+ const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
6834
+ extraMessages.push(...newMsgs2);
6835
+ return "continue";
6836
+ }
6837
+ const metaMatch = detectMetaNarration(genContent);
6838
+ if (metaMatch) {
6839
+ try {
6840
+ unlinkSync2(saveToFile);
6841
+ } catch {
6842
+ }
6843
+ process.stdout.write(theme.error(
6844
+ `
6845
+ \u2717 Rejected save: response was meta-narration / leaked reasoning, not document body (matched: ${metaMatch})
6846
+ ${saveToFile} was deleted; asking model to retry.
6847
+
6848
+ `
6849
+ ));
6850
+ const errorResults = toolCalls.map((tc) => ({
6851
+ callId: tc.id,
6852
+ content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was internal reasoning / meta-narration about the task (e.g. "Let me re-read\u2026", "the user is asking me to\u2026") instead of the requested document body. ${saveToFile} was NOT saved.
6853
+
6854
+ This fresh stream has NO tools. Produce ONLY the document body: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report" and write the full content. Do NOT narrate that you will produce the document \u2014 produce it.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
6855
+ isError: tc.name === "save_last_response"
6856
+ }));
6857
+ const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
6858
+ extraMessages.push(...newMsgs2);
6859
+ if (genUsage) accumulateUsage(usage, genUsage);
6860
+ return "continue";
6861
+ }
6862
+ const pseudoMatch = detectPseudoToolCalls(genContent);
6863
+ if (pseudoMatch) {
6864
+ const cleaned = stripPseudoToolCalls(genContent);
6865
+ if (looksLikeDocumentBody(cleaned)) {
6866
+ try {
6867
+ writeFileSync2(saveToFile, cleaned, "utf-8");
6868
+ process.stdout.write(theme.warning(
6869
+ `
6870
+ \u26A0 Salvaged save: stripped pseudo-tool-call markup (matched: ${pseudoMatch})
6871
+ ${saveToFile} now contains the cleaned document (${cleaned.length} chars; was ${genContent.length}).
6872
+
6873
+ `
6874
+ ));
6875
+ lastResponseStore.content = cleaned;
6876
+ if (genUsage) accumulateUsage(usage, genUsage);
6877
+ session.addMessage({ role: "assistant", content: cleaned, timestamp: /* @__PURE__ */ new Date() });
6878
+ this.events.emit("message.after", { content: cleaned });
6879
+ const lines2 = cleaned.split("\n").length;
6880
+ const bytes2 = Buffer.byteLength(cleaned, "utf-8");
6881
+ const okResults = toolCalls.map((tc) => ({
6882
+ callId: tc.id,
6883
+ content: tc.name === "save_last_response" ? `File saved (with cleanup): ${saveToFile} (${lines2} lines, ${bytes2} bytes; pseudo-tool-call markup was stripped before save)` : `[skipped: file already saved by tee streaming]`,
6884
+ isError: false
6885
+ }));
6886
+ const newMsgs3 = provider.buildToolResultMessages(toolCalls, okResults, reasoningContent);
6887
+ extraMessages.push(...newMsgs3);
6888
+ if (usage.inputTokens > 0 || usage.outputTokens > 0) {
6889
+ this.addSessionUsage(usage, effectiveModel);
6890
+ session.addTokenUsage(usage);
6891
+ if (teeShowTokens && !teeTokShown) {
6892
+ this.renderer.renderUsage(usage, this.sessionTokenUsage);
6893
+ }
6894
+ }
6895
+ return "stop";
6896
+ } catch (writeErr) {
6897
+ process.stderr.write(`[tee] salvage write failed: ${writeErr.message ?? writeErr}
6898
+ `);
6899
+ }
6900
+ }
6901
+ try {
6902
+ unlinkSync2(saveToFile);
6903
+ } catch {
6904
+ }
6905
+ process.stdout.write(theme.error(
6906
+ `
6907
+ \u2717 Rejected save: response was pseudo-tool-call markup with no usable document body (matched: ${pseudoMatch})
6908
+ ${saveToFile} was deleted; asking model to retry.
6909
+
6910
+ `
6911
+ ));
6912
+ const errorResults = toolCalls.map((tc) => ({
6913
+ callId: tc.id,
6914
+ content: tc.name === "save_last_response" ? `[save_last_response REJECTED] Your output was tool-call XML/JSON with no document body. ${saveToFile} was NOT saved.
6915
+
6916
+ This fresh stream has NO tools \u2014 output is captured verbatim. STOP emitting <tool_call>, <function_calls>, <invoke>, <think>, or JSON tool blocks. Produce the document body NOW: start with a markdown heading like "# \u5BA1\u8BA1\u62A5\u544A" and write the full report.` : `[skipped: save_last_response was rejected and other parallel calls are abandoned]`,
6917
+ isError: tc.name === "save_last_response"
6918
+ }));
6919
+ const newMsgs2 = provider.buildToolResultMessages(toolCalls, errorResults, reasoningContent);
6920
+ extraMessages.push(...newMsgs2);
6921
+ if (genUsage) accumulateUsage(usage, genUsage);
6922
+ return "continue";
6923
+ }
6924
+ lastResponseStore.content = genContent;
6925
+ if (genUsage) accumulateUsage(usage, genUsage);
6926
+ session.addMessage({ role: "assistant", content: genContent, timestamp: /* @__PURE__ */ new Date() });
6927
+ this.events.emit("message.after", { content: genContent });
6928
+ const lines = genContent.split("\n").length;
6929
+ const bytes = Buffer.byteLength(genContent, "utf-8");
6930
+ const syntheticResults = toolCalls.map((tc) => ({
6931
+ callId: tc.id,
6932
+ content: tc.name === "save_last_response" ? `File saved: ${saveToFile} (${lines} lines, ${bytes} bytes)` : `[skipped: file already saved by tee streaming]`,
6933
+ isError: false
6934
+ }));
6935
+ const newMsgs = provider.buildToolResultMessages(toolCalls, syntheticResults, reasoningContent);
6936
+ extraMessages.push(...newMsgs);
6937
+ if (usage.inputTokens > 0 || usage.outputTokens > 0) {
6938
+ this.addSessionUsage(usage, effectiveModel);
6939
+ session.addTokenUsage(usage);
6940
+ if (teeShowTokens && !teeTokShown) {
6941
+ this.renderer.renderUsage(usage, this.sessionTokenUsage);
6942
+ }
6943
+ }
6944
+ return "stop";
6945
+ } finally {
6946
+ this.teardownStreamInterrupt();
6947
+ }
6948
+ }
6949
+ });
6950
+ if (loopResult.reason !== "tee-stop" && (usage.inputTokens > 0 || usage.outputTokens > 0)) {
6951
+ this.addSessionUsage(usage, effectiveModel);
6952
+ session.addTokenUsage(usage);
7057
6953
  if (this.shouldShowTokens()) {
7058
- this.renderer.renderUsage(roundUsage, this.sessionTokenUsage);
6954
+ this.renderer.renderUsage(usage, this.sessionTokenUsage);
7059
6955
  }
7060
6956
  }
7061
6957
  } finally {
@@ -7360,7 +7256,7 @@ program.command("web").description("Start Web UI server with browser-based chat
7360
7256
  console.error("Error: Invalid port number. Must be between 1 and 65535.");
7361
7257
  process.exit(1);
7362
7258
  }
7363
- const { startWebServer } = await import("./server-UT6PLLZC.js");
7259
+ const { startWebServer } = await import("./server-2B5JDVJS.js");
7364
7260
  await startWebServer({ port, host: options.host });
7365
7261
  });
7366
7262
  program.command("user [action] [username]").description("Manage Web UI users (list | create <name> | delete <name> | reset-password <name> | logout-all <name> | migrate <name>)").action(async (action, username) => {
@@ -7527,16 +7423,16 @@ program.command("sessions").description("List recent conversation sessions").opt
7527
7423
  console.log(footer + "\n");
7528
7424
  });
7529
7425
  program.command("usage").description("Show token + cost usage grouped by provider/model (cross-session)").option("--days <n>", "Only the last N days (inclusive of today)").option("--month <ym>", "Only a specific month, format YYYY-MM (e.g. 2026-06)").option("--json", "Output as JSON (for scripting)").action(async (options) => {
7530
- const { runUsageCli } = await import("./usage-5KBD4UBB.js");
7426
+ const { runUsageCli } = await import("./usage-ZVKFH7BM.js");
7531
7427
  await runUsageCli(options);
7532
7428
  });
7533
7429
  program.command("doctor").description("Health check: API keys, config, MCP, recent crashes, tool usage, disk usage").option("--json", "Output as JSON (for scripting)").option("--reset-stats", "Reset accumulated tool usage statistics").action(async (options) => {
7534
- const { runDoctorCli } = await import("./doctor-cli-X6MOE3YE.js");
7430
+ const { runDoctorCli } = await import("./doctor-cli-MYJFAWKV.js");
7535
7431
  await runDoctorCli({ json: !!options.json, resetStats: !!options.resetStats });
7536
7432
  });
7537
7433
  program.command("batch <action> [arg] [arg2]").description("Anthropic Message Batches: submit | list | status <id> | results <id> [out] | cancel <id>").option("--dry-run", "Parse and validate input without submitting (submit only)").action(async (action, arg, arg2, options) => {
7538
7434
  try {
7539
- const batch = await import("./batch-ILD2EPEO.js");
7435
+ const batch = await import("./batch-UTP6NYVX.js");
7540
7436
  switch (action) {
7541
7437
  case "submit":
7542
7438
  if (!arg) {
@@ -7579,7 +7475,7 @@ program.command("batch <action> [arg] [arg2]").description("Anthropic Message Ba
7579
7475
  }
7580
7476
  });
7581
7477
  program.command("mcp-serve").description("Start an MCP server over STDIO, exposing aicli's built-in tools to Claude Desktop / Cursor / other MCP clients").option("--allow-destructive", "Allow bash / run_interactive / task_create (always destructive in MCP mode)").option("--allow-outside-cwd", "Allow tool path arguments to escape the sandbox root \u2014 disabled by default").option("--tools <list>", "Comma-separated whitelist of tools to expose (default: all eligible tools)").option("--cwd <path>", "Working directory AND sandbox root (default: current directory)").action(async (options) => {
7582
- const { startMcpServer } = await import("./server-H3KIFOLK.js");
7478
+ const { startMcpServer } = await import("./server-25WVH5YX.js");
7583
7479
  await startMcpServer({
7584
7480
  allowDestructive: !!options.allowDestructive,
7585
7481
  allowOutsideCwd: !!options.allowOutsideCwd,
@@ -7588,7 +7484,7 @@ program.command("mcp-serve").description("Start an MCP server over STDIO, exposi
7588
7484
  });
7589
7485
  });
7590
7486
  program.command("ci").description("Headless PR review (code + security) \u2014 reads git/gh diff, optionally posts to PR. Designed for GitHub Actions.").option("--pr <num>", "PR number; diff fetched via `gh pr diff <num>`", (v) => parseInt(v, 10)).option("--base <ref>", "Base ref for `git diff <ref>...HEAD` (ignored when --pr set)").option("--post", "Post review as a PR comment (requires gh CLI + GH_TOKEN, needs --pr)").option("--no-update", "Always create a new comment instead of updating the previous aicli review").option("--skip-code", "Skip the code review section").option("--skip-security", "Skip the security review section").option("--detailed", "Use the detailed code-review prompt").option("--max-diff <n>", "Max diff chars sent to the model (default 30000)", (v) => parseInt(v, 10)).option("--provider <id>", "Override provider (default: config.defaultProvider)").option("--model <id>", "Override model").option("--dry-run", "Print result to stdout instead of posting (overrides --post)").action(async (options) => {
7591
- const { runCi } = await import("./ci-7YWXFKGE.js");
7487
+ const { runCi } = await import("./ci-2WFKSG2J.js");
7592
7488
  const result = await runCi({
7593
7489
  pr: options.pr,
7594
7490
  base: options.base,
@@ -7734,7 +7630,7 @@ program.command("hub [topic]").description("Start multi-agent hub (discuss / bra
7734
7630
  }),
7735
7631
  config.get("customProviders")
7736
7632
  );
7737
- const { startHub } = await import("./hub-SFMWUEUW.js");
7633
+ const { startHub } = await import("./hub-CHE7JDIH.js");
7738
7634
  await startHub(
7739
7635
  {
7740
7636
  topic: topic ?? "",