jinzd-ai-cli 0.4.186 → 0.4.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/{batch-IPALJR2D.js → batch-D6K2KHJK.js} +2 -2
  2. package/dist/{chat-index-2I7ZHRE5.js → chat-index-JXTYDRCY.js} +1 -1
  3. package/dist/{chat-index-BE4TPLFH.js → chat-index-WDMVP7BN.js} +1 -1
  4. package/dist/{chunk-MM3F43H6.js → chunk-4UZE4ADL.js} +54 -23
  5. package/dist/{chunk-V7NTQ6UB.js → chunk-DFQSQQEU.js} +1 -1
  6. package/dist/{chunk-RADH6ECW.js → chunk-IQ7JE43O.js} +1442 -1318
  7. package/dist/{chunk-JBTVDYJM.js → chunk-J3XSJCO5.js} +4 -4
  8. package/dist/{chunk-T5VKNPLD.js → chunk-KIGVJVX4.js} +52 -23
  9. package/dist/{chunk-ZLWYP3RB.js → chunk-MUQZOUV5.js} +1 -1
  10. package/dist/{chunk-2CLMIRKL.js → chunk-NRSAAMIF.js} +1 -1
  11. package/dist/{chunk-7HMX2MTY.js → chunk-ODAAPNSL.js} +1 -1
  12. package/dist/{chunk-KNGDSMMF.js → chunk-Q7SB3R25.js} +1 -1
  13. package/dist/{chunk-MIXN7VBY.js → chunk-UK6E2563.js} +1 -1
  14. package/dist/{chunk-OFP5BE7H.js → chunk-VPTRE7IW.js} +2 -2
  15. package/dist/{ci-FYXVC5MX.js → ci-42ZBP2SY.js} +3 -3
  16. package/dist/{constants-RB5H7L34.js → constants-NCWVAAI7.js} +1 -1
  17. package/dist/{doctor-cli-ZWLHBS43.js → doctor-cli-R3SWTL5Z.js} +5 -5
  18. package/dist/electron-server.js +1341 -1131
  19. package/dist/{hub-X4OBH5A3.js → hub-3ZGIM2FN.js} +1 -1
  20. package/dist/index.js +480 -584
  21. package/dist/{run-tests-625NA546.js → run-tests-IJYP6BMT.js} +1 -1
  22. package/dist/{run-tests-CRVIUT4O.js → run-tests-NS3SPH6S.js} +2 -2
  23. package/dist/{server-Q3A737OP.js → server-SVTSJ3PK.js} +5 -5
  24. package/dist/{server-O6ZMNWNS.js → server-TZRMRT3O.js} +208 -260
  25. package/dist/{task-orchestrator-TLUGDQMO.js → task-orchestrator-GMJ5PLVV.js} +5 -5
  26. package/dist/{usage-B4OU5CDJ.js → usage-IYMFSHDX.js} +2 -2
  27. package/package.json +1 -1
@@ -36,7 +36,7 @@ import {
36
36
  VERSION,
37
37
  buildUserIdentityPrompt,
38
38
  runTestsTool
39
- } from "./chunk-7HMX2MTY.js";
39
+ } from "./chunk-ODAAPNSL.js";
40
40
  import {
41
41
  hasSemanticIndex,
42
42
  semanticSearch
@@ -49,7 +49,7 @@ import {
49
49
  loadChatIndex,
50
50
  redactJson,
51
51
  searchChatMemory
52
- } from "./chunk-T5VKNPLD.js";
52
+ } from "./chunk-KIGVJVX4.js";
53
53
  import "./chunk-JV5N65KN.js";
54
54
  import "./chunk-3RG5ZIWI.js";
55
55
 
@@ -1552,625 +1552,1173 @@ Node.js does not automatically use system proxies. Try one of the following:
1552
1552
  // src/providers/openai-compatible.ts
1553
1553
  import OpenAI from "openai";
1554
1554
 
1555
- // src/core/agent-loop.ts
1556
- function partialTagTail(s, tag) {
1557
- const max = Math.min(s.length, tag.length - 1);
1558
- for (let len = max; len > 0; len--) {
1559
- if (s.endsWith(tag.slice(0, len))) return len;
1560
- }
1561
- return 0;
1555
+ // src/tools/hallucination.ts
1556
+ var HALLUCINATION_PATTERNS = [
1557
+ /文件路径[::]\s*`?[^\s`]+\.\w{1,5}/,
1558
+ // 文件路径: `path/to/file.ext`(要求文件扩展名)
1559
+ /已生成[::!!]/,
1560
+ // 已生成完成!
1561
+ /已保存到?\s*[`'"]/,
1562
+ // 已保存到 `path`(要求后跟路径引号)
1563
+ /已写入[::!!]/,
1564
+ // 已写入!
1565
+ /已创建[::!!]/,
1566
+ // 已创建!
1567
+ /File\s+(?:written|saved|created)\s+(?:to|as|at)/i,
1568
+ // File written to / saved as(要求介词)
1569
+ /生成完成[!!]/,
1570
+ // 生成完成!
1571
+ /✅\s*(?:文件|已[生保写创]|第)\S*\.\w{1,5}/,
1572
+ // ✅ 文件已保存 path.ext(要求文件扩展名)
1573
+ /文件已[成功]?创建/,
1574
+ // 文件已成功创建 / 文件已创建
1575
+ /教案已[成功]?[生创保写]/,
1576
+ // 教案已成功生成 / 教案已保存
1577
+ /已成功[保写创生]入?[::!!\s`'"]/,
1578
+ // 已成功保存 / 已成功写入 / 已成功创建
1579
+ /保存[到至]了?\s*[`'"]/,
1580
+ // 保存到了 `path` / 保存至 'path'
1581
+ /内容如下[::]/,
1582
+ // 内容如下:(后跟大段文件内容)
1583
+ /以下是.*(?:教案|文件|内容)[::]/
1584
+ // 以下是xx教案内容:(Kimi 常见模式)
1585
+ ];
1586
+ function detectsHallucinatedFileOp(content) {
1587
+ return HALLUCINATION_PATTERNS.some((pattern) => pattern.test(content));
1562
1588
  }
1563
- var ThinkTagFilter = class {
1564
- inThink = false;
1565
- buf = "";
1566
- push(raw) {
1567
- this.buf += raw;
1568
- let out = "";
1569
- while (this.buf.length > 0) {
1570
- if (!this.inThink) {
1571
- const open = this.buf.indexOf("<think>");
1572
- if (open === -1) {
1573
- const keep = partialTagTail(this.buf, "<think>");
1574
- out += this.buf.slice(0, this.buf.length - keep);
1575
- this.buf = this.buf.slice(this.buf.length - keep);
1576
- break;
1577
- }
1578
- out += this.buf.slice(0, open);
1579
- this.buf = this.buf.slice(open + "<think>".length);
1580
- this.inThink = true;
1581
- } else {
1582
- const close = this.buf.indexOf("</think>");
1583
- if (close === -1) {
1584
- const keep = partialTagTail(this.buf, "</think>");
1585
- this.buf = this.buf.slice(this.buf.length - keep);
1586
- break;
1587
- }
1588
- this.buf = this.buf.slice(close + "</think>".length);
1589
- this.inThink = false;
1590
- }
1589
+ function hadPreviousWriteToolCalls(extraMessages) {
1590
+ const msgs = extraMessages;
1591
+ return msgs.some((msg) => {
1592
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1593
+ return msg.tool_calls.some((tc) => {
1594
+ const fn = tc.function;
1595
+ const name = fn?.name ?? "";
1596
+ return name === "write_file" || name === "edit_file";
1597
+ });
1591
1598
  }
1592
- return out;
1593
- }
1594
- /** 流结束:若仍持留可能的半截 '<think>' 前缀且并未进入 think 块,它是真实文本。 */
1595
- flush() {
1596
- if (!this.inThink && this.buf) {
1597
- const tail = this.buf;
1598
- this.buf = "";
1599
- return tail;
1599
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1600
+ return msg.content.some((block) => {
1601
+ if (block.type !== "tool_use") return false;
1602
+ const name = block.name ?? "";
1603
+ return name === "write_file" || name === "edit_file";
1604
+ });
1600
1605
  }
1601
- this.buf = "";
1602
- return "";
1603
- }
1604
- };
1605
- function repairToolCallArguments(raw, onWarn) {
1606
- const argStr = raw || "{}";
1607
- try {
1608
- return JSON.parse(argStr);
1609
- } catch {
1610
- const truncated = argStr.trimEnd();
1611
- const lastComma = truncated.lastIndexOf(",");
1612
- const fixed = lastComma > 0 ? truncated.slice(0, lastComma) + "}" : truncated.slice(0, truncated.indexOf("{") + 1) + "}";
1613
- try {
1614
- const repaired = JSON.parse(fixed);
1615
- onWarn?.("Tool call JSON was truncated and auto-repaired. Some parameters may be missing.");
1616
- return repaired;
1617
- } catch {
1618
- onWarn?.("Tool call JSON could not be parsed, using empty arguments.");
1619
- return {};
1606
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
1607
+ return msg.parts.some((part) => {
1608
+ const fc = part.functionCall;
1609
+ const name = fc?.name ?? "";
1610
+ return name === "write_file" || name === "edit_file";
1611
+ });
1620
1612
  }
1621
- }
1613
+ return false;
1614
+ });
1622
1615
  }
1623
- async function consumeToolCallStream(stream, hooks = {}) {
1624
- const textParts = [];
1625
- const accumulators = /* @__PURE__ */ new Map();
1626
- let usage;
1627
- let rawContent;
1628
- let reasoningContent;
1629
- let finishReason;
1630
- let aborted = false;
1631
- const thinkFilter = new ThinkTagFilter();
1632
- const emitText = (raw) => {
1633
- const visible = thinkFilter.push(raw);
1634
- if (visible) {
1635
- textParts.push(visible);
1636
- hooks.onText?.(visible);
1616
+ var TOOL_CALL_REMINDER = `
1617
+
1618
+ [\u26A0\uFE0F Mandatory Tool Call Policy]
1619
+ When you need to create, write, or modify files, you MUST use the function calling API to invoke write_file or edit_file.
1620
+ NEVER claim "file saved", "file created", "written to", etc. in your response text without actually calling the tool.
1621
+ Describing file content in text without calling the tool = the file does not exist = task failure.
1622
+ If multiple files need to be generated, you MUST call write_file separately for each file \u2014 do not skip any.
1623
+ Do NOT output fake "completion summaries" unless you have actually completed all file writes via tool_calls.
1624
+
1625
+ CRITICAL \u2014 Batch file generation rules:
1626
+ 1. You MUST call write_file once per file. There are NO shortcuts.
1627
+ 2. After writing file N, immediately proceed to call write_file for file N+1. Do NOT stop to summarize.
1628
+ 3. If you find yourself typing file content into your response text instead of into a write_file call, STOP and use the tool.
1629
+ 4. Only produce a text summary AFTER all write_file calls have been made and returned success.
1630
+ 5. The system compares every "file saved" claim against actual tool calls. Phantom claims trigger an automatic retry \u2014 do not waste rounds.`;
1631
+ var HALLUCINATION_CORRECTION_MESSAGE = "You did NOT actually call the write_file tool \u2014 the file was NOT created! Please immediately use the write_file tool via the function calling API to perform the actual file write. Do NOT describe file content in text \u2014 you MUST invoke write_file through the tool_calls mechanism.";
1632
+ function extractClaimedFilePaths(content) {
1633
+ const paths = /* @__PURE__ */ new Set();
1634
+ const add = (p) => {
1635
+ const trimmed = p.trim().replace(/[,,。、;;::]+$/, "");
1636
+ if (trimmed && /\.\w{1,6}$/.test(trimmed)) paths.add(trimmed);
1637
+ };
1638
+ let m;
1639
+ const actionLineRe = /(?:已[生保写创]|saved|written|created|完成.*(?:写入|保存|创建|生成)|输出|file\s+(?:saved|written|created))/i;
1640
+ const backtickRe = /`([^`\n]+?\.\w{1,6})`/g;
1641
+ while ((m = backtickRe.exec(content)) !== null) {
1642
+ let pos = m.index;
1643
+ let linesBack = 0;
1644
+ while (linesBack < 9 && pos > 0) {
1645
+ pos--;
1646
+ if (content[pos] === "\n") linesBack++;
1647
+ }
1648
+ const windowStart = pos === 0 ? 0 : pos + 1;
1649
+ const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1650
+ const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1651
+ if (actionLineRe.test(window)) add(m[1]);
1652
+ }
1653
+ const zhRe = /(?:已保存(?:到)?|已写入(?:到)?|已创建|已生成|文件路径[::]|保存为|写入到)\s*[`'”””]?([^\s`'”””,,。\n]+?\.\w{1,6})/g;
1654
+ while ((m = zhRe.exec(content)) !== null) add(m[1]);
1655
+ const enRe = /(?:saved|written|created)\s+(?:to|as|at)\s+[`'”]?([^\s`'”\n,]+?\.\w{1,6})/gi;
1656
+ while ((m = enRe.exec(content)) !== null) add(m[1]);
1657
+ const checkRe = /✅[^\n`]*?[`'”]?([^\s`'”\n,,。]+?\.\w{1,6})/g;
1658
+ while ((m = checkRe.exec(content)) !== null) {
1659
+ let pos = m.index;
1660
+ let linesBack = 0;
1661
+ while (linesBack < 9 && pos > 0) {
1662
+ pos--;
1663
+ if (content[pos] === "\n") linesBack++;
1664
+ }
1665
+ const windowStart = pos === 0 ? 0 : pos + 1;
1666
+ const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1667
+ const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1668
+ if (actionLineRe.test(window)) add(m[1]);
1669
+ }
1670
+ return Array.from(paths);
1671
+ }
1672
+ function extractWrittenFilePaths(extraMessages) {
1673
+ const paths = /* @__PURE__ */ new Set();
1674
+ const msgs = extraMessages;
1675
+ const addFromArgs = (raw) => {
1676
+ if (typeof raw === "string") {
1677
+ try {
1678
+ const parsed = JSON.parse(raw);
1679
+ if (typeof parsed.path === "string") paths.add(parsed.path);
1680
+ } catch {
1681
+ }
1682
+ } else if (raw && typeof raw === "object") {
1683
+ const p = raw.path;
1684
+ if (typeof p === "string") paths.add(p);
1637
1685
  }
1638
1686
  };
1639
- try {
1640
- for await (const event of stream) {
1641
- if (hooks.signal?.aborted) {
1642
- aborted = true;
1643
- break;
1687
+ for (const msg of msgs) {
1688
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1689
+ for (const tc of msg.tool_calls) {
1690
+ const fn = tc.function;
1691
+ const name = fn?.name ?? "";
1692
+ if (name === "write_file" || name === "edit_file") {
1693
+ addFromArgs(fn?.arguments);
1694
+ }
1644
1695
  }
1645
- switch (event.type) {
1646
- case "text_delta":
1647
- emitText(event.delta);
1648
- break;
1649
- case "thinking_start":
1650
- hooks.onThinkingStart?.();
1651
- break;
1652
- case "thinking_delta":
1653
- hooks.onThinkingDelta?.(event.delta);
1654
- break;
1655
- case "thinking_end":
1656
- hooks.onThinkingEnd?.();
1657
- break;
1658
- case "tool_call_start":
1659
- accumulators.set(event.index, { id: event.id, name: event.name, arguments: "" });
1660
- hooks.onToolCallStart?.(event.index, event.id, event.name);
1661
- break;
1662
- case "tool_call_delta": {
1663
- const acc = accumulators.get(event.index);
1664
- if (acc) acc.arguments += event.argumentsDelta;
1665
- break;
1696
+ }
1697
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1698
+ for (const block of msg.content) {
1699
+ if (block.type !== "tool_use") continue;
1700
+ const name = block.name ?? "";
1701
+ if (name === "write_file" || name === "edit_file") {
1702
+ addFromArgs(block.input);
1666
1703
  }
1667
- case "tool_call_end":
1668
- break;
1669
- case "done":
1670
- if (event.usage) usage = event.usage;
1671
- if (event.rawContent) rawContent = event.rawContent;
1672
- if (event.reasoningContent) reasoningContent = event.reasoningContent;
1673
- if (event.finishReason) finishReason = event.finishReason;
1674
- break;
1675
1704
  }
1676
1705
  }
1677
- } catch (err) {
1678
- if (err instanceof Error && (err.name === "AbortError" || err.message.includes("aborted"))) {
1679
- aborted = true;
1680
- } else {
1681
- throw err;
1706
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
1707
+ for (const part of msg.parts) {
1708
+ const fc = part.functionCall;
1709
+ if (!fc) continue;
1710
+ const name = fc.name ?? "";
1711
+ if (name === "write_file" || name === "edit_file") {
1712
+ addFromArgs(fc.args);
1713
+ }
1714
+ }
1682
1715
  }
1683
1716
  }
1684
- const tail = thinkFilter.flush();
1685
- if (tail && !aborted) {
1686
- textParts.push(tail);
1687
- hooks.onText?.(tail);
1688
- }
1689
- const textContent = textParts.join("");
1690
- if (aborted) {
1691
- return { textContent, toolCalls: [], usage, rawContent, reasoningContent, finishReason, aborted };
1692
- }
1693
- const toolCalls = [];
1694
- for (const [, acc] of accumulators) {
1695
- toolCalls.push({
1696
- id: acc.id,
1697
- name: acc.name,
1698
- arguments: repairToolCallArguments(acc.arguments, hooks.onWarn)
1699
- });
1700
- }
1701
- if (toolCalls.length > 0) {
1702
- if (rawContent) {
1703
- toolCalls._rawContent = rawContent;
1717
+ return Array.from(paths);
1718
+ }
1719
+ function extractBashCommands(extraMessages) {
1720
+ const cmds = [];
1721
+ const msgs = extraMessages;
1722
+ const addCmd = (raw) => {
1723
+ if (typeof raw === "string") {
1724
+ try {
1725
+ const parsed = JSON.parse(raw);
1726
+ if (typeof parsed.command === "string") cmds.push(parsed.command);
1727
+ } catch {
1728
+ }
1729
+ } else if (raw && typeof raw === "object") {
1730
+ const c = raw.command;
1731
+ if (typeof c === "string") cmds.push(c);
1704
1732
  }
1705
- if (textContent) {
1706
- toolCalls._streamedText = textContent;
1733
+ };
1734
+ for (const msg of msgs) {
1735
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1736
+ for (const tc of msg.tool_calls) {
1737
+ const fn = tc.function;
1738
+ if (fn?.name === "bash") addCmd(fn?.arguments);
1739
+ }
1707
1740
  }
1708
- }
1709
- return { textContent, toolCalls, usage, rawContent, reasoningContent, finishReason, aborted };
1710
- }
1711
- var FREE_ROUND_TOOLS = /* @__PURE__ */ new Set(["write_todos"]);
1712
- var MAX_CONSECUTIVE_FREE_ROUNDS = 3;
1713
- var FreeRoundTracker = class {
1714
- consecutive = 0;
1715
- /** 返回 true 表示本轮不消耗有效轮次(调用方执行 round--)。 */
1716
- apply(toolNames) {
1717
- const allFree = toolNames.length > 0 && toolNames.every((n) => FREE_ROUND_TOOLS.has(n));
1718
- if (!allFree) {
1719
- this.consecutive = 0;
1720
- return false;
1721
- }
1722
- this.consecutive++;
1723
- return this.consecutive <= MAX_CONSECUTIVE_FREE_ROUNDS;
1724
- }
1725
- };
1726
- var BudgetWarner = class {
1727
- constructor(maxToolRounds) {
1728
- this.maxToolRounds = maxToolRounds;
1729
- this.noteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
1730
- const lowRaw = Math.max(5, Math.floor(maxToolRounds * 0.1));
1731
- const criticalRaw = Math.max(3, Math.floor(maxToolRounds * 0.05));
1732
- this.lowAt = Math.min(lowRaw, this.noteAt - 1);
1733
- this.criticalAt = Math.min(criticalRaw, this.lowAt - 1);
1734
- }
1735
- noteAt;
1736
- lowAt;
1737
- criticalAt;
1738
- warnedNote = false;
1739
- warnedLow = false;
1740
- warnedCritical = false;
1741
- check(roundsLeft) {
1742
- if (!this.warnedCritical && roundsLeft <= this.criticalAt) {
1743
- this.warnedCritical = true;
1744
- return {
1745
- level: "critical",
1746
- injectMessage: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`,
1747
- displayMessage: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining`
1748
- };
1749
- }
1750
- if (!this.warnedLow && roundsLeft <= this.lowAt) {
1751
- this.warnedLow = true;
1752
- return {
1753
- level: "low",
1754
- injectMessage: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. Use efficient approaches (batch edits, fewer reads). If you cannot finish everything, summarize what's done and what remains.`,
1755
- displayMessage: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining`
1756
- };
1741
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1742
+ for (const block of msg.content) {
1743
+ if (block.type === "tool_use" && block.name === "bash") addCmd(block.input);
1744
+ }
1757
1745
  }
1758
- if (!this.warnedNote && roundsLeft <= this.noteAt) {
1759
- this.warnedNote = true;
1760
- return {
1761
- level: "note",
1762
- injectMessage: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${this.maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
1763
- };
1746
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
1747
+ for (const part of msg.parts) {
1748
+ const fc = part.functionCall;
1749
+ if (fc && fc.name === "bash") addCmd(fc.args);
1750
+ }
1764
1751
  }
1765
- return null;
1766
1752
  }
1767
- };
1768
- var EMPTY_RESPONSE_NUDGE = "Your previous response was empty \u2014 no text and no tool calls. This usually means the context window is nearly full. Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains. Do NOT repeat earlier long outputs.";
1769
- function describeFinishReason(fr) {
1770
- if (fr === "length") return "output limit reached (finish_reason=length)";
1771
- if (fr === "content_filter") return "content blocked (finish_reason=content_filter)";
1772
- if (fr) return `empty response (finish_reason=${fr})`;
1773
- return "empty response";
1753
+ return cmds;
1774
1754
  }
1775
- function emptyResponseHint(fr) {
1776
- if (fr === "length") return "Output token limit hit \u2014 try /compact to reduce context, raise maxTokens, or /model to switch.";
1777
- if (fr === "content_filter") return "Content was blocked by the provider filter.";
1778
- return "Context window may be exhausted or max_tokens too low.";
1755
+ function findPhantomClaims(content, extraMessages) {
1756
+ const claimed = extractClaimedFilePaths(content);
1757
+ if (claimed.length === 0) return [];
1758
+ const normalize = (p) => p.replace(/\\/g, "/").toLowerCase().replace(/^\.\//, "");
1759
+ const basename6 = (p) => {
1760
+ const parts = normalize(p).split("/");
1761
+ return parts[parts.length - 1] ?? "";
1762
+ };
1763
+ const written = extractWrittenFilePaths(extraMessages).map(normalize);
1764
+ const writtenBases = new Set(written.map(basename6));
1765
+ const writtenFull = new Set(written);
1766
+ const bashText = extractBashCommands(extraMessages).map((c) => c.replace(/\\/g, "/").toLowerCase()).join("\n");
1767
+ return claimed.filter((raw) => {
1768
+ const norm2 = normalize(raw);
1769
+ if (writtenFull.has(norm2)) return false;
1770
+ for (const w of writtenFull) {
1771
+ if (w.endsWith("/" + norm2) || norm2.endsWith("/" + w)) return false;
1772
+ }
1773
+ if (writtenBases.has(basename6(norm2))) return false;
1774
+ const base = basename6(norm2);
1775
+ if (base && bashText.includes(base)) return false;
1776
+ return true;
1777
+ });
1779
1778
  }
1780
- var EmptyResponseGuard = class {
1781
- retries = 0;
1782
- onEmpty(canRetry, finishReason) {
1783
- if (this.retries === 0 && canRetry) {
1784
- this.retries++;
1785
- return {
1786
- action: "nudge",
1787
- injectMessage: EMPTY_RESPONSE_NUDGE,
1788
- displayMessage: `\u26A0 ${describeFinishReason(finishReason)} \u2014 nudging AI to continue...`
1789
- };
1790
- }
1791
- return {
1792
- action: "stop",
1793
- displayMessage: "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop.",
1794
- hint: emptyResponseHint(finishReason)
1795
- };
1796
- }
1797
- /** 非空响应到达 → 重置计数(下次空响应仍可 nudge 一次)。 */
1798
- onNonEmpty() {
1799
- this.retries = 0;
1800
- }
1801
- };
1802
- var ContextPressureMonitor = class {
1803
- warned80 = false;
1804
- check(requestTokens, contextWindow) {
1805
- if (contextWindow <= 0) return { action: "ok", ratio: 0 };
1806
- const ratio = requestTokens / contextWindow;
1807
- if (ratio >= 0.95) return { action: "abort", ratio };
1808
- if (ratio >= 0.8 && !this.warned80) {
1809
- this.warned80 = true;
1810
- return {
1811
- action: "warn",
1812
- ratio,
1813
- injectMessage: `\u26A0\uFE0F Context pressure: ~${Math.round(ratio * 100)}% of the ${contextWindow.toLocaleString()}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
1814
- };
1815
- }
1816
- return { action: "ok", ratio };
1817
- }
1818
- };
1819
- function accumulateUsage(total, delta) {
1820
- if (!delta) return;
1821
- total.inputTokens += delta.inputTokens;
1822
- total.outputTokens += delta.outputTokens;
1823
- total.cacheCreationTokens += delta.cacheCreationTokens ?? 0;
1824
- total.cacheReadTokens += delta.cacheReadTokens ?? 0;
1779
+ function buildPhantomCorrectionMessage(phantoms) {
1780
+ const list = phantoms.map((p) => ` - ${p}`).join("\n");
1781
+ return "You claimed to have written the following file(s), but no matching write_file tool call was actually made in this turn:\n" + list + '\n\nEach of these files does NOT exist on disk. You MUST now invoke write_file (via the function calling API) for every missing file listed above. Do NOT output another "completion summary" until the tool calls have actually been made.';
1825
1782
  }
1826
- function buildRoundBudgetHint(opts) {
1827
- const pauseHint = opts.autoPauseInterval > 0 ? `
1828
- - Every ${opts.autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.` : "";
1829
- if (opts.planMode) {
1830
- return `
1831
-
1832
- [Tool Round Budget \u2014 Plan Mode]
1833
- You have a maximum of ${opts.maxToolRounds} tool call rounds. You are in READ-ONLY Plan Mode:
1834
- - Only use: read_file, list_dir, grep_files, glob_files, ask_user, write_todos
1835
- - Do NOT attempt to call bash, write_file, edit_file \u2014 they are disabled
1836
- - Do NOT write shell commands or code blocks as a substitute for tool calls
1837
- - Do NOT read the same file more than once
1838
- - Call write_todos ONCE to present your plan, then give a text summary
1839
- - If the user asks you to execute anything, respond: "Please type /plan execute to switch to execute mode."${pauseHint}`;
1783
+ var DSML_PIPE_CLASS = "[|\\uFF5C\\u2502\\u2503\\u01C0]";
1784
+ var PSEUDO_TOOL_CALL_PATTERNS = [
1785
+ // <tool_call name="..."> ... </tool_call> (DeepSeek V4 thinking, GLM)
1786
+ /<tool_call\s+name\s*=\s*["'][\w._-]+["']/,
1787
+ // <function_calls> ... </function_calls> (Anthropic-style as text)
1788
+ /<\/?function_calls\s*>/,
1789
+ // <invoke name="..." /> (Anthropic XML tool-call, which is real for
1790
+ // Claude API but is text/garbage for any other provider's plain stream)
1791
+ /<invoke\s+name\s*=\s*["'][\w._-]+["']/,
1792
+ // <tool_use> ... <tool_use_id> (Claude flavor leaked into text)
1793
+ /<tool_use(?:_id)?\b/,
1794
+ // ```tool_call\n...\n``` markdown fences (Kimi/Zhipu fallback)
1795
+ /```\s*tool_call\b/i,
1796
+ // Bare JSON tool-call block: lines starting with `{"name":"...","arguments":`
1797
+ /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:/m,
1798
+ // v0.4.112: <think> ... </think> reasoning blocks. The REPL renderer
1799
+ // suppresses these from terminal output, but tee mode writes the raw
1800
+ // delta to disk → reasoning leaks into the saved file. We saw a 600-line
1801
+ // 审计报告.md whose first 57 lines were the model's planning monologue.
1802
+ /<think\b[^>]*>/i,
1803
+ // v0.4.112: leading ```markdown / ```md fence wrapping the entire document.
1804
+ // DeepSeek V4 Pro Thinking sometimes "politely" wraps its document output
1805
+ // in a markdown fence. The fence ends up literally in the saved file.
1806
+ /^\s*```\s*(?:markdown|md|gfm)\b/im,
1807
+ // v0.4.173: DeepSeek V4 DSML pseudo-tool-call markup leaked as text. DeepSeek
1808
+ // emits a fake tool call using its native special-token markup
1809
+ // <||DSML||tool_calls> <||DSML||invoke name="write"> <||DSML||parameter …>
1810
+ // where the "pipe" is U+FF5C FULLWIDTH VERTICAL LINE (the same token family as
1811
+ // <|User|>/<|Assistant|>). We saw an exam paper saved via save_last_response
1812
+ // whose tee stream was preamble + this DSML wrapper + the real document body.
1813
+ // The earlier <invoke …> pattern uses ASCII < > and does NOT match these.
1814
+ new RegExp(`<\\/?\\s*${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`, "i")
1815
+ ];
1816
+ function detectPseudoToolCalls(content) {
1817
+ if (!content || content.length === 0) return null;
1818
+ for (const re of PSEUDO_TOOL_CALL_PATTERNS) {
1819
+ if (re.test(content)) return re.source;
1840
1820
  }
1841
- return `
1842
-
1843
- [Tool Round Budget]
1844
- You have a maximum of ${opts.maxToolRounds} tool call rounds for this task. Plan efficiently:
1845
- - Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.
1846
- - Do NOT read the same file more than once \u2014 use the content from previous reads.
1847
- - Prioritize the most critical tasks first in case rounds run out.
1848
- - When remaining rounds are low, focus on completing the current task and summarizing.${pauseHint}`;
1849
- }
1850
- function buildRoundsExhaustedPrompt(maxToolRounds) {
1851
- return `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:
1852
- 1. What work has been completed so far
1853
- 2. What tasks remain unfinished
1854
- 3. What the user can do next (e.g. send another request to continue)`;
1821
+ return null;
1855
1822
  }
1856
- function buildUserStopMessage(effectiveRound, maxToolRounds) {
1857
- return `The user has stopped the task at round ${effectiveRound}/${maxToolRounds}. Do not call any more tools. Summarize what has been completed and what remains.`;
1823
+ function stripPseudoToolCalls(content) {
1824
+ if (!content) return content;
1825
+ let out = content;
1826
+ const dsmlBody = extractDsmlContent(out);
1827
+ if (dsmlBody !== null) {
1828
+ out = dsmlBody;
1829
+ } else {
1830
+ out = stripDsmlTags(out);
1831
+ }
1832
+ out = out.replace(/<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi, "");
1833
+ out = out.replace(/<tool_call\b[^>]*\/>/gi, "");
1834
+ out = out.replace(/<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi, "");
1835
+ out = out.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
1836
+ out = out.replace(/<invoke\b[^>]*\/>/gi, "");
1837
+ out = out.replace(/<tool_use(?:_id)?\b[^>]*>[\s\S]*?<\/tool_use(?:_id)?>/gi, "");
1838
+ out = out.replace(/```\s*tool_call\b[\s\S]*?```/gi, "");
1839
+ out = out.replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, "");
1840
+ out = out.replace(/<think\b[^>]*>[\s\S]*?(?=^#{1,3}\s+\S|\n\s*\n)/im, "");
1841
+ out = out.replace(/^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:[\s\S]*?\}\s*$/gm, "");
1842
+ out = unwrapDocumentFence(out);
1843
+ out = peelMetaNarration(out);
1844
+ out = out.replace(/\n{3,}/g, "\n\n").trim();
1845
+ return out;
1858
1846
  }
1859
- function summarizeRecentTools(history, interval) {
1860
- const recent = history.slice(-interval);
1861
- const counts = /* @__PURE__ */ new Map();
1862
- for (const rh of recent) {
1863
- for (const t of rh.tools) counts.set(t, (counts.get(t) || 0) + 1);
1847
+ function extractDsmlContent(content) {
1848
+ if (!content) return null;
1849
+ const P = DSML_PIPE_CLASS;
1850
+ const re = new RegExp(
1851
+ `<\\s*${P}+\\s*DSML\\s*${P}+\\s*parameter\\b[^>]*\\bname\\s*=\\s*["']content["'][^>]*>([\\s\\S]*?)<\\s*/\\s*${P}+\\s*DSML\\s*${P}+\\s*parameter\\s*>`,
1852
+ "i"
1853
+ );
1854
+ const m = content.match(re);
1855
+ if (m && typeof m[1] === "string") {
1856
+ const body = m[1].trim();
1857
+ return body.length > 0 ? body : null;
1864
1858
  }
1865
- return [...counts.entries()].sort((a, b) => b[1] - a[1]).map(([name, count]) => count > 1 ? `${name}\xD7${count}` : name).join(", ");
1859
+ return null;
1866
1860
  }
1867
-
1868
- // src/providers/openai-compatible.ts
1869
- function toUsage(u) {
1870
- if (!u) return void 0;
1871
- const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
1872
- const usage = {
1873
- inputTokens: Math.max(0, u.prompt_tokens - cached),
1874
- outputTokens: u.completion_tokens
1875
- };
1876
- if (cached > 0) usage.cacheReadTokens = cached;
1877
- return usage;
1861
+ function stripDsmlTags(content) {
1862
+ const P = DSML_PIPE_CLASS;
1863
+ let out = content;
1864
+ out = out.replace(
1865
+ new RegExp(
1866
+ `<\\s*${P}+\\s*DSML\\s*${P}+\\s*tool_calls\\b[\\s\\S]*?<\\s*/\\s*${P}+\\s*DSML\\s*${P}+\\s*tool_calls\\s*>`,
1867
+ "gi"
1868
+ ),
1869
+ ""
1870
+ );
1871
+ out = out.replace(new RegExp(`<\\s*/?\\s*${P}+\\s*DSML\\s*${P}+[^>]*>`, "gi"), "");
1872
+ return out;
1878
1873
  }
1879
- var OpenAICompatibleProvider = class extends BaseProvider {
1880
- client;
1881
- defaultTimeout = 6e4;
1882
- // ms
1883
- /** 子类设为 false 可禁用流式工具调用(虚假声明检测需要完整响应) */
1884
- enableStreamingToolCalls = true;
1885
- async initialize(apiKey, options) {
1886
- if (options?.timeout !== void 0) {
1887
- this.defaultTimeout = options.timeout;
1874
+ function unwrapDocumentFence(content) {
1875
+ const trimmed = content.trim();
1876
+ const open = trimmed.match(/^```\s*(markdown|md|gfm)?\s*\n/i);
1877
+ if (!open) return content;
1878
+ const afterOpen = trimmed.slice(open[0].length);
1879
+ const closeMatch = afterOpen.match(/\n```\s*$/);
1880
+ if (!closeMatch) return content;
1881
+ const inner = afterOpen.slice(0, afterOpen.length - closeMatch[0].length);
1882
+ if (inner.length < 200) return content;
1883
+ return inner;
1884
+ }
1885
/**
 * Strips model "narration" that surrounds a generated document.
 *
 * Three independent peels are applied, in order:
 *  1. A short intro (< 800 chars) before the first level 1-3 heading is
 *     dropped when it contains an intro marker ("Here is", "以下是", ...).
 *  2. A leading `---` rule is dropped when a heading follows within 100 chars.
 *  3. A final line containing a coda marker ("Above is the", "以上就是", ...)
 *     is dropped when it starts in the second half of the text.
 *
 * @param {string} content - Raw model output.
 * @returns {string} The text with narration removed, trimmed.
 */
function peelMetaNarration(content) {
  let out = content;
  const firstHeadingMatch = out.match(/^#{1,3}\s+\S.*$/m);
  if (firstHeadingMatch && firstHeadingMatch.index !== undefined) {
    const before = out.slice(0, firstHeadingMatch.index);
    const hasIntroMarker = /(?:以下(?:即为|是|就是)|这是|Here\s+is|Below\s+is|完整的?(?:审计报告|内容|文档)|审计报告(?:如下|的完整内容))/i.test(before);
    if (before.length > 0 && before.length < 800 && hasIntroMarker) {
      out = out.slice(firstHeadingMatch.index);
    }
  }
  if (out.startsWith("---\n")) {
    const headingAfterRule = out.slice(4).match(/^#{1,3}\s+\S/m);
    if (headingAfterRule && headingAfterRule.index !== undefined && headingAfterRule.index < 100) {
      out = out.slice(4 + headingAfterRule.index);
    }
  }
  // The coda pattern ends in a non-multiline `$`, so it can only match the
  // very last line of the string. Trailing newlines would hide the coda line,
  // and the result is trimmed anyway, so drop them before matching.
  out = out.trimEnd();
  const codaMatch = out.match(/\n[^\n]*?(?:以上(?:即为|就是|内容|为完整的?)|Above\s+is\s+the|本报告已经|该报告(?:已经|包含)|报告(?:已|至此)结束)[^\n]*$/i);
  if (codaMatch && codaMatch.index !== undefined && codaMatch.index > out.length / 2) {
    out = out.slice(0, codaMatch.index);
  }
  return out.trim();
}
1907
// Patterns that, on their own, prove the output leaked prompt scaffolding.
var META_NARRATION_HARD_MARKERS = [
  /\[⚠️\s*CONTENT GENERATION MODE\]/,
  /CONTENT_ONLY_STREAM_REMINDER\b/,
  /<system-reminder>/i
];

// Softer signals of the model talking about its task; two distinct patterns
// must match before the output is flagged.
var META_NARRATION_HEURISTICS = [
  /\bthe user (?:is asking me|wants me|is requesting|expects me)\b/i,
  /\blet me (?:re-?read|re-?consider|reconsider|think about|carefully (?:re-?read|consider))\b/i,
  /\bI'?m (?:in (?:a )?content-only|in CONTENT-ONLY|currently in)\b/i,
  /\bI think (?:there might be|I should|I cannot|the (?:user|best)|maybe)\b/i,
  /\bWait,?\s+let me\b/i,
  /\bActually,?\s+I\b/i,
  /\bI need to be honest with the user\b/i,
  /\bI(?:'m| am) in a special mode\b/i,
  /\bGiven that I cannot\b/i
];

/**
 * Inspects the first 2000 characters of `content` for meta-narration.
 *
 * @param {string} content - Model output to inspect.
 * @returns {string|null} The source of the matching hard marker, or
 *   `meta-narration:<source of first matching heuristic>` when at least two
 *   heuristics match and the head contains no level 1-3 heading; otherwise
 *   `null`.
 */
function detectMetaNarration(content) {
  if (!content) return null;
  const head = content.slice(0, 2000);

  const hardMarker = META_NARRATION_HARD_MARKERS.find((pattern) => pattern.test(head));
  if (hardMarker !== undefined) return hardMarker.source;

  // A heading in the head means a document body is present; heuristics are skipped.
  if (/^#{1,3}\s+\S/m.test(head)) return null;

  const matched = META_NARRATION_HEURISTICS.filter((pattern) => pattern.test(head));
  return matched.length >= 2 ? `meta-narration:${matched[0].source}` : null;
}
1941
/**
 * Heuristic check that `content` is a document rather than a short remark.
 *
 * @param {string} content - Text to classify.
 * @returns {boolean} `true` when the text has at least 200 characters and
 *   either contains a Markdown heading (levels 1-6) or at least three
 *   blank-line-separated paragraphs longer than 30 trimmed characters.
 */
function looksLikeDocumentBody(content) {
  const longEnough = Boolean(content) && content.length >= 200;
  if (!longEnough) return false;

  const hasHeading = /^#{1,6}\s+\S/m.test(content);
  if (hasHeading) return true;

  let substantialParagraphs = 0;
  for (const chunk of content.split(/\n\s*\n/)) {
    if (chunk.trim().length > 30) substantialParagraphs += 1;
  }
  return substantialParagraphs >= 3;
}
1948
/**
 * Cuts the system prompt at the first "[⚠️ Mandatory Tool Call Policy]"
 * header and drops trailing whitespace left before it.
 *
 * @param {string} systemPrompt - Full system prompt (may be empty/undefined).
 * @returns {string} The prompt without the policy section; the input itself
 *   when it is falsy or contains no policy header.
 */
function stripToolCallReminder(systemPrompt) {
  if (!systemPrompt) return systemPrompt;
  const [beforePolicy, ...afterPolicy] = systemPrompt.split("[\u26A0\uFE0F Mandatory Tool Call Policy]");
  return afterPolicy.length === 0 ? systemPrompt : beforePolicy.trimEnd();
}
1954
// Prompt text that tells the model to stop calling tools and to emit only the
// requested document body, starting with its top-level heading. It refers to
// the save_last_response tee stream; where it is injected is not visible here.
+ var TEE_FINAL_USER_NUDGE = `\u26A0\uFE0F STOP using tools NOW. The save_last_response tee stream is open and capturing every token of THIS response. Output ONLY the requested document body, in markdown. The very first character of your response must be the document's top-level heading (e.g. "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report"). Do NOT print <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, <think>, or any other tool-call markup. Do NOT narrate that you will produce the document \u2014 just produce it. Do NOT pretend to call tools \u2014 there are none in this stream.`;
1955
// Multi-line prompt section (it begins with a blank line) describing the
// content-only streaming pass. Its "[⚠️ CONTENT GENERATION MODE]" header is
// also what the first META_NARRATION_HARD_MARKERS pattern looks for.
+ var CONTENT_ONLY_STREAM_REMINDER = `
1956
+
1957
+ [\u26A0\uFE0F CONTENT GENERATION MODE]
1958
+ You are now in a CONTENT-ONLY streaming pass. The file at the configured path will receive every token of THIS response.
1959
+ - Do NOT emit <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, or any tool-call XML/JSON markup.
1960
+ - Do NOT print "I will now call ...", "let me read ...", "<think>" reasoning blocks (the surrounding REPL handles those separately \u2014 they should not enter the saved file).
1961
+ - Do NOT pretend to call tools. There are NO tools available in this stream \u2014 only your text output is captured.
1962
+ - Produce ONLY the requested document body. Markdown is fine. Code blocks are fine. Tool-call markup is NOT.
1963
+ - If you accidentally start a <tool_call>, STOP and produce the document body instead.
1964
+
1965
+ The file is closed and named when this stream ends. If your output contains pseudo-tool-call markup, the save will be REJECTED and you will be asked to retry.`;
1966
+
1967
+ // src/core/agent-loop.ts
1968
/**
 * Length of the longest proper prefix of `tag` that `s` ends with.
 *
 * Used to decide how many trailing characters of a stream chunk might be the
 * start of `tag` and therefore must be held back until more text arrives.
 *
 * @param {string} s - Text received so far.
 * @param {string} tag - Tag being searched for (e.g. "<think>").
 * @returns {number} Number of trailing characters of `s` to keep (0 if none).
 */
function partialTagTail(s, tag) {
  let candidate = Math.min(s.length, tag.length - 1);
  while (candidate > 0 && !s.endsWith(tag.slice(0, candidate))) {
    candidate -= 1;
  }
  // `candidate` starts negative for an empty tag; report that as "nothing to keep".
  return Math.max(candidate, 0);
}
1975
/**
 * Streaming filter that removes `<think>...</think>` sections from text that
 * arrives in arbitrary chunks.
 *
 * `inThink` records whether the stream is currently inside a think block;
 * `buf` holds trailing characters that may be the beginning of a tag split
 * across chunk boundaries.
 */
var ThinkTagFilter = class {
  inThink = false;
  buf = "";

  /**
   * Feeds one chunk and returns the text that is now safe to show.
   *
   * @param {string} raw - Next chunk of streamed text.
   * @returns {string} Visible text outside think blocks (may be empty).
   */
  push(raw) {
    const OPEN_TAG = "<think>";
    const CLOSE_TAG = "</think>";
    const visible = [];
    let pending = this.buf + raw;

    while (pending.length > 0) {
      const wanted = this.inThink ? CLOSE_TAG : OPEN_TAG;
      const at = pending.indexOf(wanted);

      if (at === -1) {
        // No full tag yet: hold back only a possible partial tag at the end.
        const cut = pending.length - partialTagTail(pending, wanted);
        if (!this.inThink) visible.push(pending.slice(0, cut));
        pending = pending.slice(cut);
        break;
      }

      // Text before an opening tag is visible; text before a closing tag is not.
      if (!this.inThink) visible.push(pending.slice(0, at));
      pending = pending.slice(at + wanted.length);
      this.inThink = !this.inThink;
    }

    this.buf = pending;
    return visible.join("");
  }

  /**
   * End of stream: a held-back partial `<think>` prefix that never became a
   * think block is real text and is returned; anything buffered while inside
   * a think block is discarded.
   *
   * @returns {string} Remaining visible text, or "".
   */
  flush() {
    const tail = this.inThink ? "" : this.buf;
    this.buf = "";
    return tail;
  }
};
2017
/**
 * Parses the JSON argument string of a tool call, repairing truncated input.
 *
 * Repair strategy for unparsable text: cut at the last comma (dropping the
 * unfinished trailing member) and close the object; without a usable comma,
 * keep only the text up to the first `{` and close it.
 *
 * The result is always a plain object. Valid JSON that is not an object
 * (`null`, arrays, strings, numbers) is replaced by `{}` because callers
 * index the result by parameter name. A value that is already an object is
 * returned as-is instead of being pushed through the string repair path.
 *
 * @param {string|object|undefined} raw - Raw `arguments` payload.
 * @param {(message: string) => void} [onWarn] - Receives a warning whenever
 *   the arguments had to be repaired or discarded.
 * @returns {object} Parsed (possibly partial) arguments, `{}` as last resort.
 */
function repairToolCallArguments(raw, onWarn) {
  const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
  const ensureObject = (value) => {
    if (isPlainObject(value)) return value;
    onWarn?.("Tool call arguments were not a JSON object, using empty arguments.");
    return {};
  };

  if (isPlainObject(raw)) return raw;

  const argStr = typeof raw === "string" && raw ? raw : raw ? String(raw) : "{}";
  try {
    return ensureObject(JSON.parse(argStr));
  } catch {
    const truncated = argStr.trimEnd();
    const lastComma = truncated.lastIndexOf(",");
    const fixed = lastComma > 0
      ? truncated.slice(0, lastComma) + "}"
      : truncated.slice(0, truncated.indexOf("{") + 1) + "}";
    try {
      const repaired = JSON.parse(fixed);
      onWarn?.("Tool call JSON was truncated and auto-repaired. Some parameters may be missing.");
      return ensureObject(repaired);
    } catch {
      onWarn?.("Tool call JSON could not be parsed, using empty arguments.");
      return {};
    }
  }
}
2035
/**
 * Drains a stream of tool-call events into a single result object.
 *
 * Text deltas pass through a `ThinkTagFilter` before being collected and
 * reported via `hooks.onText`. Tool calls are accumulated per `index` and
 * their argument strings parsed with `repairToolCallArguments` once the
 * stream ends. An abort (signal already aborted when an event arrives, or an
 * error named "AbortError" / whose message contains "aborted") ends the
 * stream early and yields no tool calls; any other error is rethrown.
 *
 * @param {AsyncIterable<object>} stream - Events with a `type` field.
 * @param {object} [hooks] - Optional `signal`, `onText`, `onThinkingStart`,
 *   `onThinkingDelta`, `onThinkingEnd`, `onToolCallStart`, `onWarn`.
 * @returns {Promise<object>} `{ textContent, toolCalls, usage, rawContent,
 *   reasoningContent, finishReason, aborted }`.
 */
async function consumeToolCallStream(stream, hooks = {}) {
  const visibleChunks = [];
  const pendingCalls = new Map();
  const thinkFilter = new ThinkTagFilter();
  let usage;
  let rawContent;
  let reasoningContent;
  let finishReason;
  let aborted = false;

  // Records visible text and forwards it to the host; empty text is ignored.
  const deliver = (text) => {
    if (!text) return;
    visibleChunks.push(text);
    hooks.onText?.(text);
  };

  try {
    for await (const event of stream) {
      if (hooks.signal?.aborted) {
        aborted = true;
        break;
      }
      const { type } = event;
      if (type === "text_delta") {
        deliver(thinkFilter.push(event.delta));
      } else if (type === "thinking_start") {
        hooks.onThinkingStart?.();
      } else if (type === "thinking_delta") {
        hooks.onThinkingDelta?.(event.delta);
      } else if (type === "thinking_end") {
        hooks.onThinkingEnd?.();
      } else if (type === "tool_call_start") {
        pendingCalls.set(event.index, { id: event.id, name: event.name, arguments: "" });
        hooks.onToolCallStart?.(event.index, event.id, event.name);
      } else if (type === "tool_call_delta") {
        // Deltas for an index that was never started are dropped.
        const call = pendingCalls.get(event.index);
        if (call) call.arguments += event.argumentsDelta;
      } else if (type === "done") {
        // Only truthy fields overwrite what was captured earlier.
        usage = event.usage || usage;
        rawContent = event.rawContent || rawContent;
        reasoningContent = event.reasoningContent || reasoningContent;
        finishReason = event.finishReason || finishReason;
      }
      // "tool_call_end" and unknown event types need no handling.
    }
  } catch (err) {
    const isAbort = err instanceof Error && (err.name === "AbortError" || err.message.includes("aborted"));
    if (!isAbort) throw err;
    aborted = true;
  }

  // The filter is always flushed, but held-back text is only shown when the
  // stream finished normally.
  const tail = thinkFilter.flush();
  if (!aborted) deliver(tail);

  const textContent = visibleChunks.join("");
  if (aborted) {
    return { textContent, toolCalls: [], usage, rawContent, reasoningContent, finishReason, aborted };
  }

  const toolCalls = Array.from(pendingCalls.values(), (call) => ({
    id: call.id,
    name: call.name,
    arguments: repairToolCallArguments(call.arguments, hooks.onWarn)
  }));
  if (toolCalls.length > 0) {
    // Extra context travels as ad-hoc properties on the array itself.
    if (rawContent) toolCalls._rawContent = rawContent;
    if (textContent) toolCalls._streamedText = textContent;
  }
  return { textContent, toolCalls, usage, rawContent, reasoningContent, finishReason, aborted };
}
2123
// Tools whose rounds may be exempt from the round budget.
var FREE_ROUND_TOOLS = new Set(["write_todos"]);
// Upper bound on how many exempt rounds may follow each other.
var MAX_CONSECUTIVE_FREE_ROUNDS = 3;

/**
 * Decides whether a tool round counts against the round budget.
 *
 * A round is "free" when it called at least one tool and every tool is in
 * `FREE_ROUND_TOOLS`. `consecutive` counts free rounds in a row; any other
 * round resets it to 0.
 */
var FreeRoundTracker = class {
  consecutive = 0;

  /**
   * @param {string[]} toolNames - Names of the tools called this round.
   * @returns {boolean} `true` when this round does not consume an effective
   *   round (the caller performs `round--`); `false` otherwise, including
   *   once more than `MAX_CONSECUTIVE_FREE_ROUNDS` free rounds ran in a row.
   */
  apply(toolNames) {
    const hasBillableTool = toolNames.length === 0 || toolNames.some((toolName) => !FREE_ROUND_TOOLS.has(toolName));
    if (hasBillableTool) {
      this.consecutive = 0;
      return false;
    }
    this.consecutive += 1;
    return this.consecutive <= MAX_CONSECUTIVE_FREE_ROUNDS;
  }
};
2138
/**
 * Emits at most one warning per severity level as the remaining tool rounds
 * shrink.
 *
 * Thresholds (in rounds left): `noteAt` = max(10, 20% of the budget),
 * `lowAt` = max(5, 10%) capped below `noteAt`, `criticalAt` = max(3, 5%)
 * capped below `lowAt`.
 *
 * Warnings only ever escalate: once a level has fired, every less severe
 * level is considered delivered too, so a "critical" warning is never
 * followed by a "low" or "note" warning on a later round.
 */
var BudgetWarner = class {
  /**
   * @param {number} maxToolRounds - Total tool-round budget of the task.
   */
  constructor(maxToolRounds) {
    this.maxToolRounds = maxToolRounds;
    this.noteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
    const lowRaw = Math.max(5, Math.floor(maxToolRounds * 0.1));
    const criticalRaw = Math.max(3, Math.floor(maxToolRounds * 0.05));
    this.lowAt = Math.min(lowRaw, this.noteAt - 1);
    this.criticalAt = Math.min(criticalRaw, this.lowAt - 1);
  }
  noteAt;
  lowAt;
  criticalAt;
  warnedNote = false;
  warnedLow = false;
  warnedCritical = false;

  /**
   * @param {number} roundsLeft - Rounds remaining before the budget is spent.
   * @returns {{level: string, injectMessage: string, displayMessage?: string}|null}
   *   The most severe warning that has not fired yet and whose threshold is
   *   reached, or `null` when nothing new needs to be said.
   */
  check(roundsLeft) {
    if (!this.warnedCritical && roundsLeft <= this.criticalAt) {
      // Firing the top level also retires the milder ones.
      this.warnedCritical = true;
      this.warnedLow = true;
      this.warnedNote = true;
      return {
        level: "critical",
        injectMessage: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`,
        displayMessage: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining`
      };
    }
    if (!this.warnedLow && roundsLeft <= this.lowAt) {
      this.warnedLow = true;
      this.warnedNote = true;
      return {
        level: "low",
        injectMessage: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. Use efficient approaches (batch edits, fewer reads). If you cannot finish everything, summarize what's done and what remains.`,
        displayMessage: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining`
      };
    }
    if (!this.warnedNote && roundsLeft <= this.noteAt) {
      this.warnedNote = true;
      return {
        level: "note",
        injectMessage: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${this.maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
      };
    }
    return null;
  }
};
2180
// Message handed back as `injectMessage` by EmptyResponseGuard.onEmpty when it
// decides to nudge the model after a response with no text and no tool calls.
+ var EMPTY_RESPONSE_NUDGE = "Your previous response was empty \u2014 no text and no tool calls. This usually means the context window is nearly full. Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains. Do NOT repeat earlier long outputs.";
2181
/**
 * Turns a provider finish reason into a short label for an empty response.
 *
 * @param {string|undefined} fr - Finish reason reported with the response.
 * @returns {string} Human-readable description.
 */
function describeFinishReason(fr) {
  switch (fr) {
    case "length":
      return "output limit reached (finish_reason=length)";
    case "content_filter":
      return "content blocked (finish_reason=content_filter)";
    default:
      return fr ? `empty response (finish_reason=${fr})` : "empty response";
  }
}
2187
/**
 * Suggests what the user can do after the model kept returning nothing.
 *
 * @param {string|undefined} fr - Finish reason reported with the response.
 * @returns {string} Hint text matching the finish reason.
 */
function emptyResponseHint(fr) {
  switch (fr) {
    case "length":
      return "Output token limit hit \u2014 try /compact to reduce context, raise maxTokens, or /model to switch.";
    case "content_filter":
      return "Content was blocked by the provider filter.";
    default:
      return "Context window may be exhausted or max_tokens too low.";
  }
}
2192
/**
 * Decides how to react when the model returns neither text nor tool calls:
 * nudge it once, then stop.
 *
 * `retries` counts nudges issued since the last non-empty response.
 */
var EmptyResponseGuard = class {
  retries = 0;

  /**
   * @param {boolean} canRetry - Whether another round is available.
   * @param {string} [finishReason] - Finish reason of the empty response.
   * @returns {object} `{ action: "nudge", injectMessage, displayMessage }` for
   *   the first empty response when a retry is possible, otherwise
   *   `{ action: "stop", displayMessage, hint }`.
   */
  onEmpty(canRetry, finishReason) {
    const mayNudge = canRetry && this.retries === 0;
    if (!mayNudge) {
      return {
        action: "stop",
        displayMessage: "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop.",
        hint: emptyResponseHint(finishReason)
      };
    }
    this.retries += 1;
    const reason = describeFinishReason(finishReason);
    return {
      action: "nudge",
      injectMessage: EMPTY_RESPONSE_NUDGE,
      displayMessage: `\u26A0 ${reason} \u2014 nudging AI to continue...`
    };
  }

  /** A non-empty response arrived: reset the counter so the next empty response can be nudged once again. */
  onNonEmpty() {
    this.retries = 0;
  }
};
2214
/**
 * Watches how much of the context window a request occupies.
 *
 * At 95% or more it asks for an abort (every time). Between 80% and 95% it
 * warns exactly once per instance (`warned80`); otherwise it reports "ok".
 */
var ContextPressureMonitor = class {
  warned80 = false;

  /**
   * @param {number} requestTokens - Estimated tokens in the next request.
   * @param {number} contextWindow - Context window size; values <= 0 disable the check.
   * @returns {{action: "ok"|"warn"|"abort", ratio: number, injectMessage?: string}}
   */
  check(requestTokens, contextWindow) {
    if (contextWindow <= 0) return { action: "ok", ratio: 0 };

    const ratio = requestTokens / contextWindow;
    if (ratio >= 0.95) return { action: "abort", ratio };

    const shouldWarn = ratio >= 0.8 && !this.warned80;
    if (!shouldWarn) return { action: "ok", ratio };

    this.warned80 = true;
    const percent = Math.round(ratio * 100);
    const windowLabel = contextWindow.toLocaleString();
    return {
      action: "warn",
      ratio,
      injectMessage: `\u26A0\uFE0F Context pressure: ~${percent}% of the ${windowLabel}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
    };
  }
};
2231
/**
 * Adds the token counts of one model call onto a running total, in place.
 *
 * Every counter is treated as 0 when absent on either side, so a total that
 * was created without cache counters, or a delta that omits a count, can
 * never turn a counter into NaN.
 *
 * @param {object} total - Running totals; mutated.
 * @param {object} [delta] - Usage of a single call; ignored when falsy.
 * @returns {void}
 */
function accumulateUsage(total, delta) {
  if (!delta) return;
  const counters = ["inputTokens", "outputTokens", "cacheCreationTokens", "cacheReadTokens"];
  for (const counter of counters) {
    total[counter] = (total[counter] ?? 0) + (delta[counter] ?? 0);
  }
}
2238
/**
 * Builds the "[Tool Round Budget]" section appended to the system prompt.
 *
 * @param {object} opts
 * @param {number} opts.maxToolRounds - Round budget quoted in the text.
 * @param {number} opts.autoPauseInterval - When > 0, a line about the periodic
 *   continue/stop checkpoint is appended.
 * @param {boolean} [opts.planMode] - Selects the read-only Plan Mode wording.
 * @returns {string} Prompt section, starting with two newlines.
 */
function buildRoundBudgetHint(opts) {
  const pauseHint = opts.autoPauseInterval > 0
    ? `\n- Every ${opts.autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.`
    : "";

  const lines = opts.planMode
    ? [
        "",
        "",
        "[Tool Round Budget \u2014 Plan Mode]",
        `You have a maximum of ${opts.maxToolRounds} tool call rounds. You are in READ-ONLY Plan Mode:`,
        "- Only use: read_file, list_dir, grep_files, glob_files, ask_user, write_todos",
        "- Do NOT attempt to call bash, write_file, edit_file \u2014 they are disabled",
        "- Do NOT write shell commands or code blocks as a substitute for tool calls",
        "- Do NOT read the same file more than once",
        "- Call write_todos ONCE to present your plan, then give a text summary",
        '- If the user asks you to execute anything, respond: "Please type /plan execute to switch to execute mode."'
      ]
    : [
        "",
        "",
        "[Tool Round Budget]",
        `You have a maximum of ${opts.maxToolRounds} tool call rounds for this task. Plan efficiently:`,
        "- Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.",
        "- Do NOT read the same file more than once \u2014 use the content from previous reads.",
        "- Prioritize the most critical tasks first in case rounds run out.",
        "- When remaining rounds are low, focus on completing the current task and summarizing."
      ];

  return lines.join("\n") + pauseHint;
}
2262
/**
 * Builds the user message that asks for a text-only wrap-up once the tool
 * round budget is spent.
 *
 * @param {number} maxToolRounds - Budget quoted in the first sentence.
 * @returns {string} Four-line prompt.
 */
function buildRoundsExhaustedPrompt(maxToolRounds) {
  const lines = [
    `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:`,
    "1. What work has been completed so far",
    "2. What tasks remain unfinished",
    "3. What the user can do next (e.g. send another request to continue)"
  ];
  return lines.join("\n");
}
2268
/**
 * Builds the user message injected when the user stops the task at a pause.
 *
 * @param {number} effectiveRound - Round at which the user stopped.
 * @param {number} maxToolRounds - Total round budget.
 * @returns {string} Single-line instruction to stop and summarize.
 */
function buildUserStopMessage(effectiveRound, maxToolRounds) {
  const sentences = [
    `The user has stopped the task at round ${effectiveRound}/${maxToolRounds}.`,
    "Do not call any more tools.",
    "Summarize what has been completed and what remains."
  ];
  return sentences.join(" ");
}
2271
/**
 * Drives the agentic tool loop: ask the model, run the tools it requests,
 * feed the results back, and repeat until a final answer, a stop condition,
 * or the round budget is reached.
 *
 * All I/O goes through `host`. Used here: `maxToolRounds`,
 * `autoPauseInterval`, `usage`, `toolDefs`, `planMode`, `providerId`,
 * `signal`, the required callbacks `getContextWindow`, `callModel`,
 * `executeTools`, `buildToolResultMessages`, `onFinalContent`, `callSummary`,
 * `onRoundsExhausted` (plus `estimateRequestTokens` when the context window
 * is > 0), and optional callbacks invoked with `?.`.
 *
 * Rounds made up only of free tools (see FreeRoundTracker) do not consume
 * budget (`round--`). Such a round leaves the effective round number
 * unchanged, so it does not trigger the auto-pause checkpoint again — the
 * user was already asked at that effective round.
 *
 * @param {object} host - Adapter supplying configuration and callbacks.
 * @returns {Promise<{reason: string, usage: object}>} `reason` is one of
 *   "aborted", "stopped", "context-overflow", "host-stop", "empty-response",
 *   "final", "tee-stop", "rounds-exhausted".
 */
async function runAgentLoop(host) {
  const { maxToolRounds, autoPauseInterval, usage } = host;
  const extraMessages = [];
  const budgetWarner = new BudgetWarner(maxToolRounds);
  const emptyGuard = new EmptyResponseGuard();
  const ctxMonitor = new ContextPressureMonitor();
  const freeRounds = new FreeRoundTracker();
  const roundToolHistory = [];
  const hasWriteTools = host.toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
  for (let round = 0; round < maxToolRounds; round++) {
    if (host.signal?.aborted) return { reason: "aborted", usage };
    host.onRoundStart?.(round, maxToolRounds);
    if (await host.beforeRound?.(round, extraMessages) === "stop") {
      return { reason: "stopped", usage };
    }
    const warning = budgetWarner.check(maxToolRounds - round);
    if (warning) {
      extraMessages.push({ role: "user", content: warning.injectMessage });
      host.onBudgetWarning?.(warning);
    }
    const interjection = host.pollInterjection?.();
    if (interjection) extraMessages.push({ role: "user", content: interjection });
    const ctxWindow = host.getContextWindow();
    if (ctxWindow > 0) {
      const pressure = ctxMonitor.check(host.estimateRequestTokens(extraMessages), ctxWindow);
      if (pressure.action === "abort") {
        host.onContextPressure?.(pressure, ctxWindow, round);
        return { reason: "context-overflow", usage };
      } else if (pressure.action === "warn") {
        host.onContextPressure?.(pressure, ctxWindow, round);
        extraMessages.push({ role: "user", content: pressure.injectMessage });
      }
    }
    const outcome = await host.callModel(round, extraMessages);
    if (host.signal?.aborted) return { reason: "aborted", usage };
    accumulateUsage(usage, outcome.usage);
    if (outcome.stopLoop) return { reason: "host-stop", usage };
    const toolCalls = outcome.toolCalls ?? [];
    if (toolCalls.length === 0) {
      // Text-only answer: check for claimed-but-never-performed file writes
      // first, then for an empty response, before accepting it as final.
      const content = outcome.content ?? "";
      const alreadyRendered = !!outcome.alreadyRendered;
      const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
      const coarseHallucination = !host.planMode && hasWriteTools && !alreadyWrote && !!content && detectsHallucinatedFileOp(content);
      const phantomPaths = (coarseHallucination || alreadyWrote) && !host.planMode && hasWriteTools && content ? findPhantomClaims(content, extraMessages) : [];
      const bashRanThisTurn = extractBashCommands(extraMessages).length > 0;
      const coarseShouldFire = coarseHallucination && !bashRanThisTurn;
      if ((phantomPaths.length > 0 || coarseShouldFire) && round < maxToolRounds - 1) {
        host.onHallucinationRetry?.({ phantomPaths, round, alreadyRendered });
        const correctionMsg = phantomPaths.length > 0 ? buildPhantomCorrectionMessage(phantomPaths) : HALLUCINATION_CORRECTION_MESSAGE;
        // For the "deepseek" provider an (empty) reasoning_content field is always attached.
        const reasoningField = outcome.reasoningContent ? { reasoning_content: outcome.reasoningContent } : host.providerId === "deepseek" ? { reasoning_content: "" } : {};
        extraMessages.push(
          { role: "assistant", content, ...reasoningField },
          { role: "user", content: correctionMsg }
        );
        continue;
      }
      if (!content || content.trim() === "") {
        const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, outcome.finishReason);
        host.onEmptyResponse?.(decision, { alreadyRendered, round });
        if (decision.action === "nudge") {
          extraMessages.push({ role: "user", content: decision.injectMessage });
          continue;
        }
        return { reason: "empty-response", usage };
      }
      emptyGuard.onNonEmpty();
      await host.onFinalContent(content, { reasoningContent: outcome.reasoningContent, alreadyRendered });
      return { reason: "final", usage };
    }
    emptyGuard.onNonEmpty();
    // save_last_response with a path is handed to the host's tee handler
    // instead of the regular tool executor.
    const saveCall = toolCalls.find((tc) => tc.name === "save_last_response");
    const savePath = saveCall ? String(saveCall.arguments["path"] ?? "") : "";
    if (saveCall && savePath && host.runSaveLastResponseTee) {
      const directive = await host.runSaveLastResponseTee({
        toolCalls,
        call: saveCall,
        saveToFile: savePath,
        extraMessages,
        reasoningContent: outcome.reasoningContent
      });
      if (directive === "stop") return { reason: "tee-stop", usage };
      freeRounds.apply(toolCalls.map((tc) => tc.name));
      continue;
    }
    const toolResults = await host.executeTools(toolCalls, extraMessages);
    if (host.isInterrupted?.() || host.signal?.aborted) {
      host.onInterrupted?.();
      return { reason: "aborted", usage };
    }
    roundToolHistory.push({ round: round + 1, tools: toolCalls.map((tc) => tc.name) });
    host.onToolsExecuted?.(toolCalls, toolResults, extraMessages);
    extraMessages.push(...host.buildToolResultMessages(toolCalls, toolResults, outcome.reasoningContent));
    for (const tc of toolCalls) {
      if (tc.name.startsWith("mcp__")) host.onMcpToolUsed?.(tc.name);
    }
    host.persistRound?.(toolCalls, toolResults, {
      assistantContent: outcome.content,
      reasoningContent: outcome.reasoningContent
    });
    host.afterToolRoundPersist?.(toolCalls, toolResults, extraMessages);
    const wasFreeRound = freeRounds.apply(toolCalls.map((tc) => tc.name));
    if (wasFreeRound) {
      // Undo the upcoming round++ so this round costs no budget.
      round--;
    }
    if (host.checkLoopHealth?.(toolCalls, extraMessages) === "skip-checkpoint") continue;
    const postInterjection = host.pollInterjection?.();
    if (postInterjection) extraMessages.push({ role: "user", content: postInterjection });
    const effectiveRound = round + 1;
    const remaining = maxToolRounds - effectiveRound;
    // A free round did not advance effectiveRound; pausing again here would
    // repeat the checkpoint the user already answered.
    if (!wasFreeRound && autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0 && !host.signal?.aborted && host.requestAutoPause) {
      const toolSummary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
      const resp = await host.requestAutoPause({ effectiveRound, maxToolRounds, remaining, toolSummary });
      if (host.signal?.aborted) return { reason: "aborted", usage };
      if (resp.action === "stop") {
        extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
        break;
      } else if (resp.action === "redirect" && resp.message) {
        extraMessages.push({ role: "user", content: resp.message });
      }
    }
    host.onRoundEnd?.(round);
  }
  // Reached when the budget ran out or the user stopped at a checkpoint:
  // ask for one final text summary. A failing summary call is tolerated and
  // reported to the host as `null`.
  let summaryContent = null;
  try {
    const summaryExtra = [
      ...extraMessages,
      { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
    ];
    const summary = await host.callSummary(summaryExtra);
    accumulateUsage(usage, summary.usage);
    summaryContent = summary.content && summary.content.trim() ? summary.content : null;
  } catch {
    summaryContent = null;
  }
  await host.onRoundsExhausted(summaryContent);
  return { reason: "rounds-exhausted", usage };
}
2407
/**
 * Build a short, human-readable summary of the tools used in the most recent
 * rounds, e.g. `"bash×3, write_file×2, read_file"`.
 *
 * @param {Array<{round: number, tools: string[]}>} history - Per-round tool
 *   names, oldest first.
 * @param {number} interval - How many trailing rounds to summarize.
 * @returns {string} Tool names sorted by descending call count (ties keep
 *   first-seen order), joined with ", ". Tools called more than once get a
 *   `×N` suffix. Returns "" when there is nothing to summarize.
 */
function summarizeRecentTools(history, interval) {
  // `slice(-0)` is `slice(0)`, i.e. the WHOLE history, so a zero, fractional
  // (<1), negative or NaN interval must be rejected before slicing.
  const windowSize = Math.trunc(interval);
  if (!(windowSize > 0)) return "";

  const counts = new Map();
  for (const { tools } of history.slice(-windowSize)) {
    for (const toolName of tools) {
      counts.set(toolName, (counts.get(toolName) ?? 0) + 1);
    }
  }

  // Array.prototype.sort is stable, so equal counts keep insertion order.
  return [...counts]
    .sort(([, left], [, right]) => right - left)
    .map(([toolName, count]) => (count > 1 ? `${toolName}\xD7${count}` : toolName))
    .join(", ");
}
2415
+
2416
+ // src/providers/openai-compatible.ts
2417
/**
 * Convert an OpenAI-style `usage` payload into the internal usage shape.
 *
 * Cached prompt tokens are reported separately as `cacheReadTokens` and are
 * subtracted from `inputTokens` (clamped at 0) so they are not counted twice.
 *
 * @param {object | null | undefined} u - Raw usage object with
 *   `prompt_tokens`, `completion_tokens` and optionally
 *   `prompt_tokens_details.cached_tokens`.
 * @returns {{inputTokens: number, outputTokens: number, cacheReadTokens?: number} | undefined}
 *   `undefined` when no usage payload was supplied.
 */
function toUsage(u) {
  if (!u) return void 0;

  // Treat missing or non-numeric counters as 0; otherwise a payload that
  // omits a field yields NaN/undefined token counts.
  const asCount = (value) => (Number.isFinite(value) ? value : 0);

  const cached = asCount(u.prompt_tokens_details?.cached_tokens);
  const usage = {
    inputTokens: Math.max(0, asCount(u.prompt_tokens) - cached),
    outputTokens: asCount(u.completion_tokens)
  };
  if (cached > 0) usage.cacheReadTokens = cached;
  return usage;
}
2427
+ var OpenAICompatibleProvider = class extends BaseProvider {
2428
+ client;
2429
+ defaultTimeout = 6e4;
2430
+ // ms
2431
+ /** 子类设为 false 可禁用流式工具调用(虚假声明检测需要完整响应) */
2432
+ enableStreamingToolCalls = true;
2433
+ async initialize(apiKey, options) {
2434
+ if (options?.timeout !== void 0) {
2435
+ this.defaultTimeout = options.timeout;
2436
+ }
2437
+ const clientOptions = {
2438
+ apiKey,
2439
+ baseURL: options?.baseUrl ?? this.defaultBaseUrl,
2440
+ timeout: this.defaultTimeout
2441
+ };
2442
+ const proxyUrl = options?.proxy;
2161
2443
  try {
2162
- const stream = await this.client.chat.completions.create({
2444
+ const { Agent: Agent2, ProxyAgent, fetch: undiciFetch } = await import("undici");
2445
+ const STREAM_BODY_TIMEOUT = 30 * 60 * 1e3;
2446
+ const STREAM_HEADERS_TIMEOUT = 5 * 60 * 1e3;
2447
+ const dispatcher = proxyUrl ? new ProxyAgent({
2448
+ uri: proxyUrl,
2449
+ bodyTimeout: STREAM_BODY_TIMEOUT,
2450
+ headersTimeout: STREAM_HEADERS_TIMEOUT
2451
+ }) : new Agent2({
2452
+ bodyTimeout: STREAM_BODY_TIMEOUT,
2453
+ headersTimeout: STREAM_HEADERS_TIMEOUT
2454
+ });
2455
+ clientOptions.fetch = ((url, init) => undiciFetch(url, { ...init, dispatcher }));
2456
+ } catch {
2457
+ }
2458
+ this.client = new OpenAI(clientOptions);
2459
+ }
2460
+ /**
2461
+ * 将 systemPrompt + messages 合并为 OpenAI messages 数组(system 消息放首位)。
2462
+ *
2463
+ * v0.4.100+:按原始顺序保留工具消息(assistant.toolCalls 和 role='tool'),
2464
+ * 不再剥离到 _extraMessages 末尾——之前的剥离会让历史工具往返被插到当前用户消息之后,
2465
+ * 导致模型把"过去的工具调用结果"当作"对当前问题的回应",DeepSeek V4 Flash 上尤其明显
2466
+ * (会复读上一轮的"完成汇总")。
2467
+ *
2468
+ * DeepSeek V4 thinking 模式:所有 assistant 消息(含带 toolCalls 的)必须有
2469
+ * reasoning_content 字段,缺失则 API 400。
2470
+ */
2471
+ buildMessages(request) {
2472
+ const msgs = [];
2473
+ for (const m of request.messages) {
2474
+ if (m.role === "tool") {
2475
+ if (!m.toolCallId) continue;
2476
+ msgs.push({
2477
+ role: "tool",
2478
+ tool_call_id: m.toolCallId,
2479
+ content: typeof m.content === "string" ? m.content : ""
2480
+ });
2481
+ continue;
2482
+ }
2483
+ if (m.role === "assistant" && m.toolCalls && m.toolCalls.length > 0) {
2484
+ const assistantMsg = {
2485
+ role: "assistant",
2486
+ content: typeof m.content === "string" && m.content ? m.content : null,
2487
+ tool_calls: m.toolCalls.map((tc) => ({
2488
+ id: tc.id,
2489
+ type: "function",
2490
+ function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
2491
+ })),
2492
+ reasoning_content: m.reasoningContent ?? ""
2493
+ };
2494
+ msgs.push(assistantMsg);
2495
+ continue;
2496
+ }
2497
+ const base = { role: m.role, content: m.content };
2498
+ if (m.role === "assistant") {
2499
+ base.reasoning_content = m.reasoningContent ?? "";
2500
+ }
2501
+ msgs.push(base);
2502
+ }
2503
+ const systemContent = [request.systemPrompt, request.systemPromptVolatile].filter(Boolean).join("\n\n---\n\n");
2504
+ if (systemContent) {
2505
+ return [{ role: "system", content: systemContent }, ...msgs];
2506
+ }
2507
+ return msgs;
2508
+ }
2509
+ async chat(request) {
2510
+ try {
2511
+ const response = await this.client.chat.completions.create({
2163
2512
  model: request.model,
2164
- messages: allMessages,
2165
- tools: openaiTools,
2166
- tool_choice: "auto",
2513
+ messages: this.buildMessages(request),
2167
2514
  temperature: request.temperature,
2168
2515
  max_tokens: request.maxTokens,
2169
- stream: true,
2170
- stream_options: { include_usage: true },
2516
+ stream: false,
2171
2517
  ...request.thinking ? { thinking: { type: "enabled" } } : {}
2172
2518
  }, {
2173
- timeout: request.timeout ?? this.defaultTimeout,
2519
+ timeout: request.timeout ?? this.defaultTimeout
2520
+ });
2521
+ const firstChoice = response.choices?.[0];
2522
+ if (!firstChoice) {
2523
+ return { content: "", model: response.model, usage: void 0 };
2524
+ }
2525
+ return {
2526
+ content: firstChoice.message.content ?? "",
2527
+ model: response.model,
2528
+ usage: toUsage(response.usage)
2529
+ };
2530
+ } catch (err) {
2531
+ throw this.wrapError(err);
2532
+ }
2533
+ }
2534
+ async *chatStream(request) {
2535
+ try {
2536
+ const stream = await this.client.chat.completions.create({
2537
+ model: request.model,
2538
+ messages: this.buildMessages(request),
2539
+ temperature: request.temperature,
2540
+ max_tokens: request.maxTokens,
2541
+ stream: true,
2542
+ // 请求末尾 usage chunk,供 token 统计使用
2543
+ stream_options: { include_usage: true },
2544
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
2545
+ }, {
2546
+ timeout: request.timeout ?? this.defaultTimeout,
2547
+ signal: request.signal
2548
+ });
2549
+ let thinkingStarted = false;
2550
+ let reasoningAccumulator = "";
2551
+ for await (const chunk of stream) {
2552
+ const choice = chunk.choices[0];
2553
+ const done = choice?.finish_reason != null;
2554
+ if (!choice && chunk.usage) {
2555
+ yield {
2556
+ delta: "",
2557
+ done: true,
2558
+ usage: toUsage(chunk.usage),
2559
+ ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {}
2560
+ };
2561
+ continue;
2562
+ }
2563
+ const reasoningDelta = choice?.delta?.reasoning_content;
2564
+ if (reasoningDelta) {
2565
+ if (!thinkingStarted) {
2566
+ yield { delta: "<think>", done: false };
2567
+ thinkingStarted = true;
2568
+ }
2569
+ reasoningAccumulator += reasoningDelta;
2570
+ yield { delta: reasoningDelta, done: false };
2571
+ continue;
2572
+ }
2573
+ const delta = choice?.delta?.content ?? "";
2574
+ if (thinkingStarted && delta) {
2575
+ thinkingStarted = false;
2576
+ yield { delta: "</think>", done: false };
2577
+ }
2578
+ if (done) {
2579
+ yield { delta, done, ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {} };
2580
+ } else {
2581
+ yield { delta, done };
2582
+ }
2583
+ }
2584
+ } catch (err) {
2585
+ throw this.wrapError(err);
2586
+ }
2587
+ }
2588
+ /**
2589
+ * 请求 AI 并获取工具调用列表(不执行,只解析)。
2590
+ * 返回 { toolCalls, usage? } 时说明 AI 想要调用工具,
2591
+ * 返回 { content, usage? } 时说明 AI 给出了最终回答。
2592
+ */
2593
+ async chatWithTools(request, tools) {
2594
+ try {
2595
+ const openaiTools = tools.map((t) => ({
2596
+ type: "function",
2597
+ function: {
2598
+ name: t.name,
2599
+ description: t.description,
2600
+ parameters: {
2601
+ type: "object",
2602
+ properties: Object.fromEntries(
2603
+ Object.entries(t.parameters).map(([key, schema]) => [
2604
+ key,
2605
+ schemaToJsonSchema(schema)
2606
+ ])
2607
+ ),
2608
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
2609
+ }
2610
+ }
2611
+ }));
2612
+ const baseMessages = this.buildMessages(request);
2613
+ const extraMessages = request._extraMessages ?? [];
2614
+ const allMessages = [...baseMessages, ...extraMessages];
2615
+ const response = await this.client.chat.completions.create({
2616
+ model: request.model,
2617
+ messages: allMessages,
2618
+ tools: openaiTools,
2619
+ tool_choice: "auto",
2620
+ temperature: request.temperature,
2621
+ max_tokens: request.maxTokens,
2622
+ stream: false,
2623
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
2624
+ }, {
2625
+ timeout: request.timeout ?? this.defaultTimeout
2626
+ });
2627
+ const firstChoice = response.choices?.[0];
2628
+ if (!firstChoice) {
2629
+ return { content: "", usage: void 0 };
2630
+ }
2631
+ const message = firstChoice.message;
2632
+ const finishReason = firstChoice.finish_reason;
2633
+ const usage = toUsage(response.usage);
2634
+ const contentStr = typeof message.content === "string" ? message.content : "";
2635
+ const hasToolCalls = !!(message.tool_calls && message.tool_calls.length > 0);
2636
+ const reasoningContent = message.reasoning_content;
2637
+ if (message.tool_calls && message.tool_calls.length > 0) {
2638
+ const toolCalls = message.tool_calls.map((tc) => {
2639
+ const parsedArgs = repairToolCallArguments(
2640
+ tc.function.arguments || "{}",
2641
+ (m) => process.stderr.write(`[warn] ${m}
2642
+ `)
2643
+ );
2644
+ return {
2645
+ id: tc.id,
2646
+ name: tc.function.name,
2647
+ arguments: parsedArgs
2648
+ };
2649
+ });
2650
+ return { toolCalls, usage, reasoningContent };
2651
+ }
2652
+ return {
2653
+ content: message.content ?? "",
2654
+ usage,
2655
+ ...reasoningContent ? { reasoningContent } : {},
2656
+ ...!hasToolCalls && (finishReason ?? "") ? { finishReason } : {}
2657
+ };
2658
+ } catch (err) {
2659
+ throw this.wrapError(err);
2660
+ }
2661
+ }
2662
+ /**
2663
+ * 流式工具调用:文本内容实时输出、工具名称/参数逐块发射。
2664
+ * 子类(DeepSeek / Kimi)因虚假声明检测需要完整响应,故不继承此方法。
2665
+ */
2666
+ async *chatWithToolsStream(request, tools) {
2667
+ if (!this.enableStreamingToolCalls) {
2668
+ const result = await this.chatWithTools(request, tools);
2669
+ if ("toolCalls" in result) {
2670
+ for (let i = 0; i < result.toolCalls.length; i++) {
2671
+ const tc = result.toolCalls[i];
2672
+ yield { type: "tool_call_start", index: i, id: tc.id, name: tc.name };
2673
+ yield { type: "tool_call_delta", index: i, argumentsDelta: JSON.stringify(tc.arguments) };
2674
+ yield { type: "tool_call_end", index: i };
2675
+ }
2676
+ } else {
2677
+ yield { type: "text_delta", delta: result.content };
2678
+ }
2679
+ const rc = "reasoningContent" in result ? result.reasoningContent : void 0;
2680
+ const fr = "finishReason" in result ? result.finishReason : void 0;
2681
+ yield {
2682
+ type: "done",
2683
+ usage: result.usage,
2684
+ ...rc ? { reasoningContent: rc } : {},
2685
+ ...fr ? { finishReason: fr } : {}
2686
+ };
2687
+ return;
2688
+ }
2689
+ const openaiTools = tools.map((t) => ({
2690
+ type: "function",
2691
+ function: {
2692
+ name: t.name,
2693
+ description: t.description,
2694
+ parameters: {
2695
+ type: "object",
2696
+ properties: Object.fromEntries(
2697
+ Object.entries(t.parameters).map(([key, schema]) => [
2698
+ key,
2699
+ schemaToJsonSchema(schema)
2700
+ ])
2701
+ ),
2702
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
2703
+ }
2704
+ }
2705
+ }));
2706
+ const baseMessages = this.buildMessages(request);
2707
+ const extraMessages = request._extraMessages ?? [];
2708
+ const allMessages = [...baseMessages, ...extraMessages];
2709
+ try {
2710
+ const stream = await this.client.chat.completions.create({
2711
+ model: request.model,
2712
+ messages: allMessages,
2713
+ tools: openaiTools,
2714
+ tool_choice: "auto",
2715
+ temperature: request.temperature,
2716
+ max_tokens: request.maxTokens,
2717
+ stream: true,
2718
+ stream_options: { include_usage: true },
2719
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
2720
+ }, {
2721
+ timeout: request.timeout ?? this.defaultTimeout,
2174
2722
  signal: request.signal
2175
2723
  });
2176
2724
  const toolCallAccumulators = /* @__PURE__ */ new Map();
@@ -2488,336 +3036,39 @@ var ZhipuProvider = class extends OpenAICompatibleProvider {
2488
3036
  {
2489
3037
  id: "glm-z1-air",
2490
3038
  displayName: "GLM-Z1 Air (Lightweight Reasoning)",
2491
- contextWindow: 131072,
2492
- supportsStreaming: true,
2493
- supportsThinking: true
2494
- },
2495
- {
2496
- id: "glm-z1-flash",
2497
- displayName: "GLM-Z1 Flash (Free Reasoning)",
2498
- contextWindow: 128e3,
2499
- supportsStreaming: true,
2500
- supportsThinking: true
2501
- },
2502
- // ── GLM-4 系列(稳定,价格低) ──
2503
- {
2504
- id: "glm-4-plus",
2505
- displayName: "GLM-4 Plus",
2506
- contextWindow: 128e3,
2507
- supportsStreaming: true
2508
- },
2509
- {
2510
- id: "glm-4-air",
2511
- displayName: "GLM-4 Air",
2512
- contextWindow: 128e3,
2513
- supportsStreaming: true
2514
- },
2515
- {
2516
- id: "glm-4-flash",
2517
- displayName: "GLM-4 Flash (Free)",
2518
- contextWindow: 128e3,
2519
- supportsStreaming: true
2520
- }
2521
- ]
2522
- };
2523
- };
2524
-
2525
- // src/tools/hallucination.ts
2526
- var HALLUCINATION_PATTERNS = [
2527
- /文件路径[::]\s*`?[^\s`]+\.\w{1,5}/,
2528
- // 文件路径: `path/to/file.ext`(要求文件扩展名)
2529
- /已生成[::!!]/,
2530
- // 已生成完成!
2531
- /已保存到?\s*[`'"]/,
2532
- // 已保存到 `path`(要求后跟路径引号)
2533
- /已写入[::!!]/,
2534
- // 已写入!
2535
- /已创建[::!!]/,
2536
- // 已创建!
2537
- /File\s+(?:written|saved|created)\s+(?:to|as|at)/i,
2538
- // File written to / saved as(要求介词)
2539
- /生成完成[!!]/,
2540
- // 生成完成!
2541
- /✅\s*(?:文件|已[生保写创]|第)\S*\.\w{1,5}/,
2542
- // ✅ 文件已保存 path.ext(要求文件扩展名)
2543
- /文件已[成功]?创建/,
2544
- // 文件已成功创建 / 文件已创建
2545
- /教案已[成功]?[生创保写]/,
2546
- // 教案已成功生成 / 教案已保存
2547
- /已成功[保写创生]入?[::!!\s`'"]/,
2548
- // 已成功保存 / 已成功写入 / 已成功创建
2549
- /保存[到至]了?\s*[`'"]/,
2550
- // 保存到了 `path` / 保存至 'path'
2551
- /内容如下[::]/,
2552
- // 内容如下:(后跟大段文件内容)
2553
- /以下是.*(?:教案|文件|内容)[::]/
2554
- // 以下是xx教案内容:(Kimi 常见模式)
2555
- ];
2556
- function detectsHallucinatedFileOp(content) {
2557
- return HALLUCINATION_PATTERNS.some((pattern) => pattern.test(content));
2558
- }
2559
- function hadPreviousWriteToolCalls(extraMessages) {
2560
- const msgs = extraMessages;
2561
- return msgs.some((msg) => {
2562
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
2563
- return msg.tool_calls.some((tc) => {
2564
- const fn = tc.function;
2565
- const name = fn?.name ?? "";
2566
- return name === "write_file" || name === "edit_file";
2567
- });
2568
- }
2569
- if (msg.role === "assistant" && Array.isArray(msg.content)) {
2570
- return msg.content.some((block) => {
2571
- if (block.type !== "tool_use") return false;
2572
- const name = block.name ?? "";
2573
- return name === "write_file" || name === "edit_file";
2574
- });
2575
- }
2576
- if (msg.role === "model" && Array.isArray(msg.parts)) {
2577
- return msg.parts.some((part) => {
2578
- const fc = part.functionCall;
2579
- const name = fc?.name ?? "";
2580
- return name === "write_file" || name === "edit_file";
2581
- });
2582
- }
2583
- return false;
2584
- });
2585
- }
2586
- var TOOL_CALL_REMINDER = `
2587
-
2588
- [\u26A0\uFE0F Mandatory Tool Call Policy]
2589
- When you need to create, write, or modify files, you MUST use the function calling API to invoke write_file or edit_file.
2590
- NEVER claim "file saved", "file created", "written to", etc. in your response text without actually calling the tool.
2591
- Describing file content in text without calling the tool = the file does not exist = task failure.
2592
- If multiple files need to be generated, you MUST call write_file separately for each file \u2014 do not skip any.
2593
- Do NOT output fake "completion summaries" unless you have actually completed all file writes via tool_calls.
2594
-
2595
- CRITICAL \u2014 Batch file generation rules:
2596
- 1. You MUST call write_file once per file. There are NO shortcuts.
2597
- 2. After writing file N, immediately proceed to call write_file for file N+1. Do NOT stop to summarize.
2598
- 3. If you find yourself typing file content into your response text instead of into a write_file call, STOP and use the tool.
2599
- 4. Only produce a text summary AFTER all write_file calls have been made and returned success.
2600
- 5. The system compares every "file saved" claim against actual tool calls. Phantom claims trigger an automatic retry \u2014 do not waste rounds.`;
2601
- var HALLUCINATION_CORRECTION_MESSAGE = "You did NOT actually call the write_file tool \u2014 the file was NOT created! Please immediately use the write_file tool via the function calling API to perform the actual file write. Do NOT describe file content in text \u2014 you MUST invoke write_file through the tool_calls mechanism.";
2602
- function extractBashCommands(extraMessages) {
2603
- const cmds = [];
2604
- const msgs = extraMessages;
2605
- const addCmd = (raw) => {
2606
- if (typeof raw === "string") {
2607
- try {
2608
- const parsed = JSON.parse(raw);
2609
- if (typeof parsed.command === "string") cmds.push(parsed.command);
2610
- } catch {
2611
- }
2612
- } else if (raw && typeof raw === "object") {
2613
- const c = raw.command;
2614
- if (typeof c === "string") cmds.push(c);
2615
- }
2616
- };
2617
- for (const msg of msgs) {
2618
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
2619
- for (const tc of msg.tool_calls) {
2620
- const fn = tc.function;
2621
- if (fn?.name === "bash") addCmd(fn?.arguments);
2622
- }
2623
- }
2624
- if (msg.role === "assistant" && Array.isArray(msg.content)) {
2625
- for (const block of msg.content) {
2626
- if (block.type === "tool_use" && block.name === "bash") addCmd(block.input);
2627
- }
2628
- }
2629
- if (msg.role === "model" && Array.isArray(msg.parts)) {
2630
- for (const part of msg.parts) {
2631
- const fc = part.functionCall;
2632
- if (fc && fc.name === "bash") addCmd(fc.args);
2633
- }
2634
- }
2635
- }
2636
- return cmds;
2637
- }
2638
- var DSML_PIPE_CLASS = "[|\\uFF5C\\u2502\\u2503\\u01C0]";
2639
- var PSEUDO_TOOL_CALL_PATTERNS = [
2640
- // <tool_call name="..."> ... </tool_call> (DeepSeek V4 thinking, GLM)
2641
- /<tool_call\s+name\s*=\s*["'][\w._-]+["']/,
2642
- // <function_calls> ... </function_calls> (Anthropic-style as text)
2643
- /<\/?function_calls\s*>/,
2644
- // <invoke name="..." /> (Anthropic XML tool-call, which is real for
2645
- // Claude API but is text/garbage for any other provider's plain stream)
2646
- /<invoke\s+name\s*=\s*["'][\w._-]+["']/,
2647
- // <tool_use> ... <tool_use_id> (Claude flavor leaked into text)
2648
- /<tool_use(?:_id)?\b/,
2649
- // ```tool_call\n...\n``` markdown fences (Kimi/Zhipu fallback)
2650
- /```\s*tool_call\b/i,
2651
- // Bare JSON tool-call block: lines starting with `{"name":"...","arguments":`
2652
- /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:/m,
2653
- // v0.4.112: <think> ... </think> reasoning blocks. The REPL renderer
2654
- // suppresses these from terminal output, but tee mode writes the raw
2655
- // delta to disk → reasoning leaks into the saved file. We saw a 600-line
2656
- // 审计报告.md whose first 57 lines were the model's planning monologue.
2657
- /<think\b[^>]*>/i,
2658
- // v0.4.112: leading ```markdown / ```md fence wrapping the entire document.
2659
- // DeepSeek V4 Pro Thinking sometimes "politely" wraps its document output
2660
- // in a markdown fence. The fence ends up literally in the saved file.
2661
- /^\s*```\s*(?:markdown|md|gfm)\b/im,
2662
- // v0.4.173: DeepSeek V4 DSML pseudo-tool-call markup leaked as text. DeepSeek
2663
- // emits a fake tool call using its native special-token markup
2664
- // <||DSML||tool_calls> <||DSML||invoke name="write"> <||DSML||parameter …>
2665
- // where the "pipe" is U+FF5C FULLWIDTH VERTICAL LINE (the same token family as
2666
- // <|User|>/<|Assistant|>). We saw an exam paper saved via save_last_response
2667
- // whose tee stream was preamble + this DSML wrapper + the real document body.
2668
- // The earlier <invoke …> pattern uses ASCII < > and does NOT match these.
2669
- new RegExp(`<\\/?\\s*${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`, "i")
2670
- ];
2671
- function detectPseudoToolCalls(content) {
2672
- if (!content || content.length === 0) return null;
2673
- for (const re of PSEUDO_TOOL_CALL_PATTERNS) {
2674
- if (re.test(content)) return re.source;
2675
- }
2676
- return null;
2677
- }
2678
- function stripPseudoToolCalls(content) {
2679
- if (!content) return content;
2680
- let out = content;
2681
- const dsmlBody = extractDsmlContent(out);
2682
- if (dsmlBody !== null) {
2683
- out = dsmlBody;
2684
- } else {
2685
- out = stripDsmlTags(out);
2686
- }
2687
- out = out.replace(/<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi, "");
2688
- out = out.replace(/<tool_call\b[^>]*\/>/gi, "");
2689
- out = out.replace(/<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi, "");
2690
- out = out.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
2691
- out = out.replace(/<invoke\b[^>]*\/>/gi, "");
2692
- out = out.replace(/<tool_use(?:_id)?\b[^>]*>[\s\S]*?<\/tool_use(?:_id)?>/gi, "");
2693
- out = out.replace(/```\s*tool_call\b[\s\S]*?```/gi, "");
2694
- out = out.replace(/<think\b[^>]*>[\s\S]*?<\/think>/gi, "");
2695
- out = out.replace(/<think\b[^>]*>[\s\S]*?(?=^#{1,3}\s+\S|\n\s*\n)/im, "");
2696
- out = out.replace(/^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:[\s\S]*?\}\s*$/gm, "");
2697
- out = unwrapDocumentFence(out);
2698
- out = peelMetaNarration(out);
2699
- out = out.replace(/\n{3,}/g, "\n\n").trim();
2700
- return out;
2701
- }
2702
- function extractDsmlContent(content) {
2703
- if (!content) return null;
2704
- const P = DSML_PIPE_CLASS;
2705
- const re = new RegExp(
2706
- `<\\s*${P}+\\s*DSML\\s*${P}+\\s*parameter\\b[^>]*\\bname\\s*=\\s*["']content["'][^>]*>([\\s\\S]*?)<\\s*/\\s*${P}+\\s*DSML\\s*${P}+\\s*parameter\\s*>`,
2707
- "i"
2708
- );
2709
- const m = content.match(re);
2710
- if (m && typeof m[1] === "string") {
2711
- const body = m[1].trim();
2712
- return body.length > 0 ? body : null;
2713
- }
2714
- return null;
2715
- }
2716
- function stripDsmlTags(content) {
2717
- const P = DSML_PIPE_CLASS;
2718
- let out = content;
2719
- out = out.replace(
2720
- new RegExp(
2721
- `<\\s*${P}+\\s*DSML\\s*${P}+\\s*tool_calls\\b[\\s\\S]*?<\\s*/\\s*${P}+\\s*DSML\\s*${P}+\\s*tool_calls\\s*>`,
2722
- "gi"
2723
- ),
2724
- ""
2725
- );
2726
- out = out.replace(new RegExp(`<\\s*/?\\s*${P}+\\s*DSML\\s*${P}+[^>]*>`, "gi"), "");
2727
- return out;
2728
- }
2729
- function unwrapDocumentFence(content) {
2730
- const trimmed = content.trim();
2731
- const open = trimmed.match(/^```\s*(markdown|md|gfm)?\s*\n/i);
2732
- if (!open) return content;
2733
- const afterOpen = trimmed.slice(open[0].length);
2734
- const closeMatch = afterOpen.match(/\n```\s*$/);
2735
- if (!closeMatch) return content;
2736
- const inner = afterOpen.slice(0, afterOpen.length - closeMatch[0].length);
2737
- if (inner.length < 200) return content;
2738
- return inner;
2739
- }
2740
- function peelMetaNarration(content) {
2741
- let out = content;
2742
- const firstHeadingMatch = out.match(/^#{1,3}\s+\S.*$/m);
2743
- if (firstHeadingMatch && firstHeadingMatch.index !== void 0) {
2744
- const before = out.slice(0, firstHeadingMatch.index);
2745
- const hasIntroMarker = /(?:以下(?:即为|是|就是)|这是|Here\s+is|Below\s+is|完整的?(?:审计报告|内容|文档)|审计报告(?:如下|的完整内容))/i.test(before);
2746
- if (before.length > 0 && before.length < 800 && hasIntroMarker) {
2747
- out = out.slice(firstHeadingMatch.index);
2748
- }
2749
- if (out.startsWith("---\n")) {
2750
- const headingAfterRule = out.slice(4).match(/^#{1,3}\s+\S/m);
2751
- if (headingAfterRule && headingAfterRule.index !== void 0 && headingAfterRule.index < 100) {
2752
- out = out.slice(4 + headingAfterRule.index);
3039
+ contextWindow: 131072,
3040
+ supportsStreaming: true,
3041
+ supportsThinking: true
3042
+ },
3043
+ {
3044
+ id: "glm-z1-flash",
3045
+ displayName: "GLM-Z1 Flash (Free Reasoning)",
3046
+ contextWindow: 128e3,
3047
+ supportsStreaming: true,
3048
+ supportsThinking: true
3049
+ },
3050
+ // ── GLM-4 系列(稳定,价格低) ──
3051
+ {
3052
+ id: "glm-4-plus",
3053
+ displayName: "GLM-4 Plus",
3054
+ contextWindow: 128e3,
3055
+ supportsStreaming: true
3056
+ },
3057
+ {
3058
+ id: "glm-4-air",
3059
+ displayName: "GLM-4 Air",
3060
+ contextWindow: 128e3,
3061
+ supportsStreaming: true
3062
+ },
3063
+ {
3064
+ id: "glm-4-flash",
3065
+ displayName: "GLM-4 Flash (Free)",
3066
+ contextWindow: 128e3,
3067
+ supportsStreaming: true
2753
3068
  }
2754
- }
2755
- }
2756
- const codaMatch = out.match(/\n[^\n]*?(?:以上(?:即为|就是|内容|为完整的?)|Above\s+is\s+the|本报告已经|该报告(?:已经|包含)|报告(?:已|至此)结束)[^\n]*$/i);
2757
- if (codaMatch && codaMatch.index !== void 0 && codaMatch.index > out.length / 2) {
2758
- out = out.slice(0, codaMatch.index);
2759
- }
2760
- return out.trim();
2761
- }
2762
- var META_NARRATION_HARD_MARKERS = [
2763
- /\[⚠️\s*CONTENT GENERATION MODE\]/,
2764
- /CONTENT_ONLY_STREAM_REMINDER\b/,
2765
- /<system-reminder>/i
2766
- ];
2767
- var META_NARRATION_HEURISTICS = [
2768
- /\bthe user (?:is asking me|wants me|is requesting|expects me)\b/i,
2769
- /\blet me (?:re-?read|re-?consider|reconsider|think about|carefully (?:re-?read|consider))\b/i,
2770
- /\bI'?m (?:in (?:a )?content-only|in CONTENT-ONLY|currently in)\b/i,
2771
- /\bI think (?:there might be|I should|I cannot|the (?:user|best)|maybe)\b/i,
2772
- /\bWait,?\s+let me\b/i,
2773
- /\bActually,?\s+I\b/i,
2774
- /\bI need to be honest with the user\b/i,
2775
- /\bI(?:'m| am) in a special mode\b/i,
2776
- /\bGiven that I cannot\b/i
2777
- ];
2778
- function detectMetaNarration(content) {
2779
- if (!content) return null;
2780
- const head = content.slice(0, 2e3);
2781
- for (const re of META_NARRATION_HARD_MARKERS) {
2782
- if (re.test(head)) return re.source;
2783
- }
2784
- if (/^#{1,3}\s+\S/m.test(head)) return null;
2785
- let hits = 0;
2786
- let firstMatch = "";
2787
- for (const re of META_NARRATION_HEURISTICS) {
2788
- if (re.test(head)) {
2789
- hits++;
2790
- if (!firstMatch) firstMatch = re.source;
2791
- if (hits >= 2) return `meta-narration:${firstMatch}`;
2792
- }
2793
- }
2794
- return null;
2795
- }
2796
- function looksLikeDocumentBody(content) {
2797
- if (!content || content.length < 200) return false;
2798
- if (/^#{1,6}\s+\S/m.test(content)) return true;
2799
- const paragraphs = content.split(/\n\s*\n/).filter((p) => p.trim().length > 30);
2800
- if (paragraphs.length >= 3) return true;
2801
- return false;
2802
- }
2803
- function stripToolCallReminder(systemPrompt) {
2804
- if (!systemPrompt) return systemPrompt;
2805
- const idx = systemPrompt.indexOf("[\u26A0\uFE0F Mandatory Tool Call Policy]");
2806
- if (idx === -1) return systemPrompt;
2807
- return systemPrompt.slice(0, idx).trimEnd();
2808
- }
2809
- var TEE_FINAL_USER_NUDGE = `\u26A0\uFE0F STOP using tools NOW. The save_last_response tee stream is open and capturing every token of THIS response. Output ONLY the requested document body, in markdown. The very first character of your response must be the document's top-level heading (e.g. "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report"). Do NOT print <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, <think>, or any other tool-call markup. Do NOT narrate that you will produce the document \u2014 just produce it. Do NOT pretend to call tools \u2014 there are none in this stream.`;
2810
- var CONTENT_ONLY_STREAM_REMINDER = `
2811
-
2812
- [\u26A0\uFE0F CONTENT GENERATION MODE]
2813
- You are now in a CONTENT-ONLY streaming pass. The file at the configured path will receive every token of THIS response.
2814
- - Do NOT emit <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, or any tool-call XML/JSON markup.
2815
- - Do NOT print "I will now call ...", "let me read ...", "<think>" reasoning blocks (the surrounding REPL handles those separately \u2014 they should not enter the saved file).
2816
- - Do NOT pretend to call tools. There are NO tools available in this stream \u2014 only your text output is captured.
2817
- - Produce ONLY the requested document body. Markdown is fine. Code blocks are fine. Tool-call markup is NOT.
2818
- - If you accidentally start a <tool_call>, STOP and produce the document body instead.
2819
-
2820
- The file is closed and named when this stream ends. If your output contains pseudo-tool-call markup, the save will be REJECTED and you will be asked to retry.`;
3069
+ ]
3070
+ };
3071
+ };
2821
3072
 
2822
3073
  // src/providers/kimi.ts
2823
3074
  var KIMI_XML_REMINDER = `
@@ -12384,7 +12635,6 @@ var SessionHandler = class _SessionHandler {
12384
12635
  async handleChatWithTools(provider, messages, toolDefs, mcpBudgetNote) {
12385
12636
  const session = this.sessions.current;
12386
12637
  const apiMessages = [...messages];
12387
- const extraMessages = [];
12388
12638
  const maxToolRounds = this.config.get("maxToolRounds") ?? DEFAULT_MAX_TOOL_ROUNDS;
12389
12639
  const autoPauseIntervalRaw = this.config.get("autoPauseInterval");
12390
12640
  const autoPauseInterval = typeof autoPauseIntervalRaw === "number" ? autoPauseIntervalRaw : 50;
@@ -12395,37 +12645,113 @@ var SessionHandler = class _SessionHandler {
12395
12645
  ${mcpBudgetNote}` : "");
12396
12646
  const systemPromptVolatile = toolVolatile;
12397
12647
  const modelParams = this.getModelParams();
12398
- const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
12648
+ const usage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
12399
12649
  const supportsStreamingTools = typeof provider.chatWithToolsStream === "function";
12400
- const roundToolHistory = [];
12401
- const budgetWarner = new BudgetWarner(maxToolRounds);
12402
- const emptyGuard = new EmptyResponseGuard();
12403
- const ctxMonitor = new ContextPressureMonitor();
12404
- const freeRounds = new FreeRoundTracker();
12405
12650
  const ac = new AbortController();
12406
12651
  this.abortController = ac;
12407
12652
  try {
12408
- for (let round = 0; round < maxToolRounds; round++) {
12409
- if (ac.signal.aborted) break;
12410
- this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
12411
- this.send({ type: "round_progress", current: round + 1, total: maxToolRounds });
12412
- const budgetWarning = budgetWarner.check(maxToolRounds - round);
12413
- if (budgetWarning) {
12414
- extraMessages.push({ role: "user", content: budgetWarning.injectMessage });
12415
- if (budgetWarning.displayMessage) {
12416
- this.send({ type: "info", message: budgetWarning.displayMessage });
12653
+ const loopResult = await runAgentLoop({
12654
+ maxToolRounds,
12655
+ autoPauseInterval,
12656
+ planMode: this.planMode,
12657
+ providerId: this.currentProvider,
12658
+ toolDefs,
12659
+ signal: ac.signal,
12660
+ usage,
12661
+ callModel: async (_round, extraMessages) => {
12662
+ const chatRequest = {
12663
+ messages: apiMessages,
12664
+ model: this.currentModel,
12665
+ systemPrompt,
12666
+ systemPromptVolatile,
12667
+ stream: false,
12668
+ temperature: modelParams.temperature,
12669
+ maxTokens: modelParams.maxTokens,
12670
+ timeout: modelParams.timeout,
12671
+ thinking: modelParams.thinking,
12672
+ thinkingBudget: modelParams.thinkingBudget,
12673
+ signal: ac.signal,
12674
+ ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
12675
+ };
12676
+ try {
12677
+ if (supportsStreamingTools) {
12678
+ const streamGen = provider.chatWithToolsStream(chatRequest, toolDefs);
12679
+ return await this.consumeToolStream(streamGen, ac);
12680
+ }
12681
+ const result = await provider.chatWithTools(chatRequest, toolDefs);
12682
+ return result;
12683
+ } catch (providerErr) {
12684
+ const errMsg = providerErr instanceof Error ? providerErr.message : String(providerErr);
12685
+ const isCtxLengthError = /maximum context length|context_length_exceeded|context window|too many tokens|reduce the length of the messages/i.test(errMsg);
12686
+ if (isCtxLengthError) {
12687
+ this.send({
12688
+ type: "response_done",
12689
+ content: `\u26A0 Context length exceeded \u2014 the conversation is too long for this model.
12690
+
12691
+ Details: ${errMsg.split("\n")[0]}
12692
+
12693
+ **Recovery options**:
12694
+ 1. Run \`/compact\` to summarize old messages and free context
12695
+ 2. Run \`/clear\` to start a fresh session
12696
+ 3. Run \`/model\` to switch to a model with a larger context window`,
12697
+ usage
12698
+ });
12699
+ return { stopLoop: true };
12700
+ }
12701
+ throw providerErr;
12417
12702
  }
12418
- }
12419
- if (this.userInterjection) {
12703
+ },
12704
+ callSummary: async (summaryExtra) => {
12705
+ const summaryResult = await provider.chatWithTools(
12706
+ {
12707
+ messages: apiMessages,
12708
+ model: this.currentModel,
12709
+ systemPrompt,
12710
+ systemPromptVolatile,
12711
+ stream: false,
12712
+ temperature: modelParams.temperature,
12713
+ maxTokens: modelParams.maxTokens,
12714
+ timeout: modelParams.timeout,
12715
+ _extraMessages: summaryExtra
12716
+ },
12717
+ []
12718
+ );
12719
+ return "content" in summaryResult ? { content: summaryResult.content, usage: summaryResult.usage } : { usage: summaryResult.usage };
12720
+ },
12721
+ executeTools: async (toolCalls) => {
12722
+ googleSearchContext.configManager = this.config;
12723
+ spawnAgentContext.provider = provider;
12724
+ spawnAgentContext.model = this.currentModel;
12725
+ spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
12726
+
12727
+ ---
12728
+
12729
+ ${systemPromptVolatile}` : systemPrompt;
12730
+ spawnAgentContext.modelParams = modelParams;
12731
+ spawnAgentContext.configManager = this.config;
12732
+ ToolExecutor.currentMessageIndex = this.sessions.current?.messages.length ?? 0;
12733
+ return this.toolExecutor.executeAll(toolCalls);
12734
+ },
12735
+ buildToolResultMessages: (toolCalls, results, reasoningContent) => provider.buildToolResultMessages(toolCalls, results, reasoningContent),
12736
+ getContextWindow: () => this.getContextWindowSize(),
12737
+ estimateRequestTokens: (extraMessages) => this.estimateRequestTokens(systemPrompt, extraMessages),
12738
+ pollInterjection: () => {
12739
+ if (!this.userInterjection) return null;
12420
12740
  const msg = this.userInterjection;
12421
12741
  this.userInterjection = null;
12422
12742
  this.send({ type: "info", message: `\u26A1 Interjection: "${msg}"` });
12423
- extraMessages.push({ role: "user", content: msg });
12424
- }
12425
- const ctxWindow = this.getContextWindowSize();
12426
- if (ctxWindow > 0) {
12427
- const reqTokens = this.estimateRequestTokens(systemPrompt, extraMessages);
12428
- const pressure = ctxMonitor.check(reqTokens, ctxWindow);
12743
+ return msg;
12744
+ },
12745
+ onRoundStart: (round, total) => {
12746
+ this.toolExecutor.setRoundInfo(round + 1, total);
12747
+ this.send({ type: "round_progress", current: round + 1, total });
12748
+ },
12749
+ onBudgetWarning: (warning) => {
12750
+ if (warning.displayMessage) {
12751
+ this.send({ type: "info", message: warning.displayMessage });
12752
+ }
12753
+ },
12754
+ onContextPressure: (pressure, ctxWindow) => {
12429
12755
  if (pressure.action === "abort") {
12430
12756
  this.send({
12431
12757
  type: "response_done",
@@ -12437,191 +12763,48 @@ Too much tool output accumulated this turn. Your work so far is preserved.
12437
12763
  1. Run \`/compact\` to shrink history, then ask the AI to continue
12438
12764
  2. Run \`/clear\` to start fresh
12439
12765
  3. Switch to a larger-context model`,
12440
- usage: roundUsage
12766
+ usage
12441
12767
  });
12442
- this.addWebSessionUsage(roundUsage);
12443
- session.addTokenUsage(roundUsage);
12444
- return;
12445
- } else if (pressure.action === "warn") {
12768
+ } else {
12446
12769
  this.send({
12447
12770
  type: "info",
12448
12771
  message: `\u26A0 Context at ${Math.round(pressure.ratio * 100)}% \u2014 asking AI to wrap up`
12449
12772
  });
12450
- extraMessages.push({ role: "user", content: pressure.injectMessage });
12451
12773
  }
12452
- }
12453
- const chatRequest = {
12454
- messages: apiMessages,
12455
- model: this.currentModel,
12456
- systemPrompt,
12457
- systemPromptVolatile,
12458
- stream: false,
12459
- temperature: modelParams.temperature,
12460
- maxTokens: modelParams.maxTokens,
12461
- timeout: modelParams.timeout,
12462
- thinking: modelParams.thinking,
12463
- thinkingBudget: modelParams.thinkingBudget,
12464
- signal: ac.signal,
12465
- ...extraMessages.length > 0 ? { _extraMessages: extraMessages } : {}
12466
- };
12467
- let result;
12468
- try {
12469
- if (supportsStreamingTools) {
12470
- const streamGen = provider.chatWithToolsStream(chatRequest, toolDefs);
12471
- result = await this.consumeToolStream(streamGen, ac);
12774
+ },
12775
+ onHallucinationRetry: ({ phantomPaths }) => {
12776
+ const detail = phantomPaths.length > 0 ? ` (phantom files: ${phantomPaths.join(", ")})` : "";
12777
+ this.send({ type: "info", message: `\u26A0 Hallucinated completion detected, forcing retry...${detail}` });
12778
+ },
12779
+ onEmptyResponse: (decision) => {
12780
+ if (decision.action === "nudge") {
12781
+ this.send({ type: "info", message: decision.displayMessage });
12472
12782
  } else {
12473
- result = await provider.chatWithTools(chatRequest, toolDefs);
12474
- }
12475
- } catch (providerErr) {
12476
- const errMsg = providerErr instanceof Error ? providerErr.message : String(providerErr);
12477
- const isCtxLengthError = /maximum context length|context_length_exceeded|context window|too many tokens|reduce the length of the messages/i.test(errMsg);
12478
- if (isCtxLengthError) {
12479
12783
  this.send({
12480
12784
  type: "response_done",
12481
- content: `\u26A0 Context length exceeded \u2014 the conversation is too long for this model.
12482
-
12483
- Details: ${errMsg.split("\n")[0]}
12484
-
12485
- **Recovery options**:
12486
- 1. Run \`/compact\` to summarize old messages and free context
12487
- 2. Run \`/clear\` to start a fresh session
12488
- 3. Run \`/model\` to switch to a model with a larger context window`,
12489
- usage: roundUsage
12490
- });
12491
- this.addWebSessionUsage(roundUsage);
12492
- session.addTokenUsage(roundUsage);
12493
- return;
12494
- }
12495
- throw providerErr;
12496
- }
12497
- if (ac.signal.aborted) break;
12498
- accumulateUsage(roundUsage, result.usage);
12499
- const hasToolCalls = !!(result.toolCalls && result.toolCalls.length > 0);
12500
- const contentBlank = !result.content || result.content.trim() === "";
12501
- if (!hasToolCalls && contentBlank) {
12502
- const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, result.finishReason);
12503
- if (decision.action === "nudge") {
12504
- this.send({ type: "info", message: decision.displayMessage });
12505
- extraMessages.push({ role: "user", content: decision.injectMessage });
12506
- continue;
12507
- }
12508
- this.send({
12509
- type: "response_done",
12510
- content: `${decision.displayMessage}
12785
+ content: `${decision.displayMessage}
12511
12786
 
12512
12787
  ${decision.hint}
12513
12788
  Try: /compact to reduce context, /clear to reset, or switch to a larger-context model.`,
12514
- usage: roundUsage
12515
- });
12516
- this.addWebSessionUsage(roundUsage);
12517
- session.addTokenUsage(roundUsage);
12518
- return;
12519
- }
12520
- emptyGuard.onNonEmpty();
12521
- if (result.content && !result.toolCalls) {
12522
- const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
12523
- const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
12524
- const bashRanThisTurn = extractBashCommands(extraMessages).length > 0;
12525
- if (hasWriteTools && !alreadyWrote && !bashRanThisTurn && detectsHallucinatedFileOp(result.content) && round < maxToolRounds - 1) {
12526
- this.send({ type: "info", message: "\u26A0 Hallucinated completion detected, forcing retry..." });
12527
- const reasoningField = result.reasoningContent ? { reasoning_content: result.reasoningContent } : this.currentProvider === "deepseek" ? { reasoning_content: "" } : {};
12528
- extraMessages.push(
12529
- { role: "assistant", content: result.content, ...reasoningField },
12530
- { role: "user", content: HALLUCINATION_CORRECTION_MESSAGE }
12531
- );
12532
- continue;
12789
+ usage
12790
+ });
12533
12791
  }
12534
- this.send({ type: "response_done", content: result.content, usage: roundUsage });
12792
+ },
12793
+ onFinalContent: (content, { reasoningContent }) => {
12794
+ this.send({ type: "response_done", content, usage });
12535
12795
  session.addMessage({
12536
12796
  role: "assistant",
12537
- content: result.content,
12797
+ content,
12538
12798
  timestamp: /* @__PURE__ */ new Date(),
12539
- ...result.reasoningContent ? { reasoningContent: result.reasoningContent } : {}
12540
- });
12541
- this.addWebSessionUsage(roundUsage);
12542
- session.addTokenUsage(roundUsage);
12543
- return;
12544
- }
12545
- if (result.toolCalls && result.toolCalls.length > 0) {
12546
- roundToolHistory.push({
12547
- round: round + 1,
12548
- tools: result.toolCalls.map((tc) => tc.name)
12549
- });
12550
- for (const tc of result.toolCalls) {
12551
- if (tc.name.startsWith("mcp__")) this.usedMcpToolNames.add(tc.name);
12552
- }
12553
- googleSearchContext.configManager = this.config;
12554
- spawnAgentContext.provider = provider;
12555
- spawnAgentContext.model = this.currentModel;
12556
- spawnAgentContext.systemPrompt = systemPromptVolatile ? `${systemPrompt}
12557
-
12558
- ---
12559
-
12560
- ${systemPromptVolatile}` : systemPrompt;
12561
- spawnAgentContext.modelParams = modelParams;
12562
- spawnAgentContext.configManager = this.config;
12563
- ToolExecutor.currentMessageIndex = this.sessions.current?.messages.length ?? 0;
12564
- const saveLastResponseCall = result.toolCalls.find((tc) => tc.name === "save_last_response");
12565
- const saveLastResponsePath = saveLastResponseCall ? String(saveLastResponseCall.arguments["path"] ?? "") : "";
12566
- if (saveLastResponseCall && saveLastResponsePath) {
12567
- const teeResult = await this.runSaveLastResponseTee(
12568
- provider,
12569
- saveLastResponseCall,
12570
- saveLastResponsePath,
12571
- apiMessages,
12572
- extraMessages,
12573
- systemPrompt,
12574
- systemPromptVolatile,
12575
- modelParams,
12576
- ac,
12577
- roundUsage
12578
- );
12579
- const teeToolResults = result.toolCalls.map((tc) => {
12580
- if (tc.id === saveLastResponseCall.id) {
12581
- return {
12582
- callId: tc.id,
12583
- content: teeResult.summary,
12584
- isError: teeResult.isError
12585
- };
12586
- }
12587
- return {
12588
- callId: tc.id,
12589
- content: "[skipped: file already saved by tee streaming]",
12590
- isError: false
12591
- };
12592
- });
12593
- const reasoningContent2 = result.reasoningContent;
12594
- const newMsgs2 = provider.buildToolResultMessages(result.toolCalls, teeToolResults, reasoningContent2);
12595
- extraMessages.push(...newMsgs2);
12596
- persistToolRound(session, result.toolCalls, teeToolResults, {
12597
- assistantContent: teeResult.content,
12598
- reasoningContent: reasoningContent2
12599
- });
12600
- freeRounds.apply(result.toolCalls.map((tc) => tc.name));
12601
- continue;
12602
- }
12603
- const toolResults = await this.toolExecutor.executeAll(result.toolCalls);
12604
- const reasoningContent = result.reasoningContent;
12605
- const newMsgs = provider.buildToolResultMessages(result.toolCalls, toolResults, reasoningContent);
12606
- extraMessages.push(...newMsgs);
12607
- persistToolRound(session, result.toolCalls, toolResults, {
12608
- assistantContent: result.content,
12609
- reasoningContent
12799
+ ...reasoningContent ? { reasoningContent } : {}
12610
12800
  });
12611
- if (freeRounds.apply(result.toolCalls.map((tc) => tc.name))) {
12612
- round--;
12613
- }
12614
- if (this.userInterjection) {
12615
- const msg = this.userInterjection;
12616
- this.userInterjection = null;
12617
- this.send({ type: "info", message: `\u26A1 Interjection: "${msg}"` });
12618
- extraMessages.push({ role: "user", content: msg });
12619
- }
12620
- }
12621
- const effectiveRound = round + 1;
12622
- const remaining = maxToolRounds - effectiveRound;
12623
- if (autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0 && !ac.signal.aborted) {
12624
- const toolSummary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
12801
+ },
12802
+ persistRound: (toolCalls, results, info) => {
12803
+ persistToolRound(session, toolCalls, results, info);
12804
+ },
12805
+ // Track MCP tool usage for next-turn budget prioritization (C1)
12806
+ onMcpToolUsed: (name) => this.usedMcpToolNames.add(name),
12807
+ requestAutoPause: async ({ effectiveRound, maxToolRounds: totalRounds, toolSummary }) => {
12625
12808
  const requestId = `pause_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
12626
12809
  const pauseResp = await new Promise((resolve7) => {
12627
12810
  this.pendingAutoPause.set(requestId, resolve7);
@@ -12629,58 +12812,85 @@ ${systemPromptVolatile}` : systemPrompt;
12629
12812
  type: "auto_pause_request",
12630
12813
  requestId,
12631
12814
  currentRound: effectiveRound,
12632
- totalRounds: maxToolRounds,
12815
+ totalRounds,
12633
12816
  toolSummary
12634
12817
  });
12635
12818
  });
12636
- if (ac.signal.aborted) break;
12637
12819
  if (pauseResp.action === "stop") {
12638
- this.send({ type: "info", message: `\u23F8 Stopped by user at ${effectiveRound}/${maxToolRounds}` });
12639
- extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
12640
- break;
12820
+ this.send({ type: "info", message: `\u23F8 Stopped by user at ${effectiveRound}/${totalRounds}` });
12641
12821
  } else if (pauseResp.action === "redirect" && pauseResp.message) {
12642
12822
  this.send({ type: "info", message: `\u26A1 Redirect: "${pauseResp.message}"` });
12643
- extraMessages.push({ role: "user", content: pauseResp.message });
12644
12823
  }
12645
- }
12646
- }
12647
- try {
12648
- const summaryExtra = [
12649
- ...extraMessages,
12650
- { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
12651
- ];
12652
- const summaryResult = await provider.chatWithTools(
12653
- {
12654
- messages: apiMessages,
12655
- model: this.currentModel,
12824
+ return pauseResp;
12825
+ },
12826
+ onRoundsExhausted: (summaryContent) => {
12827
+ if (summaryContent !== null) {
12828
+ this.send({
12829
+ type: "response_done",
12830
+ content: `\u26A0 Reached maximum tool call rounds (${maxToolRounds}).
12831
+
12832
+ ${summaryContent}`,
12833
+ usage
12834
+ });
12835
+ session.addMessage({ role: "assistant", content: summaryContent, timestamp: /* @__PURE__ */ new Date() });
12836
+ } else {
12837
+ this.send({
12838
+ type: "error",
12839
+ message: `Reached maximum tool call rounds (${maxToolRounds}). You can continue by asking the AI to proceed.`
12840
+ });
12841
+ }
12842
+ },
12843
+ // ─── save_last_response tee-streaming(v0.4.102+)─────────────────
12844
+ // AI 在 Web 模式下调用 save_last_response 时,复刻 REPL 的 tee 流式路径:
12845
+ // 发起一次新的 chatStream → 实时通过 WS 推送文本 + 同步写盘 → 注入合成
12846
+ // 工具结果。否则该工具会落到默认 executor,读到空的 lastResponseStore
12847
+ // 直接报错;用户被迫退到 write_file,又因 tool_call arguments 截断
12848
+ // (~2KB) 只能写出片段,再用 edit_file 反复 insert 才能补全(v0.4.101 报告)。
12849
+ // 与 REPL 不同:Web 端 tee 成功后继续 agentic 循环(返回 'continue'),
12850
+ // 让模型基于工具结果给出最终文本。
12851
+ runSaveLastResponseTee: async ({ toolCalls, call, saveToFile, extraMessages, reasoningContent }) => {
12852
+ const teeResult = await this.runSaveLastResponseTee(
12853
+ provider,
12854
+ call,
12855
+ saveToFile,
12856
+ apiMessages,
12857
+ extraMessages,
12656
12858
  systemPrompt,
12657
12859
  systemPromptVolatile,
12658
- stream: false,
12659
- temperature: modelParams.temperature,
12660
- maxTokens: modelParams.maxTokens,
12661
- timeout: modelParams.timeout,
12662
- _extraMessages: summaryExtra
12663
- },
12664
- []
12665
- );
12666
- if ("content" in summaryResult && summaryResult.content) {
12667
- this.send({
12668
- type: "response_done",
12669
- content: `\u26A0 Reached maximum tool call rounds (${maxToolRounds}).
12670
-
12671
- ${summaryResult.content}`,
12672
- usage: roundUsage
12860
+ modelParams,
12861
+ ac,
12862
+ usage
12863
+ );
12864
+ const teeToolResults = toolCalls.map((tc) => {
12865
+ if (tc.id === call.id) {
12866
+ return {
12867
+ callId: tc.id,
12868
+ content: teeResult.summary,
12869
+ isError: teeResult.isError
12870
+ };
12871
+ }
12872
+ return {
12873
+ callId: tc.id,
12874
+ content: "[skipped: file already saved by tee streaming]",
12875
+ isError: false
12876
+ };
12877
+ });
12878
+ const newMsgs = provider.buildToolResultMessages(toolCalls, teeToolResults, reasoningContent);
12879
+ extraMessages.push(...newMsgs);
12880
+ persistToolRound(session, toolCalls, teeToolResults, {
12881
+ assistantContent: teeResult.content,
12882
+ reasoningContent
12673
12883
  });
12674
- session.addMessage({ role: "assistant", content: summaryResult.content, timestamp: /* @__PURE__ */ new Date() });
12884
+ return "continue";
12675
12885
  }
12676
- } catch {
12677
- this.send({
12678
- type: "error",
12679
- message: `Reached maximum tool call rounds (${maxToolRounds}). You can continue by asking the AI to proceed.`
12680
- });
12886
+ });
12887
+ if (loopResult.reason !== "tee-stop") {
12888
+ this.addWebSessionUsage(usage);
12889
+ session.addTokenUsage(usage);
12890
+ }
12891
+ if (loopResult.reason === "aborted") {
12892
+ this.send({ type: "info", message: "[interrupted]" });
12681
12893
  }
12682
- this.addWebSessionUsage(roundUsage);
12683
- session.addTokenUsage(roundUsage);
12684
12894
  } catch (err) {
12685
12895
  if (err.name === "AbortError") {
12686
12896
  this.send({ type: "info", message: "[interrupted]" });
@@ -13866,7 +14076,7 @@ ${undoResults.map((r) => ` \u2022 ${r}`).join("\n")}` });
13866
14076
  case "test": {
13867
14077
  this.send({ type: "info", message: "\u{1F9EA} Running tests..." });
13868
14078
  try {
13869
- const { executeTests } = await import("./run-tests-625NA546.js");
14079
+ const { executeTests } = await import("./run-tests-IJYP6BMT.js");
13870
14080
  const argStr = args.join(" ").trim();
13871
14081
  let testArgs = {};
13872
14082
  if (argStr) {
@@ -14390,7 +14600,7 @@ Add .md files to create commands.` });
14390
14600
  return;
14391
14601
  }
14392
14602
  try {
14393
- const { searchChatMemory: searchChatMemory2, loadChatIndex: loadChatIndex2 } = await import("./chat-index-BE4TPLFH.js");
14603
+ const { searchChatMemory: searchChatMemory2, loadChatIndex: loadChatIndex2 } = await import("./chat-index-WDMVP7BN.js");
14394
14604
  const loaded = loadChatIndex2();
14395
14605
  if (!loaded || loaded.idx.chunks.length === 0) {
14396
14606
  this.send({ type: "memory_hits", query: q, hits: [], indexMissing: true });
@@ -14426,7 +14636,7 @@ Add .md files to create commands.` });
14426
14636
  }
14427
14637
  async handleMemoryStatus() {
14428
14638
  try {
14429
- const { getChatIndexStatus } = await import("./chat-index-BE4TPLFH.js");
14639
+ const { getChatIndexStatus } = await import("./chat-index-WDMVP7BN.js");
14430
14640
  const s = getChatIndexStatus();
14431
14641
  this.send({
14432
14642
  type: "memory_status",
@@ -14451,7 +14661,7 @@ Add .md files to create commands.` });
14451
14661
  type: "info",
14452
14662
  message: full ? "\u{1F9E0} Rebuilding chat memory index (this may take a while on first run \u2014 ~117 MB embedder)." : "\u{1F9E0} Refreshing chat memory index (incremental)\u2026"
14453
14663
  });
14454
- const { buildChatIndex } = await import("./chat-index-BE4TPLFH.js");
14664
+ const { buildChatIndex } = await import("./chat-index-WDMVP7BN.js");
14455
14665
  const stats = await buildChatIndex({
14456
14666
  full,
14457
14667
  onProgress: (p) => {