jinzd-ai-cli 0.4.180 → 0.4.181

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,7 @@ import {
36
36
  VERSION,
37
37
  buildUserIdentityPrompt,
38
38
  runTestsTool
39
- } from "./chunk-YDI22R3P.js";
39
+ } from "./chunk-D774AWKW.js";
40
40
  import {
41
41
  hasSemanticIndex,
42
42
  semanticSearch
@@ -1551,6 +1551,321 @@ Node.js does not automatically use system proxies. Try one of the following:
1551
1551
 
1552
1552
  // src/providers/openai-compatible.ts
1553
1553
  import OpenAI from "openai";
1554
+
1555
+ // src/core/agent-loop.ts
1556
/**
 * Measure how much of the end of `s` could still grow into `tag`.
 *
 * @param {string} s - Text received so far.
 * @param {string} tag - Full tag being searched for, e.g. "<think>".
 * @returns {number} Length of the longest proper prefix of `tag` that `s`
 *   ends with, or 0 when `s` does not end with any such prefix.
 */
function partialTagTail(s, tag) {
  // Only proper prefixes count: a complete tag is found by indexOf elsewhere.
  let candidate = Math.min(s.length, tag.length - 1);
  while (candidate > 0) {
    const prefix = tag.slice(0, candidate);
    if (s.endsWith(prefix)) {
      return candidate;
    }
    candidate -= 1;
  }
  return 0;
}
1563
/**
 * Incremental filter that strips inline `<think>…</think>` sections from
 * streamed text.
 *
 * Text arrives in arbitrary chunks, so a tag may be split across two chunks.
 * Any chunk suffix that could still grow into a tag is held back in `buf`
 * until the next `push` (or released by `flush` at end of stream).
 */
var ThinkTagFilter = class {
  /** True while the most recent `<think>` has not been closed yet. */
  inThink = false;
  /** Unconsumed input; after `push` returns it holds at most a partial tag prefix. */
  buf = "";

  /**
   * Feed the next chunk and get back the text that is visible so far.
   *
   * @param {string} raw - Next chunk of streamed text. Null/undefined/empty
   *   chunks are ignored.
   * @returns {string} Text outside think sections that can be emitted now
   *   (possibly the empty string).
   */
  push(raw) {
    // `buf += undefined` would inject the literal text "undefined" into the
    // visible output, so nullish chunks are dropped up front. An empty chunk
    // cannot change the outcome either: buf only holds a partial tag prefix.
    if (raw == null || raw === "") return "";
    this.buf += raw;
    let out = "";
    while (this.buf.length > 0) {
      if (!this.inThink) {
        const open = this.buf.indexOf("<think>");
        if (open === -1) {
          // No complete opening tag: emit everything except a possible
          // half-received "<think>" prefix at the very end.
          const keep = partialTagTail(this.buf, "<think>");
          out += this.buf.slice(0, this.buf.length - keep);
          this.buf = this.buf.slice(this.buf.length - keep);
          break;
        }
        out += this.buf.slice(0, open);
        this.buf = this.buf.slice(open + "<think>".length);
        this.inThink = true;
      } else {
        const close = this.buf.indexOf("</think>");
        if (close === -1) {
          // Still inside the think section: discard its content but keep a
          // possible half-received "</think>" prefix.
          const keep = partialTagTail(this.buf, "</think>");
          this.buf = this.buf.slice(this.buf.length - keep);
          break;
        }
        this.buf = this.buf.slice(close + "</think>".length);
        this.inThink = false;
      }
    }
    return out;
  }

  /**
   * End of stream: a held-back partial "<think>" prefix that never became a
   * real tag is ordinary text and is returned. Anything still buffered inside
   * an unclosed think section is discarded.
   *
   * @returns {string} The remaining visible text, or "" when there is none.
   */
  flush() {
    if (!this.inThink && this.buf) {
      const tail = this.buf;
      this.buf = "";
      return tail;
    }
    this.buf = "";
    return "";
  }
};
1605
/**
 * Find the last comma that separates members of the outermost JSON container,
 * ignoring commas inside string literals and inside nested objects/arrays.
 *
 * @param {string} text - Possibly truncated JSON text.
 * @returns {number} Index of that comma, or -1 when there is none.
 */
function findLastTopLevelComma(text) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  let last = -1;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') inString = true;
    else if (ch === "{" || ch === "[") depth++;
    else if (ch === "}" || ch === "]") depth--;
    else if (ch === "," && depth === 1) last = i;
  }
  return last;
}

/**
 * Parse the JSON argument string of a tool call, salvaging what is possible
 * when the JSON was cut off mid-stream.
 *
 * Repair strategy: drop everything after a comma and close the object. The
 * last comma anywhere is tried first; if that fails (the comma sat inside a
 * truncated string value or a nested container), the last structural
 * top-level comma is tried, which keeps all fully received arguments.
 *
 * @param {string} [raw] - Raw argument text; empty/undefined is treated as "{}".
 * @param {(message: string) => void} [onWarn] - Called once when the input
 *   had to be repaired or could not be parsed at all.
 * @returns {*} The parsed value, a partially recovered object, or `{}`.
 */
function repairToolCallArguments(raw, onWarn) {
  const argStr = raw || "{}";
  try {
    return JSON.parse(argStr);
  } catch {
    // Not valid JSON as-is: fall through to the repair attempts below.
  }
  const truncated = argStr.trimEnd();
  const lastComma = truncated.lastIndexOf(",");
  const candidates = [];
  if (lastComma > 0) {
    candidates.push(truncated.slice(0, lastComma) + "}");
    const structuralComma = findLastTopLevelComma(truncated);
    if (structuralComma > 0 && structuralComma !== lastComma) {
      candidates.push(truncated.slice(0, structuralComma) + "}");
    }
  } else {
    // No usable comma: keep only the text up to the first "{" and close it,
    // which yields an empty object when a "{" is present.
    candidates.push(truncated.slice(0, truncated.indexOf("{") + 1) + "}");
  }
  for (const candidate of candidates) {
    try {
      const repaired = JSON.parse(candidate);
      onWarn?.("Tool call JSON was truncated and auto-repaired. Some parameters may be missing.");
      return repaired;
    } catch {
      // This cut point did not produce valid JSON; try the next one.
    }
  }
  onWarn?.("Tool call JSON could not be parsed, using empty arguments.");
  return {};
}
1623
/**
 * Drain a provider's tool-call event stream into a single result object,
 * forwarding incremental events to the optional hooks as they arrive.
 *
 * Visible text is passed through a ThinkTagFilter, so inline
 * `<think>…</think>` sections are neither collected nor forwarded to `onText`.
 *
 * @param {AsyncIterable<object>} stream - Events with a `type` of
 *   "text_delta", "thinking_start", "thinking_delta", "thinking_end",
 *   "tool_call_start", "tool_call_delta", "tool_call_end" or "done".
 * @param {object} [hooks] - Optional callbacks and abort signal:
 *   `signal` (AbortSignal checked before each event), `onText(visible)`,
 *   `onThinkingStart()`, `onThinkingDelta(delta)`, `onThinkingEnd()`,
 *   `onToolCallStart(index, id, name)`, `onWarn(message)`.
 * @returns {Promise<object>} `{ textContent, toolCalls, usage, rawContent,
 *   reasoningContent, finishReason, aborted }`. When aborted, `toolCalls` is
 *   always empty. When tool calls exist, the array additionally carries
 *   `_rawContent` / `_streamedText` properties if those values are non-empty.
 * @throws Rethrows any stream error that is not recognised as an abort.
 */
async function consumeToolCallStream(stream, hooks = {}) {
  const textParts = [];
  // Keyed by event.index rather than arrival position, so sparse indexes
  // cannot attach argument fragments to the wrong call.
  const accumulators = new Map();
  let usage;
  let rawContent;
  let reasoningContent;
  let finishReason;
  let aborted = false;
  const thinkFilter = new ThinkTagFilter();
  const emitText = (raw) => {
    const visible = thinkFilter.push(raw);
    if (visible) {
      textParts.push(visible);
      hooks.onText?.(visible);
    }
  };
  // Invoke onWarn through `hooks`, like every other hook, so a method-style
  // callback keeps its `this` instead of being passed around unbound.
  const warn = (message) => hooks.onWarn?.(message);
  try {
    for await (const event of stream) {
      if (hooks.signal?.aborted) {
        aborted = true;
        break;
      }
      switch (event.type) {
        case "text_delta":
          // A missing delta must not be stringified into "undefined".
          if (event.delta != null) emitText(event.delta);
          break;
        case "thinking_start":
          hooks.onThinkingStart?.();
          break;
        case "thinking_delta":
          hooks.onThinkingDelta?.(event.delta);
          break;
        case "thinking_end":
          hooks.onThinkingEnd?.();
          break;
        case "tool_call_start":
          accumulators.set(event.index, { id: event.id, name: event.name, arguments: "" });
          hooks.onToolCallStart?.(event.index, event.id, event.name);
          break;
        case "tool_call_delta": {
          const acc = accumulators.get(event.index);
          // Fragments for an index that never started are dropped; a missing
          // fragment must not append the text "undefined" to the JSON.
          if (acc) acc.arguments += event.argumentsDelta ?? "";
          break;
        }
        case "tool_call_end":
          // Arguments are parsed once after the stream ends, not per call.
          break;
        case "done":
          if (event.usage) usage = event.usage;
          if (event.rawContent) rawContent = event.rawContent;
          if (event.reasoningContent) reasoningContent = event.reasoningContent;
          if (event.finishReason) finishReason = event.finishReason;
          break;
      }
    }
  } catch (err) {
    if (err instanceof Error && (err.name === "AbortError" || err.message.includes("aborted"))) {
      aborted = true;
    } else {
      throw err;
    }
  }
  // Always flush to reset the filter; the tail is only surfaced when the
  // stream completed normally.
  const tail = thinkFilter.flush();
  if (tail && !aborted) {
    textParts.push(tail);
    hooks.onText?.(tail);
  }
  const textContent = textParts.join("");
  if (aborted) {
    return { textContent, toolCalls: [], usage, rawContent, reasoningContent, finishReason, aborted };
  }
  const toolCalls = [];
  for (const [, acc] of accumulators) {
    toolCalls.push({
      id: acc.id,
      name: acc.name,
      arguments: repairToolCallArguments(acc.arguments, warn)
    });
  }
  if (toolCalls.length > 0) {
    if (rawContent) {
      toolCalls._rawContent = rawContent;
    }
    if (textContent) {
      toolCalls._streamedText = textContent;
    }
  }
  return { textContent, toolCalls, usage, rawContent, reasoningContent, finishReason, aborted };
}
1711
/** Tool names whose rounds may be exempted from the round budget. */
var FREE_ROUND_TOOLS = new Set(["write_todos"]);

/** How many exempt rounds in a row are tolerated before rounds count again. */
var MAX_CONSECUTIVE_FREE_ROUNDS = 3;

/**
 * Tracks consecutive rounds that consisted solely of "free" tools and decides
 * whether the current round should be exempt from the round budget.
 */
var FreeRoundTracker = class {
  /** Number of all-free rounds seen back to back. */
  consecutive = 0;

  /**
   * Record the tools used in one round.
   *
   * @param {string[]} toolNames - Names of the tools called in this round.
   * @returns {boolean} true when this round should not consume a round
   *   (the caller is expected to decrement its round counter).
   */
  apply(toolNames) {
    const usesPaidTool = toolNames.some((name) => !FREE_ROUND_TOOLS.has(name));
    if (toolNames.length === 0 || usesPaidTool) {
      // An empty round or any non-free tool breaks the streak.
      this.consecutive = 0;
      return false;
    }
    this.consecutive += 1;
    return this.consecutive <= MAX_CONSECUTIVE_FREE_ROUNDS;
  }
};
1726
/**
 * Emits at most one warning per severity level ("note" → "low" → "critical")
 * as the number of remaining tool rounds shrinks.
 *
 * Thresholds derive from the total budget (20% / 10% / 5%) with floors of
 * 10 / 5 / 3 rounds, and are forced to be strictly decreasing.
 */
var BudgetWarner = class {
  /** Rounds-left threshold for the "note" warning. */
  noteAt;
  /** Rounds-left threshold for the "low" warning (always below noteAt). */
  lowAt;
  /** Rounds-left threshold for the "critical" warning (always below lowAt). */
  criticalAt;
  warnedNote = false;
  warnedLow = false;
  warnedCritical = false;

  /**
   * @param {number} maxToolRounds - Total tool-round budget for the task.
   */
  constructor(maxToolRounds) {
    this.maxToolRounds = maxToolRounds;
    this.noteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
    const lowRaw = Math.max(5, Math.floor(maxToolRounds * 0.1));
    const criticalRaw = Math.max(3, Math.floor(maxToolRounds * 0.05));
    this.lowAt = Math.min(lowRaw, this.noteAt - 1);
    this.criticalAt = Math.min(criticalRaw, this.lowAt - 1);
  }

  /**
   * Check whether a warning is due for the given number of remaining rounds.
   *
   * @param {number} roundsLeft - Tool rounds still available.
   * @returns {{level: string, injectMessage: string, displayMessage?: string} | null}
   *   The most severe warning not yet issued, or null. "note" has no
   *   displayMessage.
   */
  check(roundsLeft) {
    if (!this.warnedCritical && roundsLeft <= this.criticalAt) {
      // A more severe warning supersedes the milder ones. Without marking
      // them, a small budget would yield "critical" first and then "low" and
      // "note" on the following rounds.
      this.warnedCritical = true;
      this.warnedLow = true;
      this.warnedNote = true;
      return {
        level: "critical",
        injectMessage: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`,
        displayMessage: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining`
      };
    }
    if (!this.warnedLow && roundsLeft <= this.lowAt) {
      this.warnedLow = true;
      this.warnedNote = true;
      return {
        level: "low",
        injectMessage: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. Use efficient approaches (batch edits, fewer reads). If you cannot finish everything, summarize what's done and what remains.`,
        displayMessage: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining`
      };
    }
    if (!this.warnedNote && roundsLeft <= this.noteAt) {
      this.warnedNote = true;
      return {
        level: "note",
        injectMessage: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${this.maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
      };
    }
    return null;
  }
};
1768
/** Message injected as a user turn after the model returned neither text nor tool calls. */
var EMPTY_RESPONSE_NUDGE = [
  "Your previous response was empty \u2014 no text and no tool calls.",
  "This usually means the context window is nearly full.",
  "Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains.",
  "Do NOT repeat earlier long outputs."
].join(" ");
1769
/**
 * Turn a provider finish reason into a short label for an empty response.
 *
 * @param {string} [fr] - The finish reason reported with the response, if any.
 * @returns {string} Human-readable description of why the response was empty.
 */
function describeFinishReason(fr) {
  switch (fr) {
    case "length":
      return "output limit reached (finish_reason=length)";
    case "content_filter":
      return "content blocked (finish_reason=content_filter)";
    default:
      // Any other truthy reason is echoed; a missing one gets the bare label.
      return fr ? `empty response (finish_reason=${fr})` : "empty response";
  }
}
1775
/**
 * Pick the troubleshooting hint shown when the loop stops on an empty response.
 *
 * @param {string} [fr] - The finish reason reported with the response, if any.
 * @returns {string} A hint specific to "length" / "content_filter", otherwise
 *   a generic one.
 */
function emptyResponseHint(fr) {
  const specificHints = new Map([
    ["length", "Output token limit hit \u2014 try /compact to reduce context, raise maxTokens, or /model to switch."],
    ["content_filter", "Content was blocked by the provider filter."]
  ]);
  return specificHints.get(fr) ?? "Context window may be exhausted or max_tokens too low.";
}
1780
/**
 * Decides what to do when the model returns an empty response: nudge it once,
 * then stop if the next response is empty as well.
 */
var EmptyResponseGuard = class {
  /** Nudges issued since the last non-empty response (0 or 1). */
  retries = 0;

  /**
   * Handle an empty response.
   *
   * @param {boolean} canRetry - Whether the caller is able to run another round.
   * @param {string} [finishReason] - Finish reason reported with the empty response.
   * @returns {{action: "nudge", injectMessage: string, displayMessage: string}
   *   | {action: "stop", displayMessage: string, hint: string}}
   */
  onEmpty(canRetry, finishReason) {
    if (this.retries === 0 && canRetry) {
      this.retries++;
      return {
        action: "nudge",
        injectMessage: EMPTY_RESPONSE_NUDGE,
        displayMessage: `\u26A0 ${describeFinishReason(finishReason)} \u2014 nudging AI to continue...`
      };
    }
    // Only claim "twice in a row" when a nudge was actually issued before.
    // With retries === 0 the loop stops on the first empty response because
    // the caller cannot retry.
    const displayMessage = this.retries > 0
      ? "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop."
      : "\u26A0 AI returned an empty response and no retry is possible. Stopping agentic loop.";
    return {
      action: "stop",
      displayMessage,
      hint: emptyResponseHint(finishReason)
    };
  }

  /** A non-empty response arrived: reset so a later empty response can be nudged once again. */
  onNonEmpty() {
    this.retries = 0;
  }
};
1802
/**
 * Classifies how full the context window is for an upcoming request:
 * "abort" at 95% or more, a one-time "warn" at 80% or more, otherwise "ok".
 */
var ContextPressureMonitor = class {
  /** Set once the 80% warning has been handed out; it is never repeated. */
  warned80 = false;

  /**
   * @param {number} requestTokens - Estimated token count of the request.
   * @param {number} contextWindow - Size of the context window in tokens;
   *   values <= 0 disable the check.
   * @returns {{action: "ok"|"warn"|"abort", ratio: number, injectMessage?: string}}
   *   `injectMessage` is present only for "warn".
   */
  check(requestTokens, contextWindow) {
    if (contextWindow <= 0) {
      return { action: "ok", ratio: 0 };
    }
    const ratio = requestTokens / contextWindow;
    if (ratio >= 0.95) {
      return { action: "abort", ratio };
    }
    const warningDue = ratio >= 0.8 && !this.warned80;
    if (!warningDue) {
      return { action: "ok", ratio };
    }
    this.warned80 = true;
    const percentUsed = Math.round(ratio * 100);
    const windowLabel = contextWindow.toLocaleString();
    return {
      action: "warn",
      ratio,
      injectMessage: `\u26A0\uFE0F Context pressure: ~${percentUsed}% of the ${windowLabel}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
    };
  }
};
1819
/**
 * Add one response's token usage onto a running total, in place.
 *
 * Every counter is treated as 0 when absent, on both sides, so a provider
 * that omits a field cannot turn the running total into NaN.
 *
 * @param {{inputTokens?: number, outputTokens?: number,
 *   cacheCreationTokens?: number, cacheReadTokens?: number}} total -
 *   Accumulator object; mutated.
 * @param {object} [delta] - Usage of a single response with the same fields;
 *   a falsy value is a no-op.
 * @returns {void}
 */
function accumulateUsage(total, delta) {
  if (!delta) return;
  total.inputTokens = (total.inputTokens ?? 0) + (delta.inputTokens ?? 0);
  total.outputTokens = (total.outputTokens ?? 0) + (delta.outputTokens ?? 0);
  total.cacheCreationTokens = (total.cacheCreationTokens ?? 0) + (delta.cacheCreationTokens ?? 0);
  total.cacheReadTokens = (total.cacheReadTokens ?? 0) + (delta.cacheReadTokens ?? 0);
}
1826
/**
 * Build the system-prompt section that tells the model about its tool-round
 * budget.
 *
 * @param {{maxToolRounds: number, autoPauseInterval: number, planMode?: boolean}} opts
 *   `planMode` selects the read-only Plan Mode wording; an
 *   `autoPauseInterval` greater than 0 appends a checkpoint bullet.
 * @returns {string} Prompt text that starts with a blank line.
 */
function buildRoundBudgetHint(opts) {
  const { maxToolRounds, autoPauseInterval, planMode } = opts;

  let pauseHint = "";
  if (autoPauseInterval > 0) {
    pauseHint = `\n- Every ${autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.`;
  }

  // The two leading empty entries produce the blank line that separates this
  // section from whatever precedes it in the system prompt.
  const lines = planMode
    ? [
        "",
        "",
        "[Tool Round Budget \u2014 Plan Mode]",
        `You have a maximum of ${maxToolRounds} tool call rounds. You are in READ-ONLY Plan Mode:`,
        "- Only use: read_file, list_dir, grep_files, glob_files, ask_user, write_todos",
        "- Do NOT attempt to call bash, write_file, edit_file \u2014 they are disabled",
        "- Do NOT write shell commands or code blocks as a substitute for tool calls",
        "- Do NOT read the same file more than once",
        "- Call write_todos ONCE to present your plan, then give a text summary",
        '- If the user asks you to execute anything, respond: "Please type /plan execute to switch to execute mode."'
      ]
    : [
        "",
        "",
        "[Tool Round Budget]",
        `You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan efficiently:`,
        "- Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.",
        "- Do NOT read the same file more than once \u2014 use the content from previous reads.",
        "- Prioritize the most critical tasks first in case rounds run out.",
        "- When remaining rounds are low, focus on completing the current task and summarizing."
      ];

  return lines.join("\n") + pauseHint;
}
1850
/**
 * Build the final user message sent once every tool round has been used,
 * asking the model for a text-only summary.
 *
 * @param {number} maxToolRounds - The round budget that was exhausted.
 * @returns {string} Four-line prompt: an instruction followed by three
 *   numbered points.
 */
function buildRoundsExhaustedPrompt(maxToolRounds) {
  const instruction = `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:`;
  const points = [
    "1. What work has been completed so far",
    "2. What tasks remain unfinished",
    "3. What the user can do next (e.g. send another request to continue)"
  ];
  return [instruction, ...points].join("\n");
}
1856
/**
 * Build the user message injected when the user stops the task at a pause
 * checkpoint.
 *
 * @param {number} effectiveRound - Round at which the task was stopped.
 * @param {number} maxToolRounds - Total round budget.
 * @returns {string} Single-line instruction asking for a summary without
 *   further tool calls.
 */
function buildUserStopMessage(effectiveRound, maxToolRounds) {
  const progress = `${effectiveRound}/${maxToolRounds}`;
  const sentences = [
    `The user has stopped the task at round ${progress}.`,
    "Do not call any more tools.",
    "Summarize what has been completed and what remains."
  ];
  return sentences.join(" ");
}
1859
/**
 * Summarise which tools were used in the most recent rounds.
 *
 * @param {Array<{tools?: string[]}>} history - Per-round records, oldest first.
 * @param {number} interval - How many trailing rounds to include. Values that
 *   are not greater than 0 select no rounds.
 * @returns {string} Comma-separated tool names, most used first, with a
 *   "×count" suffix for tools used more than once; "" when nothing was used.
 */
function summarizeRecentTools(history, interval) {
  // slice(-0) is slice(0) and would return the whole history, and a negative
  // interval would slice from the front, so non-positive windows are rejected.
  if (!(interval > 0)) return "";
  const counts = new Map();
  for (const round of history.slice(-interval)) {
    for (const tool of round.tools ?? []) {
      counts.set(tool, (counts.get(tool) ?? 0) + 1);
    }
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([name, count]) => (count > 1 ? `${name}\xD7${count}` : name))
    .join(", ");
}
1867
+
1868
+ // src/providers/openai-compatible.ts
1554
1869
  function toUsage(u) {
1555
1870
  if (!u) return void 0;
1556
1871
  const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
@@ -1773,28 +2088,11 @@ var OpenAICompatibleProvider = class extends BaseProvider {
1773
2088
  const reasoningContent = message.reasoning_content;
1774
2089
  if (message.tool_calls && message.tool_calls.length > 0) {
1775
2090
  const toolCalls = message.tool_calls.map((tc) => {
1776
- const rawArgs = tc.function.arguments || "{}";
1777
- let parsedArgs;
1778
- try {
1779
- parsedArgs = JSON.parse(rawArgs);
1780
- } catch {
1781
- const truncated = rawArgs.trimEnd();
1782
- const lastComma = truncated.lastIndexOf(",");
1783
- const fixed = lastComma > 0 ? truncated.slice(0, lastComma) + "}" : truncated.slice(0, truncated.indexOf("{") + 1) + "}";
1784
- try {
1785
- parsedArgs = JSON.parse(fixed);
1786
- process.stderr.write(
1787
- `[warn] Tool call JSON was truncated and auto-repaired. Some parameters may be missing.
1788
- `
1789
- );
1790
- } catch {
1791
- process.stderr.write(
1792
- `[warn] Tool call JSON could not be parsed, using empty arguments.
1793
- `
1794
- );
1795
- parsedArgs = {};
1796
- }
1797
- }
2091
+ const parsedArgs = repairToolCallArguments(
2092
+ tc.function.arguments || "{}",
2093
+ (m) => process.stderr.write(`[warn] ${m}
2094
+ `)
2095
+ );
1798
2096
  return {
1799
2097
  id: tc.id,
1800
2098
  name: tc.function.name,
@@ -11585,8 +11883,6 @@ async function persistDiscussion(state2, config, defaultProvider, defaultModel)
11585
11883
  }
11586
11884
 
11587
11885
  // src/web/session-handler.ts
11588
- var FREE_ROUND_TOOLS = /* @__PURE__ */ new Set(["write_todos"]);
11589
- var MAX_CONSECUTIVE_FREE_ROUNDS = 5;
11590
11886
  var SessionHandler = class _SessionHandler {
11591
11887
  ws;
11592
11888
  config;
@@ -12071,15 +12367,7 @@ var SessionHandler = class _SessionHandler {
12071
12367
  const autoPauseIntervalRaw = this.config.get("autoPauseInterval");
12072
12368
  const autoPauseInterval = typeof autoPauseIntervalRaw === "number" ? autoPauseIntervalRaw : 50;
12073
12369
  const { stable: toolStable, volatile: toolVolatile } = this.buildSystemPrompt();
12074
- const pauseHint = autoPauseInterval > 0 ? `
12075
- - Every ${autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.` : "";
12076
- const roundBudgetHint = `
12077
-
12078
- [Tool Round Budget]
12079
- You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan efficiently:
12080
- - Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.
12081
- - Prioritize the most critical tasks first in case rounds run out.
12082
- - When remaining rounds are low, focus on completing the current task and summarizing.${pauseHint}`;
12370
+ const roundBudgetHint = buildRoundBudgetHint({ maxToolRounds, autoPauseInterval });
12083
12371
  const systemPrompt = toolStable + TOOL_CALL_REMINDER + roundBudgetHint + (mcpBudgetNote ? `
12084
12372
 
12085
12373
  ${mcpBudgetNote}` : "");
@@ -12087,18 +12375,11 @@ ${mcpBudgetNote}` : "");
12087
12375
  const modelParams = this.getModelParams();
12088
12376
  const roundUsage = { inputTokens: 0, outputTokens: 0, cacheCreationTokens: 0, cacheReadTokens: 0 };
12089
12377
  const supportsStreamingTools = typeof provider.chatWithToolsStream === "function";
12090
- let consecutiveFreeRounds = 0;
12091
12378
  const roundToolHistory = [];
12092
- const warnNoteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
12093
- const warnLowAt = Math.max(5, Math.floor(maxToolRounds * 0.1));
12094
- const warnCriticalAt = Math.max(3, Math.floor(maxToolRounds * 0.05));
12095
- const warnLowEff = Math.min(warnLowAt, warnNoteAt - 1);
12096
- const warnCriticalEff = Math.min(warnCriticalAt, warnLowEff - 1);
12097
- let warnedNote = false;
12098
- let warnedLow = false;
12099
- let warnedCritical = false;
12100
- let emptyResponseRetries = 0;
12101
- let warnedCtx80 = false;
12379
+ const budgetWarner = new BudgetWarner(maxToolRounds);
12380
+ const emptyGuard = new EmptyResponseGuard();
12381
+ const ctxMonitor = new ContextPressureMonitor();
12382
+ const freeRounds = new FreeRoundTracker();
12102
12383
  const ac = new AbortController();
12103
12384
  this.abortController = ac;
12104
12385
  try {
@@ -12106,27 +12387,12 @@ ${mcpBudgetNote}` : "");
12106
12387
  if (ac.signal.aborted) break;
12107
12388
  this.toolExecutor.setRoundInfo(round + 1, maxToolRounds);
12108
12389
  this.send({ type: "round_progress", current: round + 1, total: maxToolRounds });
12109
- const roundsLeft = maxToolRounds - round;
12110
- if (!warnedCritical && roundsLeft <= warnCriticalEff) {
12111
- warnedCritical = true;
12112
- extraMessages.push({
12113
- role: "user",
12114
- content: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`
12115
- });
12116
- this.send({ type: "info", message: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining` });
12117
- } else if (!warnedLow && roundsLeft <= warnLowEff) {
12118
- warnedLow = true;
12119
- extraMessages.push({
12120
- role: "user",
12121
- content: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. If you cannot finish everything, summarize what's done and what remains.`
12122
- });
12123
- this.send({ type: "info", message: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining` });
12124
- } else if (!warnedNote && roundsLeft <= warnNoteAt) {
12125
- warnedNote = true;
12126
- extraMessages.push({
12127
- role: "user",
12128
- content: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
12129
- });
12390
+ const budgetWarning = budgetWarner.check(maxToolRounds - round);
12391
+ if (budgetWarning) {
12392
+ extraMessages.push({ role: "user", content: budgetWarning.injectMessage });
12393
+ if (budgetWarning.displayMessage) {
12394
+ this.send({ type: "info", message: budgetWarning.displayMessage });
12395
+ }
12130
12396
  }
12131
12397
  if (this.userInterjection) {
12132
12398
  const msg = this.userInterjection;
@@ -12137,11 +12403,11 @@ ${mcpBudgetNote}` : "");
12137
12403
  const ctxWindow = this.getContextWindowSize();
12138
12404
  if (ctxWindow > 0) {
12139
12405
  const reqTokens = this.estimateRequestTokens(systemPrompt, extraMessages);
12140
- const reqRatio = reqTokens / ctxWindow;
12141
- if (reqRatio >= 0.95) {
12406
+ const pressure = ctxMonitor.check(reqTokens, ctxWindow);
12407
+ if (pressure.action === "abort") {
12142
12408
  this.send({
12143
12409
  type: "response_done",
12144
- content: `\u26A0 Context at ${Math.round(reqRatio * 100)}% of ${ctxWindow.toLocaleString()} tokens \u2014 aborting before API rejection.
12410
+ content: `\u26A0 Context at ${Math.round(pressure.ratio * 100)}% of ${ctxWindow.toLocaleString()} tokens \u2014 aborting before API rejection.
12145
12411
 
12146
12412
  Too much tool output accumulated this turn. Your work so far is preserved.
12147
12413
 
@@ -12154,16 +12420,12 @@ Too much tool output accumulated this turn. Your work so far is preserved.
12154
12420
  this.addWebSessionUsage(roundUsage);
12155
12421
  session.addTokenUsage(roundUsage);
12156
12422
  return;
12157
- } else if (reqRatio >= 0.8 && !warnedCtx80) {
12158
- warnedCtx80 = true;
12423
+ } else if (pressure.action === "warn") {
12159
12424
  this.send({
12160
12425
  type: "info",
12161
- message: `\u26A0 Context at ${Math.round(reqRatio * 100)}% \u2014 asking AI to wrap up`
12162
- });
12163
- extraMessages.push({
12164
- role: "user",
12165
- content: `\u26A0\uFE0F Context pressure: ~${Math.round(reqRatio * 100)}% of the ${ctxWindow.toLocaleString()}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
12426
+ message: `\u26A0 Context at ${Math.round(pressure.ratio * 100)}% \u2014 asking AI to wrap up`
12166
12427
  });
12428
+ extraMessages.push({ role: "user", content: pressure.injectMessage });
12167
12429
  }
12168
12430
  }
12169
12431
  const chatRequest = {
@@ -12211,37 +12473,29 @@ Details: ${errMsg.split("\n")[0]}
12211
12473
  throw providerErr;
12212
12474
  }
12213
12475
  if (ac.signal.aborted) break;
12214
- if (result.usage) {
12215
- roundUsage.inputTokens += result.usage.inputTokens;
12216
- roundUsage.outputTokens += result.usage.outputTokens;
12217
- roundUsage.cacheCreationTokens += result.usage.cacheCreationTokens ?? 0;
12218
- roundUsage.cacheReadTokens += result.usage.cacheReadTokens ?? 0;
12219
- }
12476
+ accumulateUsage(roundUsage, result.usage);
12220
12477
  const hasToolCalls = !!(result.toolCalls && result.toolCalls.length > 0);
12221
12478
  const contentBlank = !result.content || result.content.trim() === "";
12222
12479
  if (!hasToolCalls && contentBlank) {
12223
- if (emptyResponseRetries === 0 && round < maxToolRounds - 1) {
12224
- emptyResponseRetries++;
12225
- this.send({
12226
- type: "info",
12227
- message: "\u26A0 AI returned an empty response. Nudging to continue..."
12228
- });
12229
- extraMessages.push({
12230
- role: "user",
12231
- content: "Your previous response was empty \u2014 no text and no tool calls. This usually means the context window is nearly full. Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains. Do NOT repeat earlier long outputs."
12232
- });
12480
+ const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, result.finishReason);
12481
+ if (decision.action === "nudge") {
12482
+ this.send({ type: "info", message: decision.displayMessage });
12483
+ extraMessages.push({ role: "user", content: decision.injectMessage });
12233
12484
  continue;
12234
12485
  }
12235
12486
  this.send({
12236
12487
  type: "response_done",
12237
- content: "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop.\n\nLikely causes: context window exhausted, max_tokens too low, or content filter.\nTry: /compact to reduce context, /clear to reset, or switch to a larger-context model.",
12488
+ content: `${decision.displayMessage}
12489
+
12490
+ ${decision.hint}
12491
+ Try: /compact to reduce context, /clear to reset, or switch to a larger-context model.`,
12238
12492
  usage: roundUsage
12239
12493
  });
12240
12494
  this.addWebSessionUsage(roundUsage);
12241
12495
  session.addTokenUsage(roundUsage);
12242
12496
  return;
12243
12497
  }
12244
- emptyResponseRetries = 0;
12498
+ emptyGuard.onNonEmpty();
12245
12499
  if (result.content && !result.toolCalls) {
12246
12500
  const hasWriteTools = toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
12247
12501
  const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
@@ -12321,7 +12575,7 @@ ${systemPromptVolatile}` : systemPrompt;
12321
12575
  assistantContent: teeResult.content,
12322
12576
  reasoningContent: reasoningContent2
12323
12577
  });
12324
- consecutiveFreeRounds = 0;
12578
+ freeRounds.apply(result.toolCalls.map((tc) => tc.name));
12325
12579
  continue;
12326
12580
  }
12327
12581
  const toolResults = await this.toolExecutor.executeAll(result.toolCalls);
@@ -12332,14 +12586,8 @@ ${systemPromptVolatile}` : systemPrompt;
12332
12586
  assistantContent: result.content,
12333
12587
  reasoningContent
12334
12588
  });
12335
- const allFree = result.toolCalls.every((tc) => FREE_ROUND_TOOLS.has(tc.name));
12336
- if (allFree) {
12337
- consecutiveFreeRounds++;
12338
- if (consecutiveFreeRounds <= MAX_CONSECUTIVE_FREE_ROUNDS) {
12339
- round--;
12340
- }
12341
- } else {
12342
- consecutiveFreeRounds = 0;
12589
+ if (freeRounds.apply(result.toolCalls.map((tc) => tc.name))) {
12590
+ round--;
12343
12591
  }
12344
12592
  if (this.userInterjection) {
12345
12593
  const msg = this.userInterjection;
@@ -12351,12 +12599,7 @@ ${systemPromptVolatile}` : systemPrompt;
12351
12599
  const effectiveRound = round + 1;
12352
12600
  const remaining = maxToolRounds - effectiveRound;
12353
12601
  if (autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0 && !ac.signal.aborted) {
12354
- const recentHistory = roundToolHistory.slice(-autoPauseInterval);
12355
- const toolCounts = /* @__PURE__ */ new Map();
12356
- for (const rh of recentHistory) {
12357
- for (const t of rh.tools) toolCounts.set(t, (toolCounts.get(t) || 0) + 1);
12358
- }
12359
- const toolSummary = [...toolCounts.entries()].sort((a, b) => b[1] - a[1]).map(([name, count]) => count > 1 ? `${name}\xD7${count}` : name).join(", ");
12602
+ const toolSummary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
12360
12603
  const requestId = `pause_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
12361
12604
  const pauseResp = await new Promise((resolve7) => {
12362
12605
  this.pendingAutoPause.set(requestId, resolve7);
@@ -12371,10 +12614,7 @@ ${systemPromptVolatile}` : systemPrompt;
12371
12614
  if (ac.signal.aborted) break;
12372
12615
  if (pauseResp.action === "stop") {
12373
12616
  this.send({ type: "info", message: `\u23F8 Stopped by user at ${effectiveRound}/${maxToolRounds}` });
12374
- extraMessages.push({
12375
- role: "user",
12376
- content: `The user has stopped the task at round ${effectiveRound}/${maxToolRounds}. Do not call any more tools. Summarize what has been completed and what remains.`
12377
- });
12617
+ extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
12378
12618
  break;
12379
12619
  } else if (pauseResp.action === "redirect" && pauseResp.message) {
12380
12620
  this.send({ type: "info", message: `\u26A1 Redirect: "${pauseResp.message}"` });
@@ -12385,13 +12625,7 @@ ${systemPromptVolatile}` : systemPrompt;
12385
12625
  try {
12386
12626
  const summaryExtra = [
12387
12627
  ...extraMessages,
12388
- {
12389
- role: "user",
12390
- content: `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:
12391
- 1. What work has been completed so far
12392
- 2. What tasks remain unfinished
12393
- 3. What the user can do next`
12394
- }
12628
+ { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
12395
12629
  ];
12396
12630
  const summaryResult = await provider.chatWithTools(
12397
12631
  {
@@ -12578,69 +12812,33 @@ This fresh stream has NO tools. Produce ONLY the document body: start with a mar
12578
12812
  });
12579
12813
  return { content: fullContent, summary, isError };
12580
12814
  }
12581
- /** Consume streaming tool call events and forward to client */
12815
+ /**
12816
+ * Consume streaming tool call events and forward to client.
12817
+ *
12818
+ * v0.4.181: 委托给 core/agent-loop 的统一消费器(与 REPL 同一实现)。
12819
+ * Web 端由此获得三个此前只在 REPL 修过的行为:内联 <think> 折叠
12820
+ * (MiniMax 推理泄漏不再渲染进浏览器)、截断 JSON 自动修复、
12821
+ * 工具调用按 event.index 键累积(稀疏 index 不错位)。
12822
+ */
12582
12823
  async consumeToolStream(streamGen, ac) {
12583
- let textContent = "";
12584
- const toolCalls = [];
12585
- const toolArgBuffers = /* @__PURE__ */ new Map();
12586
- let usage;
12587
- let rawContent;
12588
- let reasoningContent;
12589
- for await (const event of streamGen) {
12590
- if (ac.signal.aborted) break;
12591
- switch (event.type) {
12592
- case "text_delta":
12593
- textContent += event.delta;
12594
- this.send({ type: "text_delta", delta: event.delta });
12595
- break;
12596
- case "thinking_start":
12597
- this.send({ type: "thinking_start" });
12598
- break;
12599
- case "thinking_delta":
12600
- this.send({ type: "thinking_delta", delta: event.delta });
12601
- break;
12602
- case "thinking_end":
12603
- this.send({ type: "thinking_end" });
12604
- break;
12605
- case "tool_call_start":
12606
- toolArgBuffers.set(event.index, "");
12607
- this.send({ type: "info", message: `\u2699 Streaming: ${event.name}...` });
12608
- toolCalls.push({ id: event.id, name: event.name, arguments: {} });
12609
- break;
12610
- case "tool_call_delta": {
12611
- const buf = (toolArgBuffers.get(event.index) ?? "") + event.argumentsDelta;
12612
- toolArgBuffers.set(event.index, buf);
12613
- break;
12614
- }
12615
- case "tool_call_end": {
12616
- const tc = toolCalls.find((_, i) => i === event.index);
12617
- const argStr = toolArgBuffers.get(event.index) ?? "{}";
12618
- if (tc) {
12619
- try {
12620
- tc.arguments = JSON.parse(argStr);
12621
- } catch {
12622
- tc.arguments = { _raw: argStr };
12623
- }
12624
- }
12625
- break;
12626
- }
12627
- case "done":
12628
- if (event.usage) usage = event.usage;
12629
- if (event.rawContent) rawContent = event.rawContent;
12630
- if (event.reasoningContent) reasoningContent = event.reasoningContent;
12631
- break;
12632
- }
12633
- }
12634
- if (toolCalls.length > 0) {
12635
- if (rawContent) {
12636
- toolCalls._rawContent = rawContent;
12637
- }
12638
- if (textContent) {
12639
- toolCalls._streamedText = textContent;
12640
- }
12641
- return { toolCalls, usage, reasoningContent };
12824
+ const result = await consumeToolCallStream(streamGen, {
12825
+ signal: ac.signal,
12826
+ onText: (visible) => this.send({ type: "text_delta", delta: visible }),
12827
+ onThinkingStart: () => this.send({ type: "thinking_start" }),
12828
+ onThinkingDelta: (delta) => this.send({ type: "thinking_delta", delta }),
12829
+ onThinkingEnd: () => this.send({ type: "thinking_end" }),
12830
+ onToolCallStart: (_index, _id, name) => this.send({ type: "info", message: `\u2699 Streaming: ${name}...` }),
12831
+ onWarn: (message) => this.send({ type: "info", message: `\u26A0 ${message}` })
12832
+ });
12833
+ if (result.toolCalls.length > 0) {
12834
+ return { toolCalls: result.toolCalls, usage: result.usage, reasoningContent: result.reasoningContent };
12642
12835
  }
12643
- return { content: textContent, usage, reasoningContent };
12836
+ return {
12837
+ content: result.textContent,
12838
+ usage: result.usage,
12839
+ reasoningContent: result.reasoningContent,
12840
+ finishReason: result.finishReason
12841
+ };
12644
12842
  }
12645
12843
  // ── Commands ─────────────────────────────────────────────────────
12646
12844
  async handleCommand(name, args) {
@@ -13646,7 +13844,7 @@ ${undoResults.map((r) => ` \u2022 ${r}`).join("\n")}` });
13646
13844
  case "test": {
13647
13845
  this.send({ type: "info", message: "\u{1F9EA} Running tests..." });
13648
13846
  try {
13649
- const { executeTests } = await import("./run-tests-BUII3HBU.js");
13847
+ const { executeTests } = await import("./run-tests-HZVKHQ33.js");
13650
13848
  const argStr = args.join(" ").trim();
13651
13849
  let testArgs = {};
13652
13850
  if (argStr) {