jinzd-ai-cli 0.4.186 → 0.4.187

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -903,1393 +903,1529 @@ Node.js does not automatically use system proxies. Try one of the following:
903
903
  // src/providers/openai-compatible.ts
904
904
  import OpenAI from "openai";
905
905
 
906
- // src/core/agent-loop.ts
907
- function partialTagTail(s, tag) {
908
- const max = Math.min(s.length, tag.length - 1);
909
- for (let len = max; len > 0; len--) {
910
- if (s.endsWith(tag.slice(0, len))) return len;
911
- }
912
- return 0;
906
+ // src/tools/hallucination.ts
907
/**
 * Regular expressions for phrases in which a model claims, in plain response
 * text, that a file was generated / saved / written / created.
 *
 * All patterns are non-global, so `.test()` is stateless and the array can be
 * shared freely between calls.
 *
 * Fixes relative to the previous revision:
 *  - `[成功]?` was a character class (one optional character out of 成 / 功),
 *    so the documented phrases "文件已成功创建" and "教案已成功生成" never matched.
 *    It is now the optional group `(?:成功)?`.
 *  - `已成功[保写创生]入?` only consumed the first character of the verb, so
 *    "已成功保存!" / "已成功创建!" could never reach the trailing delimiter.
 *    The full two-character verbs are now spelled out.
 */
var HALLUCINATION_PATTERNS = [
  // "文件路径: `path/to/file.ext`" — labelled path; a file extension is required.
  /文件路径[::]\s*`?[^\s`]+\.\w{1,5}/,
  // "已生成:" / "已生成!" — "generated" followed by a colon or exclamation mark.
  /已生成[::!!]/,
  // "已保存到 `path`" — "saved (to)" must be followed by an opening quote/backtick.
  /已保存到?\s*[`'"]/,
  // "已写入!" — "written" followed by a colon or exclamation mark.
  /已写入[::!!]/,
  // "已创建!" — "created" followed by a colon or exclamation mark.
  /已创建[::!!]/,
  // "File written to" / "File saved as" — the preposition is required.
  /File\s+(?:written|saved|created)\s+(?:to|as|at)/i,
  // "生成完成!" — "generation complete!".
  /生成完成[!!]/,
  // "✅ 文件已保存 path.ext" — check mark plus a token ending in a file extension.
  /✅\s*(?:文件|已[生保写创]|第)\S*\.\w{1,5}/,
  // "文件已成功创建" / "文件已创建" — "file (successfully) created".
  /文件已(?:成功)?创建/,
  // "教案已成功生成" / "教案已保存" — "lesson plan (successfully) generated/saved".
  /教案已(?:成功)?[生创保写]/,
  // "已成功保存" / "已成功写入" / "已成功创建" / "已成功生成", followed by a delimiter.
  /已成功(?:保存|写入|创建|生成)[::!!\s`'"]/,
  // "保存到了 `path`" / "保存至 'path'" — "saved to" followed by an opening quote.
  /保存[到至]了?\s*[`'"]/,
  // "内容如下:" — "the content is as follows:" (usually followed by a file dump).
  /内容如下[::]/,
  // "以下是xx教案内容:" — "below is the ... content:" (pattern commonly produced by Kimi).
  /以下是.*(?:教案|文件|内容)[::]/
];
937
/**
 * Report whether `content` matches at least one entry of
 * HALLUCINATION_PATTERNS.
 *
 * @param {string} content - Text to scan.
 * @returns {boolean} true as soon as any pattern matches, otherwise false.
 */
function detectsHallucinatedFileOp(content) {
  for (const candidate of HALLUCINATION_PATTERNS) {
    if (candidate.test(content)) {
      return true;
    }
  }
  return false;
}
914
- var ThinkTagFilter = class {
915
- inThink = false;
916
- buf = "";
917
- push(raw) {
918
- this.buf += raw;
919
- let out = "";
920
- while (this.buf.length > 0) {
921
- if (!this.inThink) {
922
- const open = this.buf.indexOf("<think>");
923
- if (open === -1) {
924
- const keep = partialTagTail(this.buf, "<think>");
925
- out += this.buf.slice(0, this.buf.length - keep);
926
- this.buf = this.buf.slice(this.buf.length - keep);
927
- break;
928
- }
929
- out += this.buf.slice(0, open);
930
- this.buf = this.buf.slice(open + "<think>".length);
931
- this.inThink = true;
932
- } else {
933
- const close = this.buf.indexOf("</think>");
934
- if (close === -1) {
935
- const keep = partialTagTail(this.buf, "</think>");
936
- this.buf = this.buf.slice(this.buf.length - keep);
937
- break;
938
- }
939
- this.buf = this.buf.slice(close + "</think>".length);
940
- this.inThink = false;
941
- }
940
+ function hadPreviousWriteToolCalls(extraMessages) {
941
+ const msgs = extraMessages;
942
+ return msgs.some((msg) => {
943
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
944
+ return msg.tool_calls.some((tc) => {
945
+ const fn = tc.function;
946
+ const name = fn?.name ?? "";
947
+ return name === "write_file" || name === "edit_file";
948
+ });
942
949
  }
943
- return out;
944
- }
945
- /** 流结束:若仍持留可能的半截 '<think>' 前缀且并未进入 think 块,它是真实文本。 */
946
- flush() {
947
- if (!this.inThink && this.buf) {
948
- const tail = this.buf;
949
- this.buf = "";
950
- return tail;
950
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
951
+ return msg.content.some((block) => {
952
+ if (block.type !== "tool_use") return false;
953
+ const name = block.name ?? "";
954
+ return name === "write_file" || name === "edit_file";
955
+ });
951
956
  }
952
- this.buf = "";
953
- return "";
957
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
958
+ return msg.parts.some((part) => {
959
+ const fc = part.functionCall;
960
+ const name = fc?.name ?? "";
961
+ return name === "write_file" || name === "edit_file";
962
+ });
963
+ }
964
+ return false;
965
+ });
966
+ }
967
+ var TOOL_CALL_REMINDER = `
968
+
969
+ [\u26A0\uFE0F Mandatory Tool Call Policy]
970
+ When you need to create, write, or modify files, you MUST use the function calling API to invoke write_file or edit_file.
971
+ NEVER claim "file saved", "file created", "written to", etc. in your response text without actually calling the tool.
972
+ Describing file content in text without calling the tool = the file does not exist = task failure.
973
+ If multiple files need to be generated, you MUST call write_file separately for each file \u2014 do not skip any.
974
+ Do NOT output fake "completion summaries" unless you have actually completed all file writes via tool_calls.
975
+
976
+ CRITICAL \u2014 Batch file generation rules:
977
+ 1. You MUST call write_file once per file. There are NO shortcuts.
978
+ 2. After writing file N, immediately proceed to call write_file for file N+1. Do NOT stop to summarize.
979
+ 3. If you find yourself typing file content into your response text instead of into a write_file call, STOP and use the tool.
980
+ 4. Only produce a text summary AFTER all write_file calls have been made and returned success.
981
+ 5. The system compares every "file saved" claim against actual tool calls. Phantom claims trigger an automatic retry \u2014 do not waste rounds.`;
982
/**
 * Build the "[Write Progress Reminder]" text block.
 *
 * The result starts with two newlines, followed by the heading line and a
 * single instruction line that embeds `writtenCount`.
 *
 * @param {number} writtenCount - Value interpolated as the number of
 *   write_file calls made so far.
 * @returns {string} The reminder text.
 */
function buildWriteRoundReminder(writtenCount) {
  const heading = "[Write Progress Reminder]";
  const instruction = `You have successfully called write_file ${writtenCount} time(s) so far in this turn. If there are more files to write, call write_file NOW for the next file. Do NOT produce a text summary until ALL files have been written via tool calls.`;
  // Two leading empty entries reproduce the two leading newlines.
  return ["", "", heading, instruction].join("\n");
}
988
+ var HALLUCINATION_CORRECTION_MESSAGE = "You did NOT actually call the write_file tool \u2014 the file was NOT created! Please immediately use the write_file tool via the function calling API to perform the actual file write. Do NOT describe file content in text \u2014 you MUST invoke write_file through the tool_calls mechanism.";
989
+ function extractClaimedFilePaths(content) {
990
+ const paths = /* @__PURE__ */ new Set();
991
+ const add = (p) => {
992
+ const trimmed = p.trim().replace(/[,,。、;;::]+$/, "");
993
+ if (trimmed && /\.\w{1,6}$/.test(trimmed)) paths.add(trimmed);
994
+ };
995
+ let m;
996
+ const actionLineRe = /(?:已[生保写创]|saved|written|created|完成.*(?:写入|保存|创建|生成)|输出|file\s+(?:saved|written|created))/i;
997
+ const backtickRe = /`([^`\n]+?\.\w{1,6})`/g;
998
+ while ((m = backtickRe.exec(content)) !== null) {
999
+ let pos = m.index;
1000
+ let linesBack = 0;
1001
+ while (linesBack < 9 && pos > 0) {
1002
+ pos--;
1003
+ if (content[pos] === "\n") linesBack++;
1004
+ }
1005
+ const windowStart = pos === 0 ? 0 : pos + 1;
1006
+ const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1007
+ const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1008
+ if (actionLineRe.test(window)) add(m[1]);
954
1009
  }
955
- };
956
- function repairToolCallArguments(raw, onWarn) {
957
- const argStr = raw || "{}";
958
- try {
959
- return JSON.parse(argStr);
960
- } catch {
961
- const truncated = argStr.trimEnd();
962
- const lastComma = truncated.lastIndexOf(",");
963
- const fixed = lastComma > 0 ? truncated.slice(0, lastComma) + "}" : truncated.slice(0, truncated.indexOf("{") + 1) + "}";
964
- try {
965
- const repaired = JSON.parse(fixed);
966
- onWarn?.("Tool call JSON was truncated and auto-repaired. Some parameters may be missing.");
967
- return repaired;
968
- } catch {
969
- onWarn?.("Tool call JSON could not be parsed, using empty arguments.");
970
- return {};
1010
+ const zhRe = /(?:已保存(?:到)?|已写入(?:到)?|已创建|已生成|文件路径[::]|保存为|写入到)\s*[`'”””]?([^\s`'”””,,。\n]+?\.\w{1,6})/g;
1011
+ while ((m = zhRe.exec(content)) !== null) add(m[1]);
1012
+ const enRe = /(?:saved|written|created)\s+(?:to|as|at)\s+[`'”]?([^\s`'”\n,]+?\.\w{1,6})/gi;
1013
+ while ((m = enRe.exec(content)) !== null) add(m[1]);
1014
+ const checkRe = /✅[^\n`]*?[`'”]?([^\s`'”\n,,。]+?\.\w{1,6})/g;
1015
+ while ((m = checkRe.exec(content)) !== null) {
1016
+ let pos = m.index;
1017
+ let linesBack = 0;
1018
+ while (linesBack < 9 && pos > 0) {
1019
+ pos--;
1020
+ if (content[pos] === "\n") linesBack++;
971
1021
  }
1022
+ const windowStart = pos === 0 ? 0 : pos + 1;
1023
+ const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1024
+ const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1025
+ if (actionLineRe.test(window)) add(m[1]);
972
1026
  }
1027
+ return Array.from(paths);
973
1028
  }
974
- async function consumeToolCallStream(stream, hooks = {}) {
975
- const textParts = [];
976
- const accumulators = /* @__PURE__ */ new Map();
977
- let usage;
978
- let rawContent;
979
- let reasoningContent;
980
- let finishReason;
981
- let aborted = false;
982
- const thinkFilter = new ThinkTagFilter();
983
- const emitText = (raw) => {
984
- const visible = thinkFilter.push(raw);
985
- if (visible) {
986
- textParts.push(visible);
987
- hooks.onText?.(visible);
1029
+ function extractWrittenFilePaths(extraMessages) {
1030
+ const paths = /* @__PURE__ */ new Set();
1031
+ const msgs = extraMessages;
1032
+ const addFromArgs = (raw) => {
1033
+ if (typeof raw === "string") {
1034
+ try {
1035
+ const parsed = JSON.parse(raw);
1036
+ if (typeof parsed.path === "string") paths.add(parsed.path);
1037
+ } catch {
1038
+ }
1039
+ } else if (raw && typeof raw === "object") {
1040
+ const p = raw.path;
1041
+ if (typeof p === "string") paths.add(p);
988
1042
  }
989
1043
  };
990
- try {
991
- for await (const event of stream) {
992
- if (hooks.signal?.aborted) {
993
- aborted = true;
994
- break;
1044
+ for (const msg of msgs) {
1045
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1046
+ for (const tc of msg.tool_calls) {
1047
+ const fn = tc.function;
1048
+ const name = fn?.name ?? "";
1049
+ if (name === "write_file" || name === "edit_file") {
1050
+ addFromArgs(fn?.arguments);
1051
+ }
995
1052
  }
996
- switch (event.type) {
997
- case "text_delta":
998
- emitText(event.delta);
999
- break;
1000
- case "thinking_start":
1001
- hooks.onThinkingStart?.();
1002
- break;
1003
- case "thinking_delta":
1004
- hooks.onThinkingDelta?.(event.delta);
1005
- break;
1006
- case "thinking_end":
1007
- hooks.onThinkingEnd?.();
1008
- break;
1009
- case "tool_call_start":
1010
- accumulators.set(event.index, { id: event.id, name: event.name, arguments: "" });
1011
- hooks.onToolCallStart?.(event.index, event.id, event.name);
1012
- break;
1013
- case "tool_call_delta": {
1014
- const acc = accumulators.get(event.index);
1015
- if (acc) acc.arguments += event.argumentsDelta;
1016
- break;
1053
+ }
1054
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1055
+ for (const block of msg.content) {
1056
+ if (block.type !== "tool_use") continue;
1057
+ const name = block.name ?? "";
1058
+ if (name === "write_file" || name === "edit_file") {
1059
+ addFromArgs(block.input);
1017
1060
  }
1018
- case "tool_call_end":
1019
- break;
1020
- case "done":
1021
- if (event.usage) usage = event.usage;
1022
- if (event.rawContent) rawContent = event.rawContent;
1023
- if (event.reasoningContent) reasoningContent = event.reasoningContent;
1024
- if (event.finishReason) finishReason = event.finishReason;
1025
- break;
1026
1061
  }
1027
1062
  }
1028
- } catch (err) {
1029
- if (err instanceof Error && (err.name === "AbortError" || err.message.includes("aborted"))) {
1030
- aborted = true;
1031
- } else {
1032
- throw err;
1033
- }
1034
- }
1035
- const tail = thinkFilter.flush();
1036
- if (tail && !aborted) {
1037
- textParts.push(tail);
1038
- hooks.onText?.(tail);
1039
- }
1040
- const textContent = textParts.join("");
1041
- if (aborted) {
1042
- return { textContent, toolCalls: [], usage, rawContent, reasoningContent, finishReason, aborted };
1043
- }
1044
- const toolCalls = [];
1045
- for (const [, acc] of accumulators) {
1046
- toolCalls.push({
1047
- id: acc.id,
1048
- name: acc.name,
1049
- arguments: repairToolCallArguments(acc.arguments, hooks.onWarn)
1050
- });
1051
- }
1052
- if (toolCalls.length > 0) {
1053
- if (rawContent) {
1054
- toolCalls._rawContent = rawContent;
1055
- }
1056
- if (textContent) {
1057
- toolCalls._streamedText = textContent;
1063
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
1064
+ for (const part of msg.parts) {
1065
+ const fc = part.functionCall;
1066
+ if (!fc) continue;
1067
+ const name = fc.name ?? "";
1068
+ if (name === "write_file" || name === "edit_file") {
1069
+ addFromArgs(fc.args);
1070
+ }
1071
+ }
1058
1072
  }
1059
1073
  }
1060
- return { textContent, toolCalls, usage, rawContent, reasoningContent, finishReason, aborted };
1074
+ return Array.from(paths);
1061
1075
  }
1062
- var FREE_ROUND_TOOLS = /* @__PURE__ */ new Set(["write_todos"]);
1063
- var MAX_CONSECUTIVE_FREE_ROUNDS = 3;
1064
- var FreeRoundTracker = class {
1065
- consecutive = 0;
1066
- /** 返回 true 表示本轮不消耗有效轮次(调用方执行 round--)。 */
1067
- apply(toolNames) {
1068
- const allFree = toolNames.length > 0 && toolNames.every((n) => FREE_ROUND_TOOLS.has(n));
1069
- if (!allFree) {
1070
- this.consecutive = 0;
1071
- return false;
1076
+ function extractBashCommands(extraMessages) {
1077
+ const cmds = [];
1078
+ const msgs = extraMessages;
1079
+ const addCmd = (raw) => {
1080
+ if (typeof raw === "string") {
1081
+ try {
1082
+ const parsed = JSON.parse(raw);
1083
+ if (typeof parsed.command === "string") cmds.push(parsed.command);
1084
+ } catch {
1085
+ }
1086
+ } else if (raw && typeof raw === "object") {
1087
+ const c = raw.command;
1088
+ if (typeof c === "string") cmds.push(c);
1072
1089
  }
1073
- this.consecutive++;
1074
- return this.consecutive <= MAX_CONSECUTIVE_FREE_ROUNDS;
1075
- }
1076
- };
1077
- var BudgetWarner = class {
1078
- constructor(maxToolRounds) {
1079
- this.maxToolRounds = maxToolRounds;
1080
- this.noteAt = Math.max(10, Math.floor(maxToolRounds * 0.2));
1081
- const lowRaw = Math.max(5, Math.floor(maxToolRounds * 0.1));
1082
- const criticalRaw = Math.max(3, Math.floor(maxToolRounds * 0.05));
1083
- this.lowAt = Math.min(lowRaw, this.noteAt - 1);
1084
- this.criticalAt = Math.min(criticalRaw, this.lowAt - 1);
1085
- }
1086
- noteAt;
1087
- lowAt;
1088
- criticalAt;
1089
- warnedNote = false;
1090
- warnedLow = false;
1091
- warnedCritical = false;
1092
- check(roundsLeft) {
1093
- if (!this.warnedCritical && roundsLeft <= this.criticalAt) {
1094
- this.warnedCritical = true;
1095
- return {
1096
- level: "critical",
1097
- injectMessage: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`,
1098
- displayMessage: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining`
1099
- };
1090
+ };
1091
+ for (const msg of msgs) {
1092
+ if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1093
+ for (const tc of msg.tool_calls) {
1094
+ const fn = tc.function;
1095
+ if (fn?.name === "bash") addCmd(fn?.arguments);
1096
+ }
1100
1097
  }
1101
- if (!this.warnedLow && roundsLeft <= this.lowAt) {
1102
- this.warnedLow = true;
1103
- return {
1104
- level: "low",
1105
- injectMessage: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. Use efficient approaches (batch edits, fewer reads). If you cannot finish everything, summarize what's done and what remains.`,
1106
- displayMessage: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining`
1107
- };
1098
+ if (msg.role === "assistant" && Array.isArray(msg.content)) {
1099
+ for (const block of msg.content) {
1100
+ if (block.type === "tool_use" && block.name === "bash") addCmd(block.input);
1101
+ }
1108
1102
  }
1109
- if (!this.warnedNote && roundsLeft <= this.noteAt) {
1110
- this.warnedNote = true;
1111
- return {
1112
- level: "note",
1113
- injectMessage: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${this.maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
1114
- };
1103
+ if (msg.role === "model" && Array.isArray(msg.parts)) {
1104
+ for (const part of msg.parts) {
1105
+ const fc = part.functionCall;
1106
+ if (fc && fc.name === "bash") addCmd(fc.args);
1107
+ }
1115
1108
  }
1116
- return null;
1117
1109
  }
1118
- };
1119
- var EMPTY_RESPONSE_NUDGE = "Your previous response was empty \u2014 no text and no tool calls. This usually means the context window is nearly full. Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains. Do NOT repeat earlier long outputs.";
1120
- function describeFinishReason(fr) {
1121
- if (fr === "length") return "output limit reached (finish_reason=length)";
1122
- if (fr === "content_filter") return "content blocked (finish_reason=content_filter)";
1123
- if (fr) return `empty response (finish_reason=${fr})`;
1124
- return "empty response";
1125
- }
1126
- function emptyResponseHint(fr) {
1127
- if (fr === "length") return "Output token limit hit \u2014 try /compact to reduce context, raise maxTokens, or /model to switch.";
1128
- if (fr === "content_filter") return "Content was blocked by the provider filter.";
1129
- return "Context window may be exhausted or max_tokens too low.";
1110
+ return cmds;
1130
1111
  }
1131
- var EmptyResponseGuard = class {
1132
- retries = 0;
1133
- onEmpty(canRetry, finishReason) {
1134
- if (this.retries === 0 && canRetry) {
1135
- this.retries++;
1136
- return {
1137
- action: "nudge",
1138
- injectMessage: EMPTY_RESPONSE_NUDGE,
1139
- displayMessage: `\u26A0 ${describeFinishReason(finishReason)} \u2014 nudging AI to continue...`
1140
- };
1141
- }
1142
- return {
1143
- action: "stop",
1144
- displayMessage: "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop.",
1145
- hint: emptyResponseHint(finishReason)
1146
- };
1147
- }
1148
- /** 非空响应到达 → 重置计数(下次空响应仍可 nudge 一次)。 */
1149
- onNonEmpty() {
1150
- this.retries = 0;
1151
- }
1152
- };
1153
- var ContextPressureMonitor = class {
1154
- warned80 = false;
1155
- check(requestTokens, contextWindow) {
1156
- if (contextWindow <= 0) return { action: "ok", ratio: 0 };
1157
- const ratio = requestTokens / contextWindow;
1158
- if (ratio >= 0.95) return { action: "abort", ratio };
1159
- if (ratio >= 0.8 && !this.warned80) {
1160
- this.warned80 = true;
1161
- return {
1162
- action: "warn",
1163
- ratio,
1164
- injectMessage: `\u26A0\uFE0F Context pressure: ~${Math.round(ratio * 100)}% of the ${contextWindow.toLocaleString()}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
1165
- };
1112
+ function findPhantomClaims(content, extraMessages) {
1113
+ const claimed = extractClaimedFilePaths(content);
1114
+ if (claimed.length === 0) return [];
1115
+ const normalize = (p) => p.replace(/\\/g, "/").toLowerCase().replace(/^\.\//, "");
1116
+ const basename = (p) => {
1117
+ const parts = normalize(p).split("/");
1118
+ return parts[parts.length - 1] ?? "";
1119
+ };
1120
+ const written = extractWrittenFilePaths(extraMessages).map(normalize);
1121
+ const writtenBases = new Set(written.map(basename));
1122
+ const writtenFull = new Set(written);
1123
+ const bashText = extractBashCommands(extraMessages).map((c) => c.replace(/\\/g, "/").toLowerCase()).join("\n");
1124
+ return claimed.filter((raw) => {
1125
+ const norm = normalize(raw);
1126
+ if (writtenFull.has(norm)) return false;
1127
+ for (const w of writtenFull) {
1128
+ if (w.endsWith("/" + norm) || norm.endsWith("/" + w)) return false;
1166
1129
  }
1167
- return { action: "ok", ratio };
1168
- }
1169
- };
1170
- function accumulateUsage(total, delta) {
1171
- if (!delta) return;
1172
- total.inputTokens += delta.inputTokens;
1173
- total.outputTokens += delta.outputTokens;
1174
- total.cacheCreationTokens += delta.cacheCreationTokens ?? 0;
1175
- total.cacheReadTokens += delta.cacheReadTokens ?? 0;
1176
- }
1177
- function buildRoundBudgetHint(opts) {
1178
- const pauseHint = opts.autoPauseInterval > 0 ? `
1179
- - Every ${opts.autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.` : "";
1180
- if (opts.planMode) {
1181
- return `
1182
-
1183
- [Tool Round Budget \u2014 Plan Mode]
1184
- You have a maximum of ${opts.maxToolRounds} tool call rounds. You are in READ-ONLY Plan Mode:
1185
- - Only use: read_file, list_dir, grep_files, glob_files, ask_user, write_todos
1186
- - Do NOT attempt to call bash, write_file, edit_file \u2014 they are disabled
1187
- - Do NOT write shell commands or code blocks as a substitute for tool calls
1188
- - Do NOT read the same file more than once
1189
- - Call write_todos ONCE to present your plan, then give a text summary
1190
- - If the user asks you to execute anything, respond: "Please type /plan execute to switch to execute mode."${pauseHint}`;
1191
- }
1192
- return `
1193
-
1194
- [Tool Round Budget]
1195
- You have a maximum of ${opts.maxToolRounds} tool call rounds for this task. Plan efficiently:
1196
- - Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.
1197
- - Do NOT read the same file more than once \u2014 use the content from previous reads.
1198
- - Prioritize the most critical tasks first in case rounds run out.
1199
- - When remaining rounds are low, focus on completing the current task and summarizing.${pauseHint}`;
1200
- }
1201
- function buildRoundsExhaustedPrompt(maxToolRounds) {
1202
- return `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:
1203
- 1. What work has been completed so far
1204
- 2. What tasks remain unfinished
1205
- 3. What the user can do next (e.g. send another request to continue)`;
1206
- }
1207
- function buildUserStopMessage(effectiveRound, maxToolRounds) {
1208
- return `The user has stopped the task at round ${effectiveRound}/${maxToolRounds}. Do not call any more tools. Summarize what has been completed and what remains.`;
1209
- }
1210
- function summarizeRecentTools(history, interval) {
1211
- const recent = history.slice(-interval);
1212
- const counts = /* @__PURE__ */ new Map();
1213
- for (const rh of recent) {
1214
- for (const t of rh.tools) counts.set(t, (counts.get(t) || 0) + 1);
1215
- }
1216
- return [...counts.entries()].sort((a, b) => b[1] - a[1]).map(([name, count]) => count > 1 ? `${name}\xD7${count}` : name).join(", ");
1130
+ if (writtenBases.has(basename(norm))) return false;
1131
+ const base = basename(norm);
1132
+ if (base && bashText.includes(base)) return false;
1133
+ return true;
1134
+ });
1217
1135
  }
1218
-
1219
- // src/providers/openai-compatible.ts
1220
- function toUsage(u) {
1221
- if (!u) return void 0;
1222
- const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
1223
- const usage = {
1224
- inputTokens: Math.max(0, u.prompt_tokens - cached),
1225
- outputTokens: u.completion_tokens
1226
- };
1227
- if (cached > 0) usage.cacheReadTokens = cached;
1228
- return usage;
1136
/**
 * Build the correction message listing files that were claimed in text but
 * have no matching write_file tool call.
 *
 * @param {string[]} phantoms - Claimed paths; each is rendered on its own
 *   line as a " - path" bullet.
 * @returns {string} Header line, the bullet list, then the closing instruction.
 */
function buildPhantomCorrectionMessage(phantoms) {
  const header = "You claimed to have written the following file(s), but no matching write_file tool call was actually made in this turn:\n";
  const footer = '\n\nEach of these files does NOT exist on disk. You MUST now invoke write_file (via the function calling API) for every missing file listed above. Do NOT output another "completion summary" until the tool calls have actually been made.';
  const bullets = phantoms.map((missing) => ` - ${missing}`);
  return `${header}${bullets.join("\n")}${footer}`;
}
1230
- var OpenAICompatibleProvider = class extends BaseProvider {
1231
- client;
1232
- defaultTimeout = 6e4;
1233
- // ms
1234
- /** 子类设为 false 可禁用流式工具调用(虚假声明检测需要完整响应) */
1235
- enableStreamingToolCalls = true;
1236
- async initialize(apiKey, options) {
1237
- if (options?.timeout !== void 0) {
1238
- this.defaultTimeout = options.timeout;
1239
- }
1240
- const clientOptions = {
1241
- apiKey,
1242
- baseURL: options?.baseUrl ?? this.defaultBaseUrl,
1243
- timeout: this.defaultTimeout
1244
- };
1245
- const proxyUrl = options?.proxy;
1246
- try {
1247
- const { Agent, ProxyAgent, fetch: undiciFetch } = await import("undici");
1248
- const STREAM_BODY_TIMEOUT = 30 * 60 * 1e3;
1249
- const STREAM_HEADERS_TIMEOUT = 5 * 60 * 1e3;
1250
- const dispatcher = proxyUrl ? new ProxyAgent({
1251
- uri: proxyUrl,
1252
- bodyTimeout: STREAM_BODY_TIMEOUT,
1253
- headersTimeout: STREAM_HEADERS_TIMEOUT
1254
- }) : new Agent({
1255
- bodyTimeout: STREAM_BODY_TIMEOUT,
1256
- headersTimeout: STREAM_HEADERS_TIMEOUT
1257
- });
1258
- clientOptions.fetch = ((url, init) => undiciFetch(url, { ...init, dispatcher }));
1259
- } catch {
1260
- }
1261
- this.client = new OpenAI(clientOptions);
1140
+ var DSML_PIPE_CLASS = "[|\\uFF5C\\u2502\\u2503\\u01C0]";
1141
+ var PSEUDO_TOOL_CALL_PATTERNS = [
1142
+ // <tool_call name="..."> ... </tool_call> (DeepSeek V4 thinking, GLM)
1143
+ /<tool_call\s+name\s*=\s*["'][\w._-]+["']/,
1144
+ // <function_calls> ... </function_calls> (Anthropic-style as text)
1145
+ /<\/?function_calls\s*>/,
1146
+ // <invoke name="..." /> (Anthropic XML tool-call, which is real for
1147
+ // Claude API but is text/garbage for any other provider's plain stream)
1148
+ /<invoke\s+name\s*=\s*["'][\w._-]+["']/,
1149
+ // <tool_use> ... <tool_use_id> (Claude flavor leaked into text)
1150
+ /<tool_use(?:_id)?\b/,
1151
+ // ```tool_call\n...\n``` markdown fences (Kimi/Zhipu fallback)
1152
+ /```\s*tool_call\b/i,
1153
+ // Bare JSON tool-call block: lines starting with `{"name":"...","arguments":`
1154
+ /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:/m,
1155
+ // v0.4.112: <think> ... </think> reasoning blocks. The REPL renderer
1156
+ // suppresses these from terminal output, but tee mode writes the raw
1157
+ // delta to disk, so reasoning leaks into the saved file. We saw a 600-line
1158
+ // 审计报告.md whose first 57 lines were the model's planning monologue.
1159
+ /<think\b[^>]*>/i,
1160
+ // v0.4.112: leading ```markdown / ```md fence wrapping the entire document.
1161
+ // DeepSeek V4 Pro Thinking sometimes "politely" wraps its document output
1162
+ // in a markdown fence. The fence ends up literally in the saved file.
1163
+ /^\s*```\s*(?:markdown|md|gfm)\b/im,
1164
+ // v0.4.173: DeepSeek V4 DSML pseudo-tool-call markup leaked as text. DeepSeek
1165
+ // emits a fake tool call using its native special-token markup
1166
+ // <||DSML||tool_calls> <||DSML||invoke name="write"> <||DSML||parameter …>
1167
+ // where the "pipe" is U+FF5C FULLWIDTH VERTICAL LINE (the same token family as
1168
+ // <|User|>/<|Assistant|>). We saw an exam paper saved via save_last_response
1169
+ // whose tee stream was preamble + this DSML wrapper + the real document body.
1170
+ // The earlier <invoke …> pattern uses ASCII < > and does NOT match these.
1171
+ new RegExp(`<\\/?\\s*${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`, "i")
1172
+ ];
1173
+ function detectPseudoToolCalls(content) {
1174
+ if (!content || content.length === 0) return null;
1175
+ for (const re of PSEUDO_TOOL_CALL_PATTERNS) {
1176
+ if (re.test(content)) return re.source;
1262
1177
  }
1263
- /**
1264
- * 将 systemPrompt + messages 合并为 OpenAI messages 数组(system 消息放首位)。
1265
- *
1266
- * v0.4.100+:按原始顺序保留工具消息(assistant.toolCalls role='tool'),
1267
- * 不再剥离到 _extraMessages 末尾——之前的剥离会让历史工具往返被插到当前用户消息之后,
1268
- * 导致模型把"过去的工具调用结果"当作"对当前问题的回应",DeepSeek V4 Flash 上尤其明显
1269
- * (会复读上一轮的"完成汇总")。
1270
- *
1271
- * DeepSeek V4 thinking 模式:所有 assistant 消息(含带 toolCalls 的)必须有
1272
- * reasoning_content 字段,缺失则 API 400。
1273
- */
1274
- buildMessages(request) {
1275
- const msgs = [];
1276
- for (const m of request.messages) {
1277
- if (m.role === "tool") {
1278
- if (!m.toolCallId) continue;
1279
- msgs.push({
1280
- role: "tool",
1281
- tool_call_id: m.toolCallId,
1282
- content: typeof m.content === "string" ? m.content : ""
1283
- });
1284
- continue;
1285
- }
1286
- if (m.role === "assistant" && m.toolCalls && m.toolCalls.length > 0) {
1287
- const assistantMsg = {
1288
- role: "assistant",
1289
- content: typeof m.content === "string" && m.content ? m.content : null,
1290
- tool_calls: m.toolCalls.map((tc) => ({
1291
- id: tc.id,
1292
- type: "function",
1293
- function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
1294
- })),
1295
- reasoning_content: m.reasoningContent ?? ""
1296
- };
1297
- msgs.push(assistantMsg);
1298
- continue;
1299
- }
1300
- const base = { role: m.role, content: m.content };
1301
- if (m.role === "assistant") {
1302
- base.reasoning_content = m.reasoningContent ?? "";
1303
- }
1304
- msgs.push(base);
1178
+ return null;
1179
+ }
1180
/**
 * Remove pseudo tool-call markup and reasoning blocks that leaked into
 * plain response text.
 *
 * Order of operations:
 *  1. If extractDsmlContent() finds a DSML "content" parameter, its body
 *     replaces the whole text; otherwise stripDsmlTags() is applied.
 *  2. Each markup pattern below is deleted, in the listed order.
 *  3. unwrapDocumentFence() and then peelMetaNarration() are applied.
 *  4. Runs of three or more newlines collapse to two and the result is trimmed.
 *
 * @param {string} content - Raw text; falsy values are returned unchanged.
 * @returns {string} The cleaned text.
 */
function stripPseudoToolCalls(content) {
  if (!content) return content;

  const dsmlBody = extractDsmlContent(content);
  const seed = dsmlBody !== null ? dsmlBody : stripDsmlTags(content);

  // Applied sequentially; each pattern sees the output of the previous one.
  const markupToDelete = [
    /<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi,
    /<tool_call\b[^>]*\/>/gi,
    /<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi,
    /<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi,
    /<invoke\b[^>]*\/>/gi,
    /<tool_use(?:_id)?\b[^>]*>[\s\S]*?<\/tool_use(?:_id)?>/gi,
    /```\s*tool_call\b[\s\S]*?```/gi,
    // Closed <think>…</think> blocks.
    /<think\b[^>]*>[\s\S]*?<\/think>/gi,
    // Unclosed <think>: cut up to the next heading line or blank line (first occurrence only).
    /<think\b[^>]*>[\s\S]*?(?=^#{1,3}\s+\S|\n\s*\n)/im,
    // Bare JSON tool-call lines of the form {"name": "...", "arguments": ...}.
    /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:[\s\S]*?\}\s*$/gm
  ];
  const withoutMarkup = markupToDelete.reduce(
    (text, pattern) => text.replace(pattern, ""),
    seed
  );

  const peeled = peelMetaNarration(unwrapDocumentFence(withoutMarkup));
  return peeled.replace(/\n{3,}/g, "\n\n").trim();
}
1204
/**
 * Return the body of the first DSML `parameter` element whose `name`
 * attribute is "content".
 *
 * The pipe characters around "DSML" are matched with DSML_PIPE_CLASS and the
 * match is case-insensitive.
 *
 * @param {string} content - Text to search.
 * @returns {string|null} The trimmed parameter body, or null when the input
 *   is falsy, nothing matches, or the body is empty after trimming.
 */
function extractDsmlContent(content) {
  if (!content) return null;

  const pipe = DSML_PIPE_CLASS;
  // Shared "<pipes> DSML <pipes> parameter" fragment used by both tags.
  const paramTag = `${pipe}+\\s*DSML\\s*${pipe}+\\s*parameter`;
  const pattern = new RegExp(
    `<\\s*${paramTag}\\b[^>]*\\bname\\s*=\\s*["']content["'][^>]*>([\\s\\S]*?)<\\s*/\\s*${paramTag}\\s*>`,
    "i"
  );

  const found = pattern.exec(content);
  if (found === null || typeof found[1] !== "string") return null;

  const inner = found[1].trim();
  return inner.length === 0 ? null : inner;
}
1218
/**
 * Delete DSML markup from `content`.
 *
 * First removes every complete DSML `tool_calls` element (opening tag
 * through closing tag, contents included), then removes any remaining
 * individual DSML opening or closing tags. Pipe characters are matched with
 * DSML_PIPE_CLASS; both passes are global and case-insensitive.
 *
 * @param {string} content - Text to clean.
 * @returns {string} The text with DSML markup removed.
 */
function stripDsmlTags(content) {
  const pipe = DSML_PIPE_CLASS;
  // "<pipes> DSML <pipes>" fragment common to every DSML tag.
  const marker = `${pipe}+\\s*DSML\\s*${pipe}+`;

  const wholeToolCalls = new RegExp(
    `<\\s*${marker}\\s*tool_calls\\b[\\s\\S]*?<\\s*/\\s*${marker}\\s*tool_calls\\s*>`,
    "gi"
  );
  const strayTag = new RegExp(`<\\s*/?\\s*${marker}[^>]*>`, "gi");

  return content.replace(wholeToolCalls, "").replace(strayTag, "");
}
1231
/**
 * Strip a code fence that wraps the entire text.
 *
 * The trimmed text must begin with a ``` fence that is either bare or tagged
 * markdown / md / gfm (case-insensitive) and must end with a closing ``` on
 * its own line. The inner text is returned only when it is at least 200
 * characters long; in every other case the original `content` is returned
 * untouched (not trimmed).
 *
 * @param {string} content - Text that may be wrapped in a fence.
 * @returns {string} The inner text, or `content` unchanged.
 */
function unwrapDocumentFence(content) {
  const body = content.trim();

  const opener = /^```\s*(markdown|md|gfm)?\s*\n/i.exec(body);
  if (opener === null) return content;

  const rest = body.slice(opener[0].length);
  // No `m` flag: `$` anchors at end of input, so the match always runs to the end.
  const closer = /\n```\s*$/.exec(rest);
  if (closer === null) return content;

  const inner = rest.slice(0, closer.index);
  return inner.length < 200 ? content : inner;
}
1242
+ function peelMetaNarration(content) {
1243
+ let out = content;
1244
+ const firstHeadingMatch = out.match(/^#{1,3}\s+\S.*$/m);
1245
+ if (firstHeadingMatch && firstHeadingMatch.index !== void 0) {
1246
+ const before = out.slice(0, firstHeadingMatch.index);
1247
+ const hasIntroMarker = /(?:以下(?:即为|是|就是)|这是|Here\s+is|Below\s+is|完整的?(?:审计报告|内容|文档)|审计报告(?:如下|的完整内容))/i.test(before);
1248
+ if (before.length > 0 && before.length < 800 && hasIntroMarker) {
1249
+ out = out.slice(firstHeadingMatch.index);
1305
1250
  }
1306
- const systemContent = [request.systemPrompt, request.systemPromptVolatile].filter(Boolean).join("\n\n---\n\n");
1307
- if (systemContent) {
1308
- return [{ role: "system", content: systemContent }, ...msgs];
1251
+ if (out.startsWith("---\n")) {
1252
+ const headingAfterRule = out.slice(4).match(/^#{1,3}\s+\S/m);
1253
+ if (headingAfterRule && headingAfterRule.index !== void 0 && headingAfterRule.index < 100) {
1254
+ out = out.slice(4 + headingAfterRule.index);
1255
+ }
1309
1256
  }
1310
- return msgs;
1311
1257
  }
1312
- async chat(request) {
1313
- try {
1314
- const response = await this.client.chat.completions.create({
1315
- model: request.model,
1316
- messages: this.buildMessages(request),
1317
- temperature: request.temperature,
1318
- max_tokens: request.maxTokens,
1319
- stream: false,
1320
- ...request.thinking ? { thinking: { type: "enabled" } } : {}
1321
- }, {
1322
- timeout: request.timeout ?? this.defaultTimeout
1323
- });
1324
- const firstChoice = response.choices?.[0];
1325
- if (!firstChoice) {
1326
- return { content: "", model: response.model, usage: void 0 };
1327
- }
1328
- return {
1329
- content: firstChoice.message.content ?? "",
1330
- model: response.model,
1331
- usage: toUsage(response.usage)
1332
- };
1333
- } catch (err) {
1334
- throw this.wrapError(err);
1258
+ const codaMatch = out.match(/\n[^\n]*?(?:以上(?:即为|就是|内容|为完整的?)|Above\s+is\s+the|本报告已经|该报告(?:已经|包含)|报告(?:已|至此)结束)[^\n]*$/i);
1259
+ if (codaMatch && codaMatch.index !== void 0 && codaMatch.index > out.length / 2) {
1260
+ out = out.slice(0, codaMatch.index);
1261
+ }
1262
+ return out.trim();
1263
+ }
1264
+ var META_NARRATION_HARD_MARKERS = [
1265
+ /\[⚠️\s*CONTENT GENERATION MODE\]/,
1266
+ /CONTENT_ONLY_STREAM_REMINDER\b/,
1267
+ /<system-reminder>/i
1268
+ ];
1269
+ var META_NARRATION_HEURISTICS = [
1270
+ /\bthe user (?:is asking me|wants me|is requesting|expects me)\b/i,
1271
+ /\blet me (?:re-?read|re-?consider|reconsider|think about|carefully (?:re-?read|consider))\b/i,
1272
+ /\bI'?m (?:in (?:a )?content-only|in CONTENT-ONLY|currently in)\b/i,
1273
+ /\bI think (?:there might be|I should|I cannot|the (?:user|best)|maybe)\b/i,
1274
+ /\bWait,?\s+let me\b/i,
1275
+ /\bActually,?\s+I\b/i,
1276
+ /\bI need to be honest with the user\b/i,
1277
+ /\bI(?:'m| am) in a special mode\b/i,
1278
+ /\bGiven that I cannot\b/i
1279
+ ];
1280
+ function detectMetaNarration(content) {
1281
+ if (!content) return null;
1282
+ const head = content.slice(0, 2e3);
1283
+ for (const re of META_NARRATION_HARD_MARKERS) {
1284
+ if (re.test(head)) return re.source;
1285
+ }
1286
+ if (/^#{1,3}\s+\S/m.test(head)) return null;
1287
+ let hits = 0;
1288
+ let firstMatch = "";
1289
+ for (const re of META_NARRATION_HEURISTICS) {
1290
+ if (re.test(head)) {
1291
+ hits++;
1292
+ if (!firstMatch) firstMatch = re.source;
1293
+ if (hits >= 2) return `meta-narration:${firstMatch}`;
1335
1294
  }
1336
1295
  }
1337
- async *chatStream(request) {
1338
- try {
1339
- const stream = await this.client.chat.completions.create({
1340
- model: request.model,
1341
- messages: this.buildMessages(request),
1342
- temperature: request.temperature,
1343
- max_tokens: request.maxTokens,
1344
- stream: true,
1345
- // 请求末尾 usage chunk,供 token 统计使用
1346
- stream_options: { include_usage: true },
1347
- ...request.thinking ? { thinking: { type: "enabled" } } : {}
1348
- }, {
1349
- timeout: request.timeout ?? this.defaultTimeout,
1350
- signal: request.signal
1351
- });
1352
- let thinkingStarted = false;
1353
- let reasoningAccumulator = "";
1354
- for await (const chunk of stream) {
1355
- const choice = chunk.choices[0];
1356
- const done = choice?.finish_reason != null;
1357
- if (!choice && chunk.usage) {
1358
- yield {
1359
- delta: "",
1360
- done: true,
1361
- usage: toUsage(chunk.usage),
1362
- ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {}
1363
- };
1364
- continue;
1365
- }
1366
- const reasoningDelta = choice?.delta?.reasoning_content;
1367
- if (reasoningDelta) {
1368
- if (!thinkingStarted) {
1369
- yield { delta: "<think>", done: false };
1370
- thinkingStarted = true;
1371
- }
1372
- reasoningAccumulator += reasoningDelta;
1373
- yield { delta: reasoningDelta, done: false };
1374
- continue;
1375
- }
1376
- const delta = choice?.delta?.content ?? "";
1377
- if (thinkingStarted && delta) {
1378
- thinkingStarted = false;
1379
- yield { delta: "</think>", done: false };
1380
- }
1381
- if (done) {
1382
- yield { delta, done, ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {} };
1383
- } else {
1384
- yield { delta, done };
1385
- }
1386
- }
1387
- } catch (err) {
1388
- throw this.wrapError(err);
1389
- }
1390
- }
1391
- /**
1392
- * 请求 AI 并获取工具调用列表(不执行,只解析)。
1393
- * 返回 { toolCalls, usage? } 时说明 AI 想要调用工具,
1394
- * 返回 { content, usage? } 时说明 AI 给出了最终回答。
1395
- */
1396
- async chatWithTools(request, tools) {
1397
- try {
1398
- const openaiTools = tools.map((t) => ({
1399
- type: "function",
1400
- function: {
1401
- name: t.name,
1402
- description: t.description,
1403
- parameters: {
1404
- type: "object",
1405
- properties: Object.fromEntries(
1406
- Object.entries(t.parameters).map(([key, schema]) => [
1407
- key,
1408
- schemaToJsonSchema(schema)
1409
- ])
1410
- ),
1411
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
1412
- }
1413
- }
1414
- }));
1415
- const baseMessages = this.buildMessages(request);
1416
- const extraMessages = request._extraMessages ?? [];
1417
- const allMessages = [...baseMessages, ...extraMessages];
1418
- const response = await this.client.chat.completions.create({
1419
- model: request.model,
1420
- messages: allMessages,
1421
- tools: openaiTools,
1422
- tool_choice: "auto",
1423
- temperature: request.temperature,
1424
- max_tokens: request.maxTokens,
1425
- stream: false,
1426
- ...request.thinking ? { thinking: { type: "enabled" } } : {}
1427
- }, {
1428
- timeout: request.timeout ?? this.defaultTimeout
1429
- });
1430
- const firstChoice = response.choices?.[0];
1431
- if (!firstChoice) {
1432
- return { content: "", usage: void 0 };
1433
- }
1434
- const message = firstChoice.message;
1435
- const finishReason = firstChoice.finish_reason;
1436
- const usage = toUsage(response.usage);
1437
- const contentStr = typeof message.content === "string" ? message.content : "";
1438
- const hasToolCalls = !!(message.tool_calls && message.tool_calls.length > 0);
1439
- const reasoningContent = message.reasoning_content;
1440
- if (message.tool_calls && message.tool_calls.length > 0) {
1441
- const toolCalls = message.tool_calls.map((tc) => {
1442
- const parsedArgs = repairToolCallArguments(
1443
- tc.function.arguments || "{}",
1444
- (m) => process.stderr.write(`[warn] ${m}
1445
- `)
1446
- );
1447
- return {
1448
- id: tc.id,
1449
- name: tc.function.name,
1450
- arguments: parsedArgs
1451
- };
1452
- });
1453
- return { toolCalls, usage, reasoningContent };
1454
- }
1455
- return {
1456
- content: message.content ?? "",
1457
- usage,
1458
- ...reasoningContent ? { reasoningContent } : {},
1459
- ...!hasToolCalls && (finishReason ?? "") ? { finishReason } : {}
1460
- };
1461
- } catch (err) {
1462
- throw this.wrapError(err);
1463
- }
1296
+ return null;
1297
+ }
1298
/**
 * Heuristic check for whether a text reads like a document body.
 *
 * Texts that are empty or shorter than 200 characters never qualify. Longer
 * texts qualify when they contain a markdown heading line (`#` to `######`
 * followed by text), or when at least three blank-line-separated paragraphs
 * each have more than 30 characters after trimming.
 *
 * @param {string} content - Candidate text.
 * @returns {boolean} `true` when the text looks like a document body.
 */
function looksLikeDocumentBody(content) {
  if (!content || content.length < 200) return false;
  const hasHeading = /^#{1,6}\s+\S/m.test(content);
  if (hasHeading) return true;
  let substantialParagraphs = 0;
  for (const chunk of content.split(/\n\s*\n/)) {
    if (chunk.trim().length > 30) substantialParagraphs += 1;
  }
  return substantialParagraphs >= 3;
}
1305
/**
 * Cut the "Mandatory Tool Call Policy" section off a system prompt.
 *
 * Everything from the first occurrence of the policy marker onward is
 * dropped and trailing whitespace before it is trimmed. A falsy prompt, or a
 * prompt without the marker, is returned unchanged.
 *
 * @param {string|undefined|null} systemPrompt - The system prompt to clean.
 * @returns {string|undefined|null} The prompt without the policy section.
 */
function stripToolCallReminder(systemPrompt) {
  const markerAt = systemPrompt
    ? systemPrompt.indexOf("[\u26A0\uFE0F Mandatory Tool Call Policy]")
    : -1;
  return markerAt < 0 ? systemPrompt : systemPrompt.slice(0, markerAt).trimEnd();
}
1311
+ var TEE_FINAL_USER_NUDGE = `\u26A0\uFE0F STOP using tools NOW. The save_last_response tee stream is open and capturing every token of THIS response. Output ONLY the requested document body, in markdown. The very first character of your response must be the document's top-level heading (e.g. "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report"). Do NOT print <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, <think>, or any other tool-call markup. Do NOT narrate that you will produce the document \u2014 just produce it. Do NOT pretend to call tools \u2014 there are none in this stream.`;
1312
+ var CONTENT_ONLY_STREAM_REMINDER = `
1313
+
1314
+ [\u26A0\uFE0F CONTENT GENERATION MODE]
1315
+ You are now in a CONTENT-ONLY streaming pass. The file at the configured path will receive every token of THIS response.
1316
+ - Do NOT emit <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, or any tool-call XML/JSON markup.
1317
+ - Do NOT print "I will now call ...", "let me read ...", "<think>" reasoning blocks (the surrounding REPL handles those separately \u2014 they should not enter the saved file).
1318
+ - Do NOT pretend to call tools. There are NO tools available in this stream \u2014 only your text output is captured.
1319
+ - Produce ONLY the requested document body. Markdown is fine. Code blocks are fine. Tool-call markup is NOT.
1320
+ - If you accidentally start a <tool_call>, STOP and produce the document body instead.
1321
+
1322
+ The file is closed and named when this stream ends. If your output contains pseudo-tool-call markup, the save will be REJECTED and you will be asked to retry.`;
1323
+
1324
+ // src/core/agent-loop.ts
1325
+ function partialTagTail(s, tag) {
1326
+ const max = Math.min(s.length, tag.length - 1);
1327
+ for (let len = max; len > 0; len--) {
1328
+ if (s.endsWith(tag.slice(0, len))) return len;
1464
1329
  }
1465
- /**
1466
- * 流式工具调用:文本内容实时输出、工具名称/参数逐块发射。
1467
- * 子类(DeepSeek / Kimi)因虚假声明检测需要完整响应,故不继承此方法。
1468
- */
1469
- async *chatWithToolsStream(request, tools) {
1470
- if (!this.enableStreamingToolCalls) {
1471
- const result = await this.chatWithTools(request, tools);
1472
- if ("toolCalls" in result) {
1473
- for (let i = 0; i < result.toolCalls.length; i++) {
1474
- const tc = result.toolCalls[i];
1475
- yield { type: "tool_call_start", index: i, id: tc.id, name: tc.name };
1476
- yield { type: "tool_call_delta", index: i, argumentsDelta: JSON.stringify(tc.arguments) };
1477
- yield { type: "tool_call_end", index: i };
1330
+ return 0;
1331
+ }
1332
+ var ThinkTagFilter = class {
1333
+ inThink = false;
1334
+ buf = "";
1335
+ push(raw) {
1336
+ this.buf += raw;
1337
+ let out = "";
1338
+ while (this.buf.length > 0) {
1339
+ if (!this.inThink) {
1340
+ const open = this.buf.indexOf("<think>");
1341
+ if (open === -1) {
1342
+ const keep = partialTagTail(this.buf, "<think>");
1343
+ out += this.buf.slice(0, this.buf.length - keep);
1344
+ this.buf = this.buf.slice(this.buf.length - keep);
1345
+ break;
1478
1346
  }
1347
+ out += this.buf.slice(0, open);
1348
+ this.buf = this.buf.slice(open + "<think>".length);
1349
+ this.inThink = true;
1479
1350
  } else {
1480
- yield { type: "text_delta", delta: result.content };
1481
- }
1482
- const rc = "reasoningContent" in result ? result.reasoningContent : void 0;
1483
- const fr = "finishReason" in result ? result.finishReason : void 0;
1484
- yield {
1485
- type: "done",
1486
- usage: result.usage,
1487
- ...rc ? { reasoningContent: rc } : {},
1488
- ...fr ? { finishReason: fr } : {}
1489
- };
1490
- return;
1491
- }
1492
- const openaiTools = tools.map((t) => ({
1493
- type: "function",
1494
- function: {
1495
- name: t.name,
1496
- description: t.description,
1497
- parameters: {
1498
- type: "object",
1499
- properties: Object.fromEntries(
1500
- Object.entries(t.parameters).map(([key, schema]) => [
1501
- key,
1502
- schemaToJsonSchema(schema)
1503
- ])
1504
- ),
1505
- required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
1506
- }
1507
- }
1508
- }));
1509
- const baseMessages = this.buildMessages(request);
1510
- const extraMessages = request._extraMessages ?? [];
1511
- const allMessages = [...baseMessages, ...extraMessages];
1512
- try {
1513
- const stream = await this.client.chat.completions.create({
1514
- model: request.model,
1515
- messages: allMessages,
1516
- tools: openaiTools,
1517
- tool_choice: "auto",
1518
- temperature: request.temperature,
1519
- max_tokens: request.maxTokens,
1520
- stream: true,
1521
- stream_options: { include_usage: true },
1522
- ...request.thinking ? { thinking: { type: "enabled" } } : {}
1523
- }, {
1524
- timeout: request.timeout ?? this.defaultTimeout,
1525
- signal: request.signal
1526
- });
1527
- const toolCallAccumulators = /* @__PURE__ */ new Map();
1528
- let toolCallsEnded = false;
1529
- let thinkingStarted = false;
1530
- let reasoningAccumulator = "";
1531
- let lastFinishReason;
1532
- let doneEmitted = false;
1533
- for await (const chunk of stream) {
1534
- const choice = chunk.choices[0];
1535
- if (choice?.finish_reason) lastFinishReason = choice.finish_reason;
1536
- if (!choice && chunk.usage) {
1537
- if (thinkingStarted) {
1538
- yield { type: "thinking_end" };
1539
- thinkingStarted = false;
1540
- }
1541
- if (!toolCallsEnded && toolCallAccumulators.size > 0) {
1542
- for (const [idx] of toolCallAccumulators) {
1543
- yield { type: "tool_call_end", index: idx };
1544
- }
1545
- toolCallsEnded = true;
1546
- }
1547
- doneEmitted = true;
1548
- yield {
1549
- type: "done",
1550
- usage: toUsage(chunk.usage),
1551
- ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {},
1552
- ...lastFinishReason && lastFinishReason !== "stop" ? { finishReason: lastFinishReason } : {}
1553
- };
1554
- continue;
1555
- }
1556
- if (!choice) continue;
1557
- const delta = choice.delta;
1558
- const reasoningDelta = delta?.reasoning_content;
1559
- if (reasoningDelta) {
1560
- if (!thinkingStarted) {
1561
- yield { type: "thinking_start" };
1562
- thinkingStarted = true;
1563
- }
1564
- reasoningAccumulator += reasoningDelta;
1565
- yield { type: "thinking_delta", delta: reasoningDelta };
1566
- continue;
1567
- }
1568
- if (thinkingStarted && (delta?.content || delta?.tool_calls)) {
1569
- yield { type: "thinking_end" };
1570
- thinkingStarted = false;
1571
- }
1572
- if (delta?.content) {
1573
- yield { type: "text_delta", delta: delta.content };
1574
- }
1575
- if (delta?.tool_calls) {
1576
- for (const tc of delta.tool_calls) {
1577
- const idx = tc.index;
1578
- const existing = toolCallAccumulators.get(idx);
1579
- if (!existing && tc.id && tc.function?.name) {
1580
- const initialArgs = tc.function.arguments ?? "";
1581
- toolCallAccumulators.set(idx, {
1582
- id: tc.id,
1583
- name: tc.function.name,
1584
- arguments: initialArgs
1585
- });
1586
- yield { type: "tool_call_start", index: idx, id: tc.id, name: tc.function.name };
1587
- if (initialArgs) {
1588
- yield { type: "tool_call_delta", index: idx, argumentsDelta: initialArgs };
1589
- }
1590
- } else if (existing && tc.function?.arguments) {
1591
- existing.arguments += tc.function.arguments;
1592
- yield { type: "tool_call_delta", index: idx, argumentsDelta: tc.function.arguments };
1593
- }
1594
- }
1595
- }
1596
- if (choice.finish_reason && !toolCallsEnded && toolCallAccumulators.size > 0) {
1597
- for (const [idx] of toolCallAccumulators) {
1598
- yield { type: "tool_call_end", index: idx };
1599
- }
1600
- toolCallsEnded = true;
1601
- }
1602
- }
1603
- if (!doneEmitted) {
1604
- if (thinkingStarted) {
1605
- yield { type: "thinking_end" };
1606
- thinkingStarted = false;
1607
- }
1608
- if (!toolCallsEnded && toolCallAccumulators.size > 0) {
1609
- for (const [idx] of toolCallAccumulators) {
1610
- yield { type: "tool_call_end", index: idx };
1611
- }
1612
- }
1613
- yield {
1614
- type: "done",
1615
- ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {},
1616
- ...lastFinishReason && lastFinishReason !== "stop" ? { finishReason: lastFinishReason } : {}
1617
- };
1618
- }
1619
- } catch (err) {
1620
- if (err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError")) {
1621
- throw err;
1351
+ const close = this.buf.indexOf("</think>");
1352
+ if (close === -1) {
1353
+ const keep = partialTagTail(this.buf, "</think>");
1354
+ this.buf = this.buf.slice(this.buf.length - keep);
1355
+ break;
1356
+ }
1357
+ this.buf = this.buf.slice(close + "</think>".length);
1358
+ this.inThink = false;
1622
1359
  }
1623
- throw this.wrapError(err);
1624
1360
  }
1361
+ return out;
1625
1362
  }
1626
- /**
1627
- * 将工具结果作为 tool_call 消息追加,供下一轮使用
1628
- */
1629
- buildToolResultMessages(assistantToolCalls, results, reasoningContent) {
1630
- const streamedText = assistantToolCalls._streamedText;
1631
- const assistantMsg = {
1632
- role: "assistant",
1633
- content: streamedText || null,
1634
- tool_calls: assistantToolCalls.map((tc) => ({
1635
- id: tc.id,
1636
- type: "function",
1637
- function: {
1638
- name: tc.name,
1639
- arguments: JSON.stringify(tc.arguments)
1640
- }
1641
- }))
1642
- };
1643
- assistantMsg.reasoning_content = reasoningContent ?? "";
1644
- const resultMsgs = results.map((r) => ({
1645
- role: "tool",
1646
- tool_call_id: r.callId,
1647
- content: r.content
1648
- }));
1649
- return [assistantMsg, ...resultMsgs];
1363
+ /** 流结束:若仍持留可能的半截 '<think>' 前缀且并未进入 think 块,它是真实文本。 */
1364
+ flush() {
1365
+ if (!this.inThink && this.buf) {
1366
+ const tail = this.buf;
1367
+ this.buf = "";
1368
+ return tail;
1369
+ }
1370
+ this.buf = "";
1371
+ return "";
1650
1372
  }
1651
- async validateApiKey(apiKey) {
1373
+ };
1374
+ function repairToolCallArguments(raw, onWarn) {
1375
+ const argStr = raw || "{}";
1376
+ try {
1377
+ return JSON.parse(argStr);
1378
+ } catch {
1379
+ const truncated = argStr.trimEnd();
1380
+ const lastComma = truncated.lastIndexOf(",");
1381
+ const fixed = lastComma > 0 ? truncated.slice(0, lastComma) + "}" : truncated.slice(0, truncated.indexOf("{") + 1) + "}";
1652
1382
  try {
1653
- const testClient = new OpenAI({ apiKey, baseURL: this.defaultBaseUrl });
1654
- await testClient.models.list();
1655
- return true;
1383
+ const repaired = JSON.parse(fixed);
1384
+ onWarn?.("Tool call JSON was truncated and auto-repaired. Some parameters may be missing.");
1385
+ return repaired;
1656
1386
  } catch {
1657
- return false;
1387
+ onWarn?.("Tool call JSON could not be parsed, using empty arguments.");
1388
+ return {};
1658
1389
  }
1659
1390
  }
1660
- async listModels() {
1661
- return this.info.models;
1391
+ }
1392
+ async function consumeToolCallStream(stream, hooks = {}) {
1393
+ const textParts = [];
1394
+ const accumulators = /* @__PURE__ */ new Map();
1395
+ let usage;
1396
+ let rawContent;
1397
+ let reasoningContent;
1398
+ let finishReason;
1399
+ let aborted = false;
1400
+ const thinkFilter = new ThinkTagFilter();
1401
+ const emitText = (raw) => {
1402
+ const visible = thinkFilter.push(raw);
1403
+ if (visible) {
1404
+ textParts.push(visible);
1405
+ hooks.onText?.(visible);
1406
+ }
1407
+ };
1408
+ try {
1409
+ for await (const event of stream) {
1410
+ if (hooks.signal?.aborted) {
1411
+ aborted = true;
1412
+ break;
1413
+ }
1414
+ switch (event.type) {
1415
+ case "text_delta":
1416
+ emitText(event.delta);
1417
+ break;
1418
+ case "thinking_start":
1419
+ hooks.onThinkingStart?.();
1420
+ break;
1421
+ case "thinking_delta":
1422
+ hooks.onThinkingDelta?.(event.delta);
1423
+ break;
1424
+ case "thinking_end":
1425
+ hooks.onThinkingEnd?.();
1426
+ break;
1427
+ case "tool_call_start":
1428
+ accumulators.set(event.index, { id: event.id, name: event.name, arguments: "" });
1429
+ hooks.onToolCallStart?.(event.index, event.id, event.name);
1430
+ break;
1431
+ case "tool_call_delta": {
1432
+ const acc = accumulators.get(event.index);
1433
+ if (acc) acc.arguments += event.argumentsDelta;
1434
+ break;
1435
+ }
1436
+ case "tool_call_end":
1437
+ break;
1438
+ case "done":
1439
+ if (event.usage) usage = event.usage;
1440
+ if (event.rawContent) rawContent = event.rawContent;
1441
+ if (event.reasoningContent) reasoningContent = event.reasoningContent;
1442
+ if (event.finishReason) finishReason = event.finishReason;
1443
+ break;
1444
+ }
1445
+ }
1446
+ } catch (err) {
1447
+ if (err instanceof Error && (err.name === "AbortError" || err.message.includes("aborted"))) {
1448
+ aborted = true;
1449
+ } else {
1450
+ throw err;
1451
+ }
1662
1452
  }
1663
- wrapError(err) {
1664
- if (err instanceof OpenAI.AuthenticationError) {
1665
- return new AuthError(this.info.id);
1453
+ const tail = thinkFilter.flush();
1454
+ if (tail && !aborted) {
1455
+ textParts.push(tail);
1456
+ hooks.onText?.(tail);
1457
+ }
1458
+ const textContent = textParts.join("");
1459
+ if (aborted) {
1460
+ return { textContent, toolCalls: [], usage, rawContent, reasoningContent, finishReason, aborted };
1461
+ }
1462
+ const toolCalls = [];
1463
+ for (const [, acc] of accumulators) {
1464
+ toolCalls.push({
1465
+ id: acc.id,
1466
+ name: acc.name,
1467
+ arguments: repairToolCallArguments(acc.arguments, hooks.onWarn)
1468
+ });
1469
+ }
1470
+ if (toolCalls.length > 0) {
1471
+ if (rawContent) {
1472
+ toolCalls._rawContent = rawContent;
1666
1473
  }
1667
- if (err instanceof OpenAI.RateLimitError) {
1668
- return new RateLimitError(this.info.id);
1474
+ if (textContent) {
1475
+ toolCalls._streamedText = textContent;
1669
1476
  }
1670
- if (err instanceof Error) {
1671
- return new ProviderError(this.info.id, err.message, err);
1477
+ }
1478
+ return { textContent, toolCalls, usage, rawContent, reasoningContent, finishReason, aborted };
1479
+ }
1480
+ var FREE_ROUND_TOOLS = /* @__PURE__ */ new Set(["write_todos"]);
1481
+ var MAX_CONSECUTIVE_FREE_ROUNDS = 3;
1482
+ var FreeRoundTracker = class {
1483
+ consecutive = 0;
1484
+ /** 返回 true 表示本轮不消耗有效轮次(调用方执行 round--)。 */
1485
+ apply(toolNames) {
1486
+ const allFree = toolNames.length > 0 && toolNames.every((n) => FREE_ROUND_TOOLS.has(n));
1487
+ if (!allFree) {
1488
+ this.consecutive = 0;
1489
+ return false;
1672
1490
  }
1673
- return new ProviderError(this.info.id, String(err));
1491
+ this.consecutive++;
1492
+ return this.consecutive <= MAX_CONSECUTIVE_FREE_ROUNDS;
1674
1493
  }
1675
1494
  };
1676
-
1677
- // src/providers/deepseek.ts
1678
- var CODE_BLOCK_PATTERNS = [
1679
- /```(?:sql|bash|shell|powershell|sh)\s*\n/i,
1680
- /```\s*\n\s*(?:SELECT|INSERT|UPDATE|DELETE|psql)\b/i
1681
- ];
1682
- function detectsCodeBlockPseudoCall(content) {
1683
- return CODE_BLOCK_PATTERNS.some((pattern) => pattern.test(content));
1684
- }
1685
- var DEEPSEEK_CODE_BLOCK_CORRECTION = "You wrote a code block in your response text, but you did NOT actually execute it. Code blocks in text are NOT executed by the system. You MUST use the function calling API to invoke the appropriate tool (e.g., mcp__postgres__query for SQL queries, bash for shell commands). Please call the correct tool NOW to execute the query/command.";
1686
- var DEEPSEEK_ANTI_HALLUCINATION = `
1687
-
1688
- [CRITICAL: Anti-Hallucination Enforcement \u2014 DeepSeek Specific]
1689
- You have a known tendency to claim files were "saved" or "created" without actually calling write_file. This is UNACCEPTABLE.
1690
- Rules you MUST follow:
1691
- - NEVER type file content into your response text. ALL file content goes through write_file tool calls ONLY.
1692
- - After calling write_file, do NOT describe the file content again in text \u2014 just confirm the tool call result.
1693
- - When generating multiple files: call write_file for file 1 \u2192 call write_file for file 2 \u2192 ... \u2192 THEN summarize.
1694
- - If you catch yourself writing markdown/code that should be a file, STOP and use write_file instead.
1695
- - The system will detect and reject phantom claims. Each failed detection wastes a round. Be honest.`;
1696
- var DeepSeekProvider = class extends OpenAICompatibleProvider {
1697
- defaultBaseUrl = "https://api.deepseek.com/v1";
1698
- /** 禁用流式工具调用,确保 chatWithTools 覆写(代码块检测)生效 */
1699
- enableStreamingToolCalls = false;
1700
- info = {
1701
- id: "deepseek",
1702
- displayName: "DeepSeek",
1703
- defaultModel: "deepseek-v4-flash",
1704
- apiKeyEnvVar: "AICLI_API_KEY_DEEPSEEK",
1705
- requiresApiKey: true,
1706
- baseUrl: this.defaultBaseUrl,
1707
- models: [
1708
- // ── V4 family (2026-04-23+):1M context,支持 Thinking / Non-Thinking 双模式 ──
1709
- {
1710
- id: "deepseek-v4-pro",
1711
- displayName: "DeepSeek V4 Pro (1.6T MoE, 49B active)",
1712
- contextWindow: 1048576,
1713
- supportsStreaming: true,
1714
- supportsThinking: true
1715
- },
1716
- {
1717
- id: "deepseek-v4-flash",
1718
- displayName: "DeepSeek V4 Flash (284B MoE, 13B active)",
1719
- contextWindow: 1048576,
1720
- supportsStreaming: true,
1721
- supportsThinking: true
1722
- }
1723
- // Legacy aliases (deepseek-chat / deepseek-reasoner) removed in v0.4.140 —
1724
- // they retire 2026-07-24 and DeepSeek already routes them to V4 Flash server-side.
1725
- // Pre-existing sessions were migrated to deepseek-v4-flash; pricing.ts keeps the
1726
- // old ids for any historical cost calculations.
1727
- ]
1728
- };
1729
- /**
1730
- * 覆写 chatWithTools 检测代码块伪工具调用并自动重试。
1731
- *
1732
- * DeepSeek 有时在 system prompt 较长(如技能注入)时,
1733
- * 退化为输出 ```sql/```bash 代码块而不调用工具。
1734
- * 检测到后注入纠正消息强制重试一次。
1735
- */
1736
- async chatWithTools(request, tools) {
1737
- const enhancedRequest = {
1738
- ...request,
1739
- systemPrompt: (request.systemPrompt ?? "") + DEEPSEEK_ANTI_HALLUCINATION
1495
/**
 * Emits at most one warning per severity level as the remaining tool-round
 * budget shrinks.
 *
 * Thresholds are derived from `maxToolRounds`:
 *   - note:     max(10, 20% of the budget)
 *   - low:      max(5, 10% of the budget), capped strictly below the note level
 *   - critical: max(3, 5% of the budget), capped strictly below the low level
 */
var BudgetWarner = class {
  /**
   * @param {number} maxToolRounds - Total number of tool rounds available.
   */
  constructor(maxToolRounds) {
    this.maxToolRounds = maxToolRounds;
    const share = (fraction) => Math.floor(maxToolRounds * fraction);
    this.noteAt = Math.max(10, share(0.2));
    // Each stricter threshold is forced below the previous one so that the
    // three levels never collapse onto the same round count.
    this.lowAt = Math.min(Math.max(5, share(0.1)), this.noteAt - 1);
    this.criticalAt = Math.min(Math.max(3, share(0.05)), this.lowAt - 1);
  }
  noteAt;
  lowAt;
  criticalAt;
  warnedNote = false;
  warnedLow = false;
  warnedCritical = false;
  /**
   * Report the most severe warning that applies and has not been issued yet.
   *
   * @param {number} roundsLeft - Tool rounds still available.
   * @returns {{level: string, injectMessage: string, displayMessage?: string}|null}
   *   The warning to surface, or `null` when nothing new applies. The "note"
   *   level carries no `displayMessage`.
   */
  check(roundsLeft) {
    const atOrBelow = (threshold) => roundsLeft <= threshold;
    if (atOrBelow(this.criticalAt) && !this.warnedCritical) {
      this.warnedCritical = true;
      return {
        level: "critical",
        injectMessage: `\u{1F6A8} Critical budget: Only ${roundsLeft} rounds left! Wrap up NOW \u2014 complete the current operation and give a final summary. Do NOT start new tasks.`,
        displayMessage: `\u{1F6A8} Critical: ${roundsLeft} rounds remaining`
      };
    }
    if (atOrBelow(this.lowAt) && !this.warnedLow) {
      this.warnedLow = true;
      return {
        level: "low",
        injectMessage: `\u26A0\uFE0F Budget warning: Only ${roundsLeft} tool rounds remaining. Prioritize completing the most critical task. Use efficient approaches (batch edits, fewer reads). If you cannot finish everything, summarize what's done and what remains.`,
        displayMessage: `\u26A0\uFE0F Low budget: ${roundsLeft} rounds remaining`
      };
    }
    if (atOrBelow(this.noteAt) && !this.warnedNote) {
      this.warnedNote = true;
      return {
        level: "note",
        injectMessage: `\u{1F4CA} Budget note: ${roundsLeft} tool rounds remaining out of ${this.maxToolRounds}. Plan your remaining work efficiently \u2014 use batch operations (e.g., replaceAll) when possible.`
      };
    }
    return null;
  }
};
1537
+ var EMPTY_RESPONSE_NUDGE = "Your previous response was empty \u2014 no text and no tool calls. This usually means the context window is nearly full. Please either: (1) continue the task by calling the next tool you need, or (2) give a concise final text summary of what has been accomplished so far and what remains. Do NOT repeat earlier long outputs.";
1538
/**
 * Turn a provider `finish_reason` into a short description of why a response
 * came back empty.
 *
 * @param {string|undefined|null} fr - The finish reason reported for the response.
 * @returns {string} A description; generic when no reason is given.
 */
function describeFinishReason(fr) {
  switch (fr) {
    case "length":
      return "output limit reached (finish_reason=length)";
    case "content_filter":
      return "content blocked (finish_reason=content_filter)";
    default:
      // Any other truthy reason is echoed; a missing one gets the bare message.
      return fr ? `empty response (finish_reason=${fr})` : "empty response";
  }
}
1544
/**
 * Suggest a remedy for an empty response based on its `finish_reason`.
 *
 * @param {string|undefined|null} fr - The finish reason reported for the response.
 * @returns {string} A hint for the user; a generic one for unknown reasons.
 */
function emptyResponseHint(fr) {
  const hintsByReason = new Map([
    ["length", "Output token limit hit \u2014 try /compact to reduce context, raise maxTokens, or /model to switch."],
    ["content_filter", "Content was blocked by the provider filter."]
  ]);
  return hintsByReason.get(fr) ?? "Context window may be exhausted or max_tokens too low.";
}
1549
+ var EmptyResponseGuard = class {
1550
+ retries = 0;
1551
+ onEmpty(canRetry, finishReason) {
1552
+ if (this.retries === 0 && canRetry) {
1553
+ this.retries++;
1554
+ return {
1555
+ action: "nudge",
1556
+ injectMessage: EMPTY_RESPONSE_NUDGE,
1557
+ displayMessage: `\u26A0 ${describeFinishReason(finishReason)} \u2014 nudging AI to continue...`
1558
+ };
1559
+ }
1560
+ return {
1561
+ action: "stop",
1562
+ displayMessage: "\u26A0 AI returned empty responses twice in a row. Stopping agentic loop.",
1563
+ hint: emptyResponseHint(finishReason)
1740
1564
  };
1741
- const result = await super.chatWithTools(enhancedRequest, tools);
1742
- const hasBashTool = tools.some((t) => t.name === "bash");
1743
- const extraMsgs = request._extraMessages ?? [];
1744
- const alreadyUsedTools = extraMsgs.some((m) => m?.role === "tool");
1745
- if (hasBashTool && !alreadyUsedTools && "content" in result && result.content && detectsCodeBlockPseudoCall(result.content)) {
1746
- process.stderr.write(
1747
- `[deepseek] \u26A0 Detected code block pseudo-tool-call (DeepSeek wrote code in text instead of calling a tool). Forcing retry...
1748
- `
1749
- );
1750
- const retryRequest = {
1751
- ...request,
1752
- _extraMessages: [
1753
- ...request._extraMessages ?? [],
1754
- // DeepSeek V4 thinking 模式:assistant 消息必须含 reasoning_content(即使为空)
1755
- { role: "assistant", content: result.content, reasoning_content: "" },
1756
- { role: "user", content: DEEPSEEK_CODE_BLOCK_CORRECTION }
1757
- ]
1565
+ }
1566
+ /** 非空响应到达 重置计数(下次空响应仍可 nudge 一次)。 */
1567
+ onNonEmpty() {
1568
+ this.retries = 0;
1569
+ }
1570
+ };
1571
+ var ContextPressureMonitor = class {
1572
+ warned80 = false;
1573
+ check(requestTokens, contextWindow) {
1574
+ if (contextWindow <= 0) return { action: "ok", ratio: 0 };
1575
+ const ratio = requestTokens / contextWindow;
1576
+ if (ratio >= 0.95) return { action: "abort", ratio };
1577
+ if (ratio >= 0.8 && !this.warned80) {
1578
+ this.warned80 = true;
1579
+ return {
1580
+ action: "warn",
1581
+ ratio,
1582
+ injectMessage: `\u26A0\uFE0F Context pressure: ~${Math.round(ratio * 100)}% of the ${contextWindow.toLocaleString()}-token context window is used. Avoid reading more files or running broad scans. Finish the current critical step, then produce a final summary. Every unnecessary tool call now risks breaking the conversation.`
1758
1583
  };
1759
- const retryResult = await super.chatWithTools(retryRequest, tools);
1760
- if (result.usage && "usage" in retryResult && retryResult.usage) {
1761
- retryResult.usage = {
1762
- inputTokens: result.usage.inputTokens + retryResult.usage.inputTokens,
1763
- outputTokens: result.usage.outputTokens + retryResult.usage.outputTokens
1764
- };
1765
- }
1766
- return retryResult;
1767
1584
  }
1768
- return result;
1585
+ return { action: "ok", ratio };
1769
1586
  }
1770
1587
  };
1588
/**
 * Add one usage record onto a running total, mutating `total` in place.
 *
 * Input and output token counts are always added. The two cache counters are
 * optional on `delta` and count as 0 when absent. A falsy `delta` is ignored.
 *
 * @param {{inputTokens: number, outputTokens: number, cacheCreationTokens: number, cacheReadTokens: number}} total
 *   Running totals to update.
 * @param {{inputTokens: number, outputTokens: number, cacheCreationTokens?: number, cacheReadTokens?: number}|undefined|null} delta
 *   Usage reported for one request, if any.
 * @returns {void}
 */
function accumulateUsage(total, delta) {
  if (delta) {
    const cacheCreated = delta.cacheCreationTokens ?? 0;
    const cacheRead = delta.cacheReadTokens ?? 0;
    total.inputTokens += delta.inputTokens;
    total.outputTokens += delta.outputTokens;
    total.cacheCreationTokens += cacheCreated;
    total.cacheReadTokens += cacheRead;
  }
}
1595
/**
 * Builds the "tool round budget" text block describing how many tool-call
 * rounds are available and how to spend them.
 *
 * @param {{maxToolRounds: number, autoPauseInterval: number, planMode?: boolean}} opts
 *   - maxToolRounds: round limit quoted in the text.
 *   - autoPauseInterval: when > 0, a bullet about the periodic continue
 *     prompt is appended.
 *   - planMode: when truthy, the read-only Plan Mode variant is returned.
 * @returns {string} The hint, beginning with two newline characters.
 */
function buildRoundBudgetHint(opts) {
  const { autoPauseInterval, maxToolRounds, planMode } = opts;

  let pauseHint = "";
  if (autoPauseInterval > 0) {
    pauseHint = `\n- Every ${autoPauseInterval} rounds the user will be asked whether to continue \u2014 use this as a natural checkpoint to report progress.`;
  }

  // Both variants open with an empty line pair, hence the two leading "" entries.
  const lines = planMode
    ? [
        "",
        "",
        "[Tool Round Budget \u2014 Plan Mode]",
        `You have a maximum of ${maxToolRounds} tool call rounds. You are in READ-ONLY Plan Mode:`,
        "- Only use: read_file, list_dir, grep_files, glob_files, ask_user, write_todos",
        "- Do NOT attempt to call bash, write_file, edit_file \u2014 they are disabled",
        "- Do NOT write shell commands or code blocks as a substitute for tool calls",
        "- Do NOT read the same file more than once",
        "- Call write_todos ONCE to present your plan, then give a text summary",
        `- If the user asks you to execute anything, respond: "Please type /plan execute to switch to execute mode."${pauseHint}`
      ]
    : [
        "",
        "",
        "[Tool Round Budget]",
        `You have a maximum of ${maxToolRounds} tool call rounds for this task. Plan efficiently:`,
        "- Prefer batch operations (e.g. global find-and-replace) over repetitive single edits.",
        "- Do NOT read the same file more than once \u2014 use the content from previous reads.",
        "- Prioritize the most critical tasks first in case rounds run out.",
        `- When remaining rounds are low, focus on completing the current task and summarizing.${pauseHint}`
      ];

  return lines.join("\n");
}
1910
- function hadPreviousWriteToolCalls(extraMessages) {
1911
- const msgs = extraMessages;
1912
- return msgs.some((msg) => {
1913
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
1914
- return msg.tool_calls.some((tc) => {
1915
- const fn = tc.function;
1916
- const name = fn?.name ?? "";
1917
- return name === "write_file" || name === "edit_file";
1918
- });
1919
- }
1920
- if (msg.role === "assistant" && Array.isArray(msg.content)) {
1921
- return msg.content.some((block) => {
1922
- if (block.type !== "tool_use") return false;
1923
- const name = block.name ?? "";
1924
- return name === "write_file" || name === "edit_file";
1925
- });
1619
/**
 * Builds the prompt sent once every tool round has been used, asking the
 * model for a text-only wrap-up.
 *
 * @param {number} maxToolRounds - Round limit quoted in the prompt.
 * @returns {string} A header line followed by three numbered items.
 */
function buildRoundsExhaustedPrompt(maxToolRounds) {
  const header = `You have used all ${maxToolRounds} tool call rounds. Do not call any more tools. Summarize in text:`;
  const items = [
    "1. What work has been completed so far",
    "2. What tasks remain unfinished",
    "3. What the user can do next (e.g. send another request to continue)"
  ];
  return [header, ...items].join("\n");
}
1625
/**
 * Builds the message injected when the user chooses to stop at a pause
 * checkpoint.
 *
 * @param {number} effectiveRound - Round number at which the user stopped.
 * @param {number} maxToolRounds - Total round budget.
 * @returns {string} Instruction telling the model to stop calling tools and summarize.
 */
function buildUserStopMessage(effectiveRound, maxToolRounds) {
  const progress = `${effectiveRound}/${maxToolRounds}`;
  return `The user has stopped the task at round ${progress}. Do not call any more tools. Summarize what has been completed and what remains.`;
}
1628
/**
 * Drives the tool-calling loop: repeatedly asks the model for a response,
 * executes any requested tools through `host`, and stops on a final answer,
 * an abort/stop signal, context overflow, or when the round budget runs out.
 *
 * All I/O and UI concerns are delegated to `host`; hooks accessed with `?.`
 * are optional. Messages produced during the loop are collected in a local
 * `extraMessages` array that is handed to the host callbacks.
 *
 * @param {object} host - Loop environment. Reads `maxToolRounds`,
 *   `autoPauseInterval`, `usage` (mutated in place), `toolDefs`, `planMode`,
 *   `providerId`, `signal`, and calls the callbacks used below.
 * @returns {Promise<{reason: string, usage: object}>} `reason` is one of
 *   "aborted", "stopped", "context-overflow", "host-stop", "empty-response",
 *   "final", "tee-stop" or "rounds-exhausted".
 */
async function runAgentLoop(host) {
  const { maxToolRounds, autoPauseInterval, usage } = host;
  const extraMessages = [];
  const budgetWarner = new BudgetWarner(maxToolRounds);
  const emptyGuard = new EmptyResponseGuard();
  const ctxMonitor = new ContextPressureMonitor();
  const freeRounds = new FreeRoundTracker();
  const roundToolHistory = [];
  const hasWriteTools = host.toolDefs.some((t) => t.name === "write_file" || t.name === "edit_file");
  for (let round = 0; round < maxToolRounds; round++) {
    if (host.signal?.aborted) return { reason: "aborted", usage };
    host.onRoundStart?.(round, maxToolRounds);
    if ((await host.beforeRound?.(round, extraMessages)) === "stop") {
      return { reason: "stopped", usage };
    }
    // Budget warning, based on the number of rounds still available.
    const warning = budgetWarner.check(maxToolRounds - round);
    if (warning) {
      extraMessages.push({ role: "user", content: warning.injectMessage });
      host.onBudgetWarning?.(warning);
    }
    const interjection = host.pollInterjection?.();
    if (interjection) extraMessages.push({ role: "user", content: interjection });
    // Context pressure: abort before sending a request that is nearly full,
    // or inject the warning message into the conversation.
    const ctxWindow = host.getContextWindow();
    if (ctxWindow > 0) {
      const pressure = ctxMonitor.check(host.estimateRequestTokens(extraMessages), ctxWindow);
      if (pressure.action === "abort") {
        host.onContextPressure?.(pressure, ctxWindow, round);
        return { reason: "context-overflow", usage };
      } else if (pressure.action === "warn") {
        host.onContextPressure?.(pressure, ctxWindow, round);
        extraMessages.push({ role: "user", content: pressure.injectMessage });
      }
    }
    const outcome = await host.callModel(round, extraMessages);
    // Record the tokens of this call before the abort check, so usage already
    // reported by the model is not lost when the signal fired mid-call.
    // `outcome?.` keeps the abort path safe if the host returned nothing.
    accumulateUsage(usage, outcome?.usage);
    if (host.signal?.aborted) return { reason: "aborted", usage };
    if (outcome.stopLoop) return { reason: "host-stop", usage };
    const toolCalls = outcome.toolCalls ?? [];
    if (toolCalls.length === 0) {
      // Text-only response: retry on a suspected fake "file written" claim,
      // nudge on an empty response, otherwise treat it as the final answer.
      const content = outcome.content ?? "";
      const alreadyRendered = !!outcome.alreadyRendered;
      const alreadyWrote = hadPreviousWriteToolCalls(extraMessages);
      const coarseHallucination = !host.planMode && hasWriteTools && !alreadyWrote && !!content && detectsHallucinatedFileOp(content);
      const phantomPaths = (coarseHallucination || alreadyWrote) && !host.planMode && hasWriteTools && content ? findPhantomClaims(content, extraMessages) : [];
      const bashRanThisTurn = extractBashCommands(extraMessages).length > 0;
      // The coarse pattern match is suppressed once a bash tool call is present.
      const coarseShouldFire = coarseHallucination && !bashRanThisTurn;
      if ((phantomPaths.length > 0 || coarseShouldFire) && round < maxToolRounds - 1) {
        host.onHallucinationRetry?.({ phantomPaths, round, alreadyRendered });
        const correctionMsg = phantomPaths.length > 0 ? buildPhantomCorrectionMessage(phantomPaths) : HALLUCINATION_CORRECTION_MESSAGE;
        // For provider "deepseek" a reasoning_content field is always attached
        // to the assistant message, empty when there is no reasoning text.
        const reasoningField = outcome.reasoningContent ? { reasoning_content: outcome.reasoningContent } : host.providerId === "deepseek" ? { reasoning_content: "" } : {};
        extraMessages.push(
          { role: "assistant", content, ...reasoningField },
          { role: "user", content: correctionMsg }
        );
        continue;
      }
      if (!content || content.trim() === "") {
        const decision = emptyGuard.onEmpty(round < maxToolRounds - 1, outcome.finishReason);
        host.onEmptyResponse?.(decision, { alreadyRendered, round });
        if (decision.action === "nudge") {
          extraMessages.push({ role: "user", content: decision.injectMessage });
          continue;
        }
        return { reason: "empty-response", usage };
      }
      emptyGuard.onNonEmpty();
      await host.onFinalContent(content, { reasoningContent: outcome.reasoningContent, alreadyRendered });
      return { reason: "final", usage };
    }
    emptyGuard.onNonEmpty();
    // A save_last_response call with a path is handed to the host's tee
    // handler (when provided) instead of the regular tool execution path.
    const saveCall = toolCalls.find((tc) => tc.name === "save_last_response");
    const savePath = saveCall ? String(saveCall.arguments["path"] ?? "") : "";
    if (saveCall && savePath && host.runSaveLastResponseTee) {
      const directive = await host.runSaveLastResponseTee({
        toolCalls,
        call: saveCall,
        saveToFile: savePath,
        extraMessages,
        reasoningContent: outcome.reasoningContent
      });
      if (directive === "stop") return { reason: "tee-stop", usage };
      freeRounds.apply(toolCalls.map((tc) => tc.name));
      continue;
    }
    const toolResults = await host.executeTools(toolCalls, extraMessages);
    if (host.isInterrupted?.() || host.signal?.aborted) {
      host.onInterrupted?.();
      return { reason: "aborted", usage };
    }
    roundToolHistory.push({ round: round + 1, tools: toolCalls.map((tc) => tc.name) });
    host.onToolsExecuted?.(toolCalls, toolResults, extraMessages);
    extraMessages.push(...host.buildToolResultMessages(toolCalls, toolResults, outcome.reasoningContent));
    for (const tc of toolCalls) {
      if (tc.name.startsWith("mcp__")) host.onMcpToolUsed?.(tc.name);
    }
    host.persistRound?.(toolCalls, toolResults, {
      assistantContent: outcome.content,
      reasoningContent: outcome.reasoningContent
    });
    host.afterToolRoundPersist?.(toolCalls, toolResults, extraMessages);
    // When the tracker reports a free round, undo the upcoming `round++` so
    // this round does not consume budget.
    if (freeRounds.apply(toolCalls.map((tc) => tc.name))) {
      round--;
    }
    if (host.checkLoopHealth?.(toolCalls, extraMessages) === "skip-checkpoint") continue;
    const postInterjection = host.pollInterjection?.();
    if (postInterjection) extraMessages.push({ role: "user", content: postInterjection });
    // Periodic checkpoint: every `autoPauseInterval` rounds ask the host
    // whether to continue, stop, or redirect.
    const effectiveRound = round + 1;
    const remaining = maxToolRounds - effectiveRound;
    if (autoPauseInterval > 0 && effectiveRound > 0 && effectiveRound % autoPauseInterval === 0 && remaining > 0 && !host.signal?.aborted && host.requestAutoPause) {
      const toolSummary = summarizeRecentTools(roundToolHistory, autoPauseInterval);
      const resp = await host.requestAutoPause({ effectiveRound, maxToolRounds, remaining, toolSummary });
      if (host.signal?.aborted) return { reason: "aborted", usage };
      if (resp.action === "stop") {
        extraMessages.push({ role: "user", content: buildUserStopMessage(effectiveRound, maxToolRounds) });
        break;
      } else if (resp.action === "redirect" && resp.message) {
        extraMessages.push({ role: "user", content: resp.message });
      }
    }
    host.onRoundEnd?.(round);
  }
  // Reached when the budget is used up or the user stopped at a checkpoint:
  // ask for a tool-free summary. This is best-effort; a failing summary call
  // is reported to the host as `null` rather than raised.
  let summaryContent = null;
  try {
    const summaryExtra = [
      ...extraMessages,
      { role: "user", content: buildRoundsExhaustedPrompt(maxToolRounds) }
    ];
    const summary = await host.callSummary(summaryExtra);
    accumulateUsage(usage, summary.usage);
    summaryContent = summary.content && summary.content.trim() ? summary.content : null;
  } catch {
    summaryContent = null;
  }
  await host.onRoundsExhausted(summaryContent);
  return { reason: "rounds-exhausted", usage };
}
1937
- var TOOL_CALL_REMINDER = `
1938
-
1939
- [\u26A0\uFE0F Mandatory Tool Call Policy]
1940
- When you need to create, write, or modify files, you MUST use the function calling API to invoke write_file or edit_file.
1941
- NEVER claim "file saved", "file created", "written to", etc. in your response text without actually calling the tool.
1942
- Describing file content in text without calling the tool = the file does not exist = task failure.
1943
- If multiple files need to be generated, you MUST call write_file separately for each file \u2014 do not skip any.
1944
- Do NOT output fake "completion summaries" unless you have actually completed all file writes via tool_calls.
1945
-
1946
- CRITICAL \u2014 Batch file generation rules:
1947
- 1. You MUST call write_file once per file. There are NO shortcuts.
1948
- 2. After writing file N, immediately proceed to call write_file for file N+1. Do NOT stop to summarize.
1949
- 3. If you find yourself typing file content into your response text instead of into a write_file call, STOP and use the tool.
1950
- 4. Only produce a text summary AFTER all write_file calls have been made and returned success.
1951
- 5. The system compares every "file saved" claim against actual tool calls. Phantom claims trigger an automatic retry \u2014 do not waste rounds.`;
1952
- function buildWriteRoundReminder(writtenCount) {
1953
- return `
1954
-
1955
- [Write Progress Reminder]
1956
- You have successfully called write_file ${writtenCount} time(s) so far in this turn. If there are more files to write, call write_file NOW for the next file. Do NOT produce a text summary until ALL files have been written via tool calls.`;
1764
/**
 * Summarizes which tools were used in the most recent rounds, e.g.
 * "read_file×3, bash×2, grep_files".
 *
 * Tools are ordered by descending call count (ties keep first-seen order);
 * the "×N" suffix is only added when a tool was used more than once.
 *
 * @param {Array<{round: number, tools: string[]}>} history - Per-round tool names, oldest first.
 * @param {number} interval - How many of the latest rounds to include.
 * @returns {string} Comma-separated summary; empty when nothing is in range.
 */
function summarizeRecentTools(history, interval) {
  // `slice(-0)` is `slice(0)` and would return the whole history, so a
  // non-positive interval must be handled before slicing.
  if (!(interval > 0)) return "";
  const counts = /* @__PURE__ */ new Map();
  for (const { tools } of history.slice(-interval)) {
    for (const tool of tools) counts.set(tool, (counts.get(tool) ?? 0) + 1);
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([name, count]) => (count > 1 ? `${name}\xD7${count}` : name))
    .join(", ");
}
1958
- var HALLUCINATION_CORRECTION_MESSAGE = "You did NOT actually call the write_file tool \u2014 the file was NOT created! Please immediately use the write_file tool via the function calling API to perform the actual file write. Do NOT describe file content in text \u2014 you MUST invoke write_file through the tool_calls mechanism.";
1959
- function extractClaimedFilePaths(content) {
1960
- const paths = /* @__PURE__ */ new Set();
1961
- const add = (p) => {
1962
- const trimmed = p.trim().replace(/[,,。、;;::]+$/, "");
1963
- if (trimmed && /\.\w{1,6}$/.test(trimmed)) paths.add(trimmed);
1772
+
1773
+ // src/providers/openai-compatible.ts
1774
/**
 * Converts an OpenAI-style `usage` payload into the internal usage shape.
 *
 * Cached prompt tokens (`prompt_tokens_details.cached_tokens`) are subtracted
 * from the input count and reported separately as `cacheReadTokens`. Missing
 * `prompt_tokens` / `completion_tokens` are treated as 0 so a partial payload
 * cannot produce NaN or undefined counters.
 *
 * @param {{prompt_tokens?: number, completion_tokens?: number, prompt_tokens_details?: {cached_tokens?: number}}|null|undefined} u
 * @returns {{inputTokens: number, outputTokens: number, cacheReadTokens?: number}|undefined}
 *   `undefined` when no usage payload was supplied.
 */
function toUsage(u) {
  if (!u) return void 0;
  const cached = u.prompt_tokens_details?.cached_tokens ?? 0;
  const promptTokens = u.prompt_tokens ?? 0;
  const usage = {
    // Clamp at 0 in case the cached count exceeds the reported prompt count.
    inputTokens: Math.max(0, promptTokens - cached),
    outputTokens: u.completion_tokens ?? 0
  };
  if (cached > 0) usage.cacheReadTokens = cached;
  return usage;
}
1784
+ var OpenAICompatibleProvider = class extends BaseProvider {
1785
+ client;
1786
+ defaultTimeout = 6e4;
1787
+ // ms
1788
+ /** 子类设为 false 可禁用流式工具调用(虚假声明检测需要完整响应) */
1789
+ enableStreamingToolCalls = true;
1790
+ async initialize(apiKey, options) {
1791
+ if (options?.timeout !== void 0) {
1792
+ this.defaultTimeout = options.timeout;
1974
1793
  }
1975
- const windowStart = pos === 0 ? 0 : pos + 1;
1976
- const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1977
- const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1978
- if (actionLineRe.test(window)) add(m[1]);
1794
+ const clientOptions = {
1795
+ apiKey,
1796
+ baseURL: options?.baseUrl ?? this.defaultBaseUrl,
1797
+ timeout: this.defaultTimeout
1798
+ };
1799
+ const proxyUrl = options?.proxy;
1800
+ try {
1801
+ const { Agent, ProxyAgent, fetch: undiciFetch } = await import("undici");
1802
+ const STREAM_BODY_TIMEOUT = 30 * 60 * 1e3;
1803
+ const STREAM_HEADERS_TIMEOUT = 5 * 60 * 1e3;
1804
+ const dispatcher = proxyUrl ? new ProxyAgent({
1805
+ uri: proxyUrl,
1806
+ bodyTimeout: STREAM_BODY_TIMEOUT,
1807
+ headersTimeout: STREAM_HEADERS_TIMEOUT
1808
+ }) : new Agent({
1809
+ bodyTimeout: STREAM_BODY_TIMEOUT,
1810
+ headersTimeout: STREAM_HEADERS_TIMEOUT
1811
+ });
1812
+ clientOptions.fetch = ((url, init) => undiciFetch(url, { ...init, dispatcher }));
1813
+ } catch {
1814
+ }
1815
+ this.client = new OpenAI(clientOptions);
1979
1816
  }
1980
- const zhRe = /(?:已保存(?:到)?|已写入(?:到)?|已创建|已生成|文件路径[::]|保存为|写入到)\s*[`'”””]?([^\s`'”””,,。\n]+?\.\w{1,6})/g;
1981
- while ((m = zhRe.exec(content)) !== null) add(m[1]);
1982
- const enRe = /(?:saved|written|created)\s+(?:to|as|at)\s+[`'”]?([^\s`'”\n,]+?\.\w{1,6})/gi;
1983
- while ((m = enRe.exec(content)) !== null) add(m[1]);
1984
- const checkRe = /✅[^\n`]*?[`'”]?([^\s`'”\n,,。]+?\.\w{1,6})/g;
1985
- while ((m = checkRe.exec(content)) !== null) {
1986
- let pos = m.index;
1987
- let linesBack = 0;
1988
- while (linesBack < 9 && pos > 0) {
1989
- pos--;
1990
- if (content[pos] === "\n") linesBack++;
1817
+ /**
1818
+ * systemPrompt + messages 合并为 OpenAI messages 数组(system 消息放首位)。
1819
+ *
1820
+ * v0.4.100+:按原始顺序保留工具消息(assistant.toolCalls role='tool'),
1821
+ * 不再剥离到 _extraMessages 末尾——之前的剥离会让历史工具往返被插到当前用户消息之后,
1822
+ * 导致模型把"过去的工具调用结果"当作"对当前问题的回应",DeepSeek V4 Flash 上尤其明显
1823
+ * (会复读上一轮的"完成汇总")。
1824
+ *
1825
+ * DeepSeek V4 thinking 模式:所有 assistant 消息(含带 toolCalls 的)必须有
1826
+ * reasoning_content 字段,缺失则 API 400。
1827
+ */
1828
+ buildMessages(request) {
1829
+ const msgs = [];
1830
+ for (const m of request.messages) {
1831
+ if (m.role === "tool") {
1832
+ if (!m.toolCallId) continue;
1833
+ msgs.push({
1834
+ role: "tool",
1835
+ tool_call_id: m.toolCallId,
1836
+ content: typeof m.content === "string" ? m.content : ""
1837
+ });
1838
+ continue;
1839
+ }
1840
+ if (m.role === "assistant" && m.toolCalls && m.toolCalls.length > 0) {
1841
+ const assistantMsg = {
1842
+ role: "assistant",
1843
+ content: typeof m.content === "string" && m.content ? m.content : null,
1844
+ tool_calls: m.toolCalls.map((tc) => ({
1845
+ id: tc.id,
1846
+ type: "function",
1847
+ function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
1848
+ })),
1849
+ reasoning_content: m.reasoningContent ?? ""
1850
+ };
1851
+ msgs.push(assistantMsg);
1852
+ continue;
1853
+ }
1854
+ const base = { role: m.role, content: m.content };
1855
+ if (m.role === "assistant") {
1856
+ base.reasoning_content = m.reasoningContent ?? "";
1857
+ }
1858
+ msgs.push(base);
1859
+ }
1860
+ const systemContent = [request.systemPrompt, request.systemPromptVolatile].filter(Boolean).join("\n\n---\n\n");
1861
+ if (systemContent) {
1862
+ return [{ role: "system", content: systemContent }, ...msgs];
1863
+ }
1864
+ return msgs;
1865
+ }
1866
+ async chat(request) {
1867
+ try {
1868
+ const response = await this.client.chat.completions.create({
1869
+ model: request.model,
1870
+ messages: this.buildMessages(request),
1871
+ temperature: request.temperature,
1872
+ max_tokens: request.maxTokens,
1873
+ stream: false,
1874
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
1875
+ }, {
1876
+ timeout: request.timeout ?? this.defaultTimeout
1877
+ });
1878
+ const firstChoice = response.choices?.[0];
1879
+ if (!firstChoice) {
1880
+ return { content: "", model: response.model, usage: void 0 };
1881
+ }
1882
+ return {
1883
+ content: firstChoice.message.content ?? "",
1884
+ model: response.model,
1885
+ usage: toUsage(response.usage)
1886
+ };
1887
+ } catch (err) {
1888
+ throw this.wrapError(err);
1889
+ }
1890
+ }
1891
+ async *chatStream(request) {
1892
+ try {
1893
+ const stream = await this.client.chat.completions.create({
1894
+ model: request.model,
1895
+ messages: this.buildMessages(request),
1896
+ temperature: request.temperature,
1897
+ max_tokens: request.maxTokens,
1898
+ stream: true,
1899
+ // 请求末尾 usage chunk,供 token 统计使用
1900
+ stream_options: { include_usage: true },
1901
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
1902
+ }, {
1903
+ timeout: request.timeout ?? this.defaultTimeout,
1904
+ signal: request.signal
1905
+ });
1906
+ let thinkingStarted = false;
1907
+ let reasoningAccumulator = "";
1908
+ for await (const chunk of stream) {
1909
+ const choice = chunk.choices[0];
1910
+ const done = choice?.finish_reason != null;
1911
+ if (!choice && chunk.usage) {
1912
+ yield {
1913
+ delta: "",
1914
+ done: true,
1915
+ usage: toUsage(chunk.usage),
1916
+ ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {}
1917
+ };
1918
+ continue;
1919
+ }
1920
+ const reasoningDelta = choice?.delta?.reasoning_content;
1921
+ if (reasoningDelta) {
1922
+ if (!thinkingStarted) {
1923
+ yield { delta: "<think>", done: false };
1924
+ thinkingStarted = true;
1925
+ }
1926
+ reasoningAccumulator += reasoningDelta;
1927
+ yield { delta: reasoningDelta, done: false };
1928
+ continue;
1929
+ }
1930
+ const delta = choice?.delta?.content ?? "";
1931
+ if (thinkingStarted && delta) {
1932
+ thinkingStarted = false;
1933
+ yield { delta: "</think>", done: false };
1934
+ }
1935
+ if (done) {
1936
+ yield { delta, done, ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {} };
1937
+ } else {
1938
+ yield { delta, done };
1939
+ }
1940
+ }
1941
+ } catch (err) {
1942
+ throw this.wrapError(err);
1991
1943
  }
1992
- const windowStart = pos === 0 ? 0 : pos + 1;
1993
- const lineEndIdx = content.indexOf("\n", m.index + m[0].length);
1994
- const window = content.slice(windowStart, lineEndIdx === -1 ? void 0 : lineEndIdx);
1995
- if (actionLineRe.test(window)) add(m[1]);
1996
1944
  }
1997
- return Array.from(paths);
1998
- }
1999
- function extractWrittenFilePaths(extraMessages) {
2000
- const paths = /* @__PURE__ */ new Set();
2001
- const msgs = extraMessages;
2002
- const addFromArgs = (raw) => {
2003
- if (typeof raw === "string") {
2004
- try {
2005
- const parsed = JSON.parse(raw);
2006
- if (typeof parsed.path === "string") paths.add(parsed.path);
2007
- } catch {
1945
+ /**
1946
+ * 请求 AI 并获取工具调用列表(不执行,只解析)。
1947
+ * 返回 { toolCalls, usage? } 时说明 AI 想要调用工具,
1948
+ * 返回 { content, usage? } 时说明 AI 给出了最终回答。
1949
+ */
1950
+ async chatWithTools(request, tools) {
1951
+ try {
1952
+ const openaiTools = tools.map((t) => ({
1953
+ type: "function",
1954
+ function: {
1955
+ name: t.name,
1956
+ description: t.description,
1957
+ parameters: {
1958
+ type: "object",
1959
+ properties: Object.fromEntries(
1960
+ Object.entries(t.parameters).map(([key, schema]) => [
1961
+ key,
1962
+ schemaToJsonSchema(schema)
1963
+ ])
1964
+ ),
1965
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
1966
+ }
1967
+ }
1968
+ }));
1969
+ const baseMessages = this.buildMessages(request);
1970
+ const extraMessages = request._extraMessages ?? [];
1971
+ const allMessages = [...baseMessages, ...extraMessages];
1972
+ const response = await this.client.chat.completions.create({
1973
+ model: request.model,
1974
+ messages: allMessages,
1975
+ tools: openaiTools,
1976
+ tool_choice: "auto",
1977
+ temperature: request.temperature,
1978
+ max_tokens: request.maxTokens,
1979
+ stream: false,
1980
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
1981
+ }, {
1982
+ timeout: request.timeout ?? this.defaultTimeout
1983
+ });
1984
+ const firstChoice = response.choices?.[0];
1985
+ if (!firstChoice) {
1986
+ return { content: "", usage: void 0 };
2008
1987
  }
2009
- } else if (raw && typeof raw === "object") {
2010
- const p = raw.path;
2011
- if (typeof p === "string") paths.add(p);
1988
+ const message = firstChoice.message;
1989
+ const finishReason = firstChoice.finish_reason;
1990
+ const usage = toUsage(response.usage);
1991
+ const contentStr = typeof message.content === "string" ? message.content : "";
1992
+ const hasToolCalls = !!(message.tool_calls && message.tool_calls.length > 0);
1993
+ const reasoningContent = message.reasoning_content;
1994
+ if (message.tool_calls && message.tool_calls.length > 0) {
1995
+ const toolCalls = message.tool_calls.map((tc) => {
1996
+ const parsedArgs = repairToolCallArguments(
1997
+ tc.function.arguments || "{}",
1998
+ (m) => process.stderr.write(`[warn] ${m}
1999
+ `)
2000
+ );
2001
+ return {
2002
+ id: tc.id,
2003
+ name: tc.function.name,
2004
+ arguments: parsedArgs
2005
+ };
2006
+ });
2007
+ return { toolCalls, usage, reasoningContent };
2008
+ }
2009
+ return {
2010
+ content: message.content ?? "",
2011
+ usage,
2012
+ ...reasoningContent ? { reasoningContent } : {},
2013
+ ...!hasToolCalls && (finishReason ?? "") ? { finishReason } : {}
2014
+ };
2015
+ } catch (err) {
2016
+ throw this.wrapError(err);
2012
2017
  }
2013
- };
2014
- for (const msg of msgs) {
2015
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
2016
- for (const tc of msg.tool_calls) {
2017
- const fn = tc.function;
2018
- const name = fn?.name ?? "";
2019
- if (name === "write_file" || name === "edit_file") {
2020
- addFromArgs(fn?.arguments);
2018
+ }
2019
+ /**
2020
+ * 流式工具调用:文本内容实时输出、工具名称/参数逐块发射。
2021
+ * 子类(DeepSeek / Kimi)因虚假声明检测需要完整响应,故不继承此方法。
2022
+ */
2023
+ async *chatWithToolsStream(request, tools) {
2024
+ if (!this.enableStreamingToolCalls) {
2025
+ const result = await this.chatWithTools(request, tools);
2026
+ if ("toolCalls" in result) {
2027
+ for (let i = 0; i < result.toolCalls.length; i++) {
2028
+ const tc = result.toolCalls[i];
2029
+ yield { type: "tool_call_start", index: i, id: tc.id, name: tc.name };
2030
+ yield { type: "tool_call_delta", index: i, argumentsDelta: JSON.stringify(tc.arguments) };
2031
+ yield { type: "tool_call_end", index: i };
2021
2032
  }
2033
+ } else {
2034
+ yield { type: "text_delta", delta: result.content };
2022
2035
  }
2036
+ const rc = "reasoningContent" in result ? result.reasoningContent : void 0;
2037
+ const fr = "finishReason" in result ? result.finishReason : void 0;
2038
+ yield {
2039
+ type: "done",
2040
+ usage: result.usage,
2041
+ ...rc ? { reasoningContent: rc } : {},
2042
+ ...fr ? { finishReason: fr } : {}
2043
+ };
2044
+ return;
2023
2045
  }
2024
- if (msg.role === "assistant" && Array.isArray(msg.content)) {
2025
- for (const block of msg.content) {
2026
- if (block.type !== "tool_use") continue;
2027
- const name = block.name ?? "";
2028
- if (name === "write_file" || name === "edit_file") {
2029
- addFromArgs(block.input);
2046
+ const openaiTools = tools.map((t) => ({
2047
+ type: "function",
2048
+ function: {
2049
+ name: t.name,
2050
+ description: t.description,
2051
+ parameters: {
2052
+ type: "object",
2053
+ properties: Object.fromEntries(
2054
+ Object.entries(t.parameters).map(([key, schema]) => [
2055
+ key,
2056
+ schemaToJsonSchema(schema)
2057
+ ])
2058
+ ),
2059
+ required: Object.entries(t.parameters).filter(([, s]) => s.required).map(([k]) => k)
2030
2060
  }
2031
2061
  }
2032
- }
2033
- if (msg.role === "model" && Array.isArray(msg.parts)) {
2034
- for (const part of msg.parts) {
2035
- const fc = part.functionCall;
2036
- if (!fc) continue;
2037
- const name = fc.name ?? "";
2038
- if (name === "write_file" || name === "edit_file") {
2039
- addFromArgs(fc.args);
2062
+ }));
2063
+ const baseMessages = this.buildMessages(request);
2064
+ const extraMessages = request._extraMessages ?? [];
2065
+ const allMessages = [...baseMessages, ...extraMessages];
2066
+ try {
2067
+ const stream = await this.client.chat.completions.create({
2068
+ model: request.model,
2069
+ messages: allMessages,
2070
+ tools: openaiTools,
2071
+ tool_choice: "auto",
2072
+ temperature: request.temperature,
2073
+ max_tokens: request.maxTokens,
2074
+ stream: true,
2075
+ stream_options: { include_usage: true },
2076
+ ...request.thinking ? { thinking: { type: "enabled" } } : {}
2077
+ }, {
2078
+ timeout: request.timeout ?? this.defaultTimeout,
2079
+ signal: request.signal
2080
+ });
2081
+ const toolCallAccumulators = /* @__PURE__ */ new Map();
2082
+ let toolCallsEnded = false;
2083
+ let thinkingStarted = false;
2084
+ let reasoningAccumulator = "";
2085
+ let lastFinishReason;
2086
+ let doneEmitted = false;
2087
+ for await (const chunk of stream) {
2088
+ const choice = chunk.choices[0];
2089
+ if (choice?.finish_reason) lastFinishReason = choice.finish_reason;
2090
+ if (!choice && chunk.usage) {
2091
+ if (thinkingStarted) {
2092
+ yield { type: "thinking_end" };
2093
+ thinkingStarted = false;
2094
+ }
2095
+ if (!toolCallsEnded && toolCallAccumulators.size > 0) {
2096
+ for (const [idx] of toolCallAccumulators) {
2097
+ yield { type: "tool_call_end", index: idx };
2098
+ }
2099
+ toolCallsEnded = true;
2100
+ }
2101
+ doneEmitted = true;
2102
+ yield {
2103
+ type: "done",
2104
+ usage: toUsage(chunk.usage),
2105
+ ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {},
2106
+ ...lastFinishReason && lastFinishReason !== "stop" ? { finishReason: lastFinishReason } : {}
2107
+ };
2108
+ continue;
2109
+ }
2110
+ if (!choice) continue;
2111
+ const delta = choice.delta;
2112
+ const reasoningDelta = delta?.reasoning_content;
2113
+ if (reasoningDelta) {
2114
+ if (!thinkingStarted) {
2115
+ yield { type: "thinking_start" };
2116
+ thinkingStarted = true;
2117
+ }
2118
+ reasoningAccumulator += reasoningDelta;
2119
+ yield { type: "thinking_delta", delta: reasoningDelta };
2120
+ continue;
2121
+ }
2122
+ if (thinkingStarted && (delta?.content || delta?.tool_calls)) {
2123
+ yield { type: "thinking_end" };
2124
+ thinkingStarted = false;
2125
+ }
2126
+ if (delta?.content) {
2127
+ yield { type: "text_delta", delta: delta.content };
2128
+ }
2129
+ if (delta?.tool_calls) {
2130
+ for (const tc of delta.tool_calls) {
2131
+ const idx = tc.index;
2132
+ const existing = toolCallAccumulators.get(idx);
2133
+ if (!existing && tc.id && tc.function?.name) {
2134
+ const initialArgs = tc.function.arguments ?? "";
2135
+ toolCallAccumulators.set(idx, {
2136
+ id: tc.id,
2137
+ name: tc.function.name,
2138
+ arguments: initialArgs
2139
+ });
2140
+ yield { type: "tool_call_start", index: idx, id: tc.id, name: tc.function.name };
2141
+ if (initialArgs) {
2142
+ yield { type: "tool_call_delta", index: idx, argumentsDelta: initialArgs };
2143
+ }
2144
+ } else if (existing && tc.function?.arguments) {
2145
+ existing.arguments += tc.function.arguments;
2146
+ yield { type: "tool_call_delta", index: idx, argumentsDelta: tc.function.arguments };
2147
+ }
2148
+ }
2149
+ }
2150
+ if (choice.finish_reason && !toolCallsEnded && toolCallAccumulators.size > 0) {
2151
+ for (const [idx] of toolCallAccumulators) {
2152
+ yield { type: "tool_call_end", index: idx };
2153
+ }
2154
+ toolCallsEnded = true;
2040
2155
  }
2041
2156
  }
2042
- }
2043
- }
2044
- return Array.from(paths);
2045
- }
2046
- function extractBashCommands(extraMessages) {
2047
- const cmds = [];
2048
- const msgs = extraMessages;
2049
- const addCmd = (raw) => {
2050
- if (typeof raw === "string") {
2051
- try {
2052
- const parsed = JSON.parse(raw);
2053
- if (typeof parsed.command === "string") cmds.push(parsed.command);
2054
- } catch {
2055
- }
2056
- } else if (raw && typeof raw === "object") {
2057
- const c = raw.command;
2058
- if (typeof c === "string") cmds.push(c);
2059
- }
2060
- };
2061
- for (const msg of msgs) {
2062
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
2063
- for (const tc of msg.tool_calls) {
2064
- const fn = tc.function;
2065
- if (fn?.name === "bash") addCmd(fn?.arguments);
2066
- }
2067
- }
2068
- if (msg.role === "assistant" && Array.isArray(msg.content)) {
2069
- for (const block of msg.content) {
2070
- if (block.type === "tool_use" && block.name === "bash") addCmd(block.input);
2071
- }
2072
- }
2073
- if (msg.role === "model" && Array.isArray(msg.parts)) {
2074
- for (const part of msg.parts) {
2075
- const fc = part.functionCall;
2076
- if (fc && fc.name === "bash") addCmd(fc.args);
2157
+ if (!doneEmitted) {
2158
+ if (thinkingStarted) {
2159
+ yield { type: "thinking_end" };
2160
+ thinkingStarted = false;
2161
+ }
2162
+ if (!toolCallsEnded && toolCallAccumulators.size > 0) {
2163
+ for (const [idx] of toolCallAccumulators) {
2164
+ yield { type: "tool_call_end", index: idx };
2165
+ }
2166
+ }
2167
+ yield {
2168
+ type: "done",
2169
+ ...reasoningAccumulator ? { reasoningContent: reasoningAccumulator } : {},
2170
+ ...lastFinishReason && lastFinishReason !== "stop" ? { finishReason: lastFinishReason } : {}
2171
+ };
2077
2172
  }
2078
- }
2079
- }
2080
- return cmds;
2081
- }
2082
/**
 * Return the file paths the assistant text claims to have written for which
 * no supporting evidence exists in this turn's messages.
 *
 * A claim counts as backed (and is filtered out) when, after normalising
 * separators/case and a leading "./":
 *   - it equals a path passed to a write tool call, or
 *   - it and a written path are suffixes of each other on a "/" boundary, or
 *   - its basename equals the basename of a written path, or
 *   - its basename appears anywhere in the text of a bash command.
 *
 * @param {string} content - assistant response text to scan for claims.
 * @param {Array} extraMessages - messages of the current turn.
 * @returns {string[]} the claimed paths (as originally written) that are unbacked.
 */
function findPhantomClaims(content, extraMessages) {
  const claimedPaths = extractClaimedFilePaths(content);
  if (claimedPaths.length === 0) {
    return [];
  }

  // Canonical form: forward slashes, lower case, no leading "./".
  const toCanonical = (p) => p.replace(/\\/g, "/").toLowerCase().replace(/^\.\//, "");
  const lastSegment = (p) => {
    const segments = toCanonical(p).split("/");
    return segments[segments.length - 1] ?? "";
  };

  const writtenList = extractWrittenFilePaths(extraMessages).map((p) => toCanonical(p));
  const writtenNames = new Set(writtenList.map((p) => lastSegment(p)));
  const writtenPaths = new Set(writtenList);
  const bashHaystack = extractBashCommands(extraMessages)
    .map((cmd) => cmd.replace(/\\/g, "/").toLowerCase())
    .join("\n");

  const isBacked = (claim) => {
    if (writtenPaths.has(claim)) {
      return true;
    }
    const overlapsWritten = [...writtenPaths].some(
      (w) => w.endsWith("/" + claim) || claim.endsWith("/" + w)
    );
    if (overlapsWritten) {
      return true;
    }
    const name = lastSegment(claim);
    if (writtenNames.has(name)) {
      return true;
    }
    // A file produced through a shell redirect/heredoc only shows up in bash text.
    return Boolean(name) && bashHaystack.includes(name);
  };

  return claimedPaths.filter((raw) => !isBacked(toCanonical(raw)));
}
2106
/**
 * Build the corrective user message sent back to the model when it claimed
 * file writes that never happened.
 *
 * @param {string[]} phantoms - claimed-but-unwritten file paths.
 * @returns {string} instruction text listing each path as a bullet.
 */
function buildPhantomCorrectionMessage(phantoms) {
  const bullets = [];
  for (const path of phantoms) {
    bullets.push(` - ${path}`);
  }
  const header = "You claimed to have written the following file(s), but no matching write_file tool call was actually made in this turn:";
  const instructions = 'Each of these files does NOT exist on disk. You MUST now invoke write_file (via the function calling API) for every missing file listed above. Do NOT output another "completion summary" until the tool calls have actually been made.';
  return [header, "\n", bullets.join("\n"), "\n\n", instructions].join("");
}
2110
// Character class covering the "pipe" look-alikes used in DSML markup:
// ASCII "|", U+FF5C, U+2502, U+2503 and U+01C0. Kept as a string because it is
// interpolated into RegExp sources.
var DSML_PIPE_CLASS = "[|\\uFF5C\\u2502\\u2503\\u01C0]";

// Signatures of tool-call (or reasoning) markup that leaked into plain text.
// None of these regexes is global/sticky, so `.test()` is stateless.
var PSEUDO_TOOL_CALL_PATTERNS = [
  // Opening <tool_call name="..."> tag.
  /<tool_call\s+name\s*=\s*["'][\w._-]+["']/,
  // <function_calls> or </function_calls> emitted as text.
  /<\/?function_calls\s*>/,
  // <invoke name="..."> written out as text.
  /<invoke\s+name\s*=\s*["'][\w._-]+["']/,
  // <tool_use ...> / <tool_use_id ...> tags.
  /<tool_use(?:_id)?\b/,
  // Markdown fence labelled `tool_call`.
  /```\s*tool_call\b/i,
  // A line that begins a bare JSON call object: {"name":"...","arguments":
  /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:/m,
  // <think ...> reasoning block opener; the raw stream would otherwise carry
  // the model's planning text into a saved file.
  /<think\b[^>]*>/i,
  // A line opening a ```markdown / ```md / ```gfm fence (document wrapped in a fence).
  /^\s*```\s*(?:markdown|md|gfm)\b/im,
  // DSML markup such as <|DSML|tool_calls>, where the pipe may be any member
  // of DSML_PIPE_CLASS; the ASCII-only <invoke ...> pattern above does not
  // match these.
  new RegExp(`<\\/?\\s*${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`, "i")
];

/**
 * Report whether `content` contains pseudo-tool-call markup.
 *
 * @param {string} content - text to inspect.
 * @returns {string|null} the `source` of the first matching pattern (in table
 *   order), or null when the text is empty or nothing matches.
 */
function detectPseudoToolCalls(content) {
  if (!content || content.length === 0) {
    return null;
  }
  const firstHit = PSEUDO_TOOL_CALL_PATTERNS.find((pattern) => pattern.test(content));
  return firstHit === undefined ? null : firstHit.source;
}
2150
/**
 * Remove leaked tool-call / reasoning markup from `content` and return the
 * cleaned document text.
 *
 * Steps, in order: prefer the body of a DSML `content` parameter when one is
 * present (otherwise just drop DSML tags); delete each known markup form;
 * unwrap a document-wide markdown fence; peel intro/outro narration; collapse
 * runs of 3+ newlines and trim.
 *
 * @param {string} content - raw model output.
 * @returns {string} cleaned text; falsy input is returned unchanged.
 */
function stripPseudoToolCalls(content) {
  if (!content) {
    return content;
  }

  const dsmlBody = extractDsmlContent(content);
  const seed = dsmlBody !== null ? dsmlBody : stripDsmlTags(content);

  // Applied strictly in this order; each match is replaced by the empty string.
  const removals = [
    /<tool_call\b[^>]*>[\s\S]*?<\/tool_call>/gi,
    /<tool_call\b[^>]*\/>/gi,
    /<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi,
    /<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi,
    /<invoke\b[^>]*\/>/gi,
    /<tool_use(?:_id)?\b[^>]*>[\s\S]*?<\/tool_use(?:_id)?>/gi,
    /```\s*tool_call\b[\s\S]*?```/gi,
    // Closed <think> blocks first ...
    /<think\b[^>]*>[\s\S]*?<\/think>/gi,
    // ... then one unclosed <think>, cut at the next heading or blank line.
    /<think\b[^>]*>[\s\S]*?(?=^#{1,3}\s+\S|\n\s*\n)/im,
    /^\s*\{\s*"name"\s*:\s*"[\w._-]+"\s*,\s*"arguments"\s*:[\s\S]*?\}\s*$/gm
  ];

  let cleaned = seed;
  for (const pattern of removals) {
    cleaned = cleaned.replace(pattern, "");
  }
  cleaned = peelMetaNarration(unwrapDocumentFence(cleaned));
  return cleaned.replace(/\n{3,}/g, "\n\n").trim();
}
2174
/**
 * Pull the body of the first DSML `<…DSML…parameter name="content">` element
 * out of `content`.
 *
 * @param {string} content - text that may contain DSML markup.
 * @returns {string|null} the trimmed parameter body, or null when the input is
 *   empty, no such element exists, or the body is blank.
 */
function extractDsmlContent(content) {
  if (!content) {
    return null;
  }
  // "<pipes>DSML<pipes>" fragment shared by the opening and closing tags.
  const dsmlMarker = `${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`;
  const openTag = `<\\s*${dsmlMarker}\\s*parameter\\b[^>]*\\bname\\s*=\\s*["']content["'][^>]*>`;
  const closeTag = `<\\s*/\\s*${dsmlMarker}\\s*parameter\\s*>`;
  const pattern = new RegExp(`${openTag}([\\s\\S]*?)${closeTag}`, "i");

  const captured = content.match(pattern)?.[1];
  if (typeof captured !== "string") {
    return null;
  }
  const body = captured.trim();
  return body.length > 0 ? body : null;
}
2188
/**
 * Delete DSML markup from `content`: first every complete
 * `<…DSML…tool_calls> … </…DSML…tool_calls>` section (including what it
 * wraps), then any remaining individual DSML opening/closing tags.
 *
 * @param {string} content - text that may contain DSML markup.
 * @returns {string} the text with DSML markup removed.
 */
function stripDsmlTags(content) {
  const dsmlMarker = `${DSML_PIPE_CLASS}+\\s*DSML\\s*${DSML_PIPE_CLASS}+`;
  const wholeToolCallsSection = new RegExp(
    `<\\s*${dsmlMarker}\\s*tool_calls\\b[\\s\\S]*?<\\s*/\\s*${dsmlMarker}\\s*tool_calls\\s*>`,
    "gi"
  );
  const anyDsmlTag = new RegExp(`<\\s*/?\\s*${dsmlMarker}[^>]*>`, "gi");
  return content.replace(wholeToolCallsSection, "").replace(anyDsmlTag, "");
}
2201
/**
 * If the whole (trimmed) text is wrapped in a single ``` / ```markdown /
 * ```md / ```gfm fence, return the fenced body; otherwise return `content`
 * untouched.
 *
 * The fence is only unwrapped when the body is at least 200 characters, so
 * short fenced snippets are left alone.
 *
 * @param {string} content - text to inspect.
 * @returns {string} fenced body, or the original `content`.
 */
function unwrapDocumentFence(content) {
  const text = content.trim();

  const opener = /^```\s*(markdown|md|gfm)?\s*\n/i.exec(text);
  if (opener === null) {
    return content;
  }
  const rest = text.slice(opener[0].length);

  // Without the `m` flag `$` anchors at end of input, so the match always
  // ends at the end of `rest` and its index is where the body stops.
  const closer = /\n```\s*$/.exec(rest);
  if (closer === null) {
    return content;
  }
  const body = rest.slice(0, closer.index);

  return body.length < 200 ? content : body;
}
2212
- function peelMetaNarration(content) {
2213
- let out = content;
2214
- const firstHeadingMatch = out.match(/^#{1,3}\s+\S.*$/m);
2215
- if (firstHeadingMatch && firstHeadingMatch.index !== void 0) {
2216
- const before = out.slice(0, firstHeadingMatch.index);
2217
- const hasIntroMarker = /(?:以下(?:即为|是|就是)|这是|Here\s+is|Below\s+is|完整的?(?:审计报告|内容|文档)|审计报告(?:如下|的完整内容))/i.test(before);
2218
- if (before.length > 0 && before.length < 800 && hasIntroMarker) {
2219
- out = out.slice(firstHeadingMatch.index);
2220
- }
2221
- if (out.startsWith("---\n")) {
2222
- const headingAfterRule = out.slice(4).match(/^#{1,3}\s+\S/m);
2223
- if (headingAfterRule && headingAfterRule.index !== void 0 && headingAfterRule.index < 100) {
2224
- out = out.slice(4 + headingAfterRule.index);
2173
+ } catch (err) {
2174
+ if (err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError")) {
2175
+ throw err;
2225
2176
  }
2177
+ throw this.wrapError(err);
2226
2178
  }
2227
2179
  }
2228
- const codaMatch = out.match(/\n[^\n]*?(?:以上(?:即为|就是|内容|为完整的?)|Above\s+is\s+the|本报告已经|该报告(?:已经|包含)|报告(?:已|至此)结束)[^\n]*$/i);
2229
- if (codaMatch && codaMatch.index !== void 0 && codaMatch.index > out.length / 2) {
2230
- out = out.slice(0, codaMatch.index);
2180
+ /**
2181
+ * 将工具结果作为 tool_call 消息追加,供下一轮使用
2182
+ */
2183
+ buildToolResultMessages(assistantToolCalls, results, reasoningContent) {
2184
+ const streamedText = assistantToolCalls._streamedText;
2185
+ const assistantMsg = {
2186
+ role: "assistant",
2187
+ content: streamedText || null,
2188
+ tool_calls: assistantToolCalls.map((tc) => ({
2189
+ id: tc.id,
2190
+ type: "function",
2191
+ function: {
2192
+ name: tc.name,
2193
+ arguments: JSON.stringify(tc.arguments)
2194
+ }
2195
+ }))
2196
+ };
2197
+ assistantMsg.reasoning_content = reasoningContent ?? "";
2198
+ const resultMsgs = results.map((r) => ({
2199
+ role: "tool",
2200
+ tool_call_id: r.callId,
2201
+ content: r.content
2202
+ }));
2203
+ return [assistantMsg, ...resultMsgs];
2231
2204
  }
2232
- return out.trim();
2233
- }
2234
// Literal fragments of the CLI's own injected prompts/reminders. A single
// match is treated as meta-narration by detectMetaNarration.
var META_NARRATION_HARD_MARKERS = [
  /\[⚠️\s*CONTENT GENERATION MODE\]/,
  /CONTENT_ONLY_STREAM_REMINDER\b/,
  /<system-reminder>/i
];
// Soft signals: English phrases typical of a model talking about the task
// rather than producing the document. detectMetaNarration requires at least
// two of these to match before reporting meta-narration.
var META_NARRATION_HEURISTICS = [
  /\bthe user (?:is asking me|wants me|is requesting|expects me)\b/i,
  /\blet me (?:re-?read|re-?consider|reconsider|think about|carefully (?:re-?read|consider))\b/i,
  /\bI'?m (?:in (?:a )?content-only|in CONTENT-ONLY|currently in)\b/i,
  /\bI think (?:there might be|I should|I cannot|the (?:user|best)|maybe)\b/i,
  /\bWait,?\s+let me\b/i,
  /\bActually,?\s+I\b/i,
  /\bI need to be honest with the user\b/i,
  /\bI(?:'m| am) in a special mode\b/i,
  /\bGiven that I cannot\b/i
];
2250
- function detectMetaNarration(content) {
2251
- if (!content) return null;
2252
- const head = content.slice(0, 2e3);
2253
- for (const re of META_NARRATION_HARD_MARKERS) {
2254
- if (re.test(head)) return re.source;
2205
+ async validateApiKey(apiKey) {
2206
+ try {
2207
+ const testClient = new OpenAI({ apiKey, baseURL: this.defaultBaseUrl });
2208
+ await testClient.models.list();
2209
+ return true;
2210
+ } catch {
2211
+ return false;
2212
+ }
2255
2213
  }
2256
- if (/^#{1,3}\s+\S/m.test(head)) return null;
2257
- let hits = 0;
2258
- let firstMatch = "";
2259
- for (const re of META_NARRATION_HEURISTICS) {
2260
- if (re.test(head)) {
2261
- hits++;
2262
- if (!firstMatch) firstMatch = re.source;
2263
- if (hits >= 2) return `meta-narration:${firstMatch}`;
2214
  /**
   * Return the model list declared in `this.info.models`; no request is made.
   */
  async listModels() {
    return this.info.models;
  }
2217
+ wrapError(err) {
2218
+ if (err instanceof OpenAI.AuthenticationError) {
2219
+ return new AuthError(this.info.id);
2220
+ }
2221
+ if (err instanceof OpenAI.RateLimitError) {
2222
+ return new RateLimitError(this.info.id);
2223
+ }
2224
+ if (err instanceof Error) {
2225
+ return new ProviderError(this.info.id, err.message, err);
2264
2226
  }
2227
+ return new ProviderError(this.info.id, String(err));
2265
2228
  }
2266
- return null;
2267
- }
2268
/**
 * Heuristic: does `content` look like a real document rather than a short
 * remark?
 *
 * @param {string} content - text to classify.
 * @returns {boolean} false for empty text or text under 200 characters;
 *   otherwise true when some line is a markdown heading (`#`..`######`) or at
 *   least three blank-line-separated paragraphs exceed 30 trimmed characters.
 */
function looksLikeDocumentBody(content) {
  if (!content || content.length < 200) {
    return false;
  }
  const hasHeading = /^#{1,6}\s+\S/m.test(content);
  if (hasHeading) {
    return true;
  }
  let substantialParagraphs = 0;
  for (const chunk of content.split(/\n\s*\n/)) {
    if (chunk.trim().length > 30) {
      substantialParagraphs += 1;
    }
  }
  return substantialParagraphs >= 3;
}
2275
- function stripToolCallReminder(systemPrompt) {
2276
- if (!systemPrompt) return systemPrompt;
2277
- const idx = systemPrompt.indexOf("[\u26A0\uFE0F Mandatory Tool Call Policy]");
2278
- if (idx === -1) return systemPrompt;
2279
- return systemPrompt.slice(0, idx).trimEnd();
2229
+ };
2230
+
2231
+ // src/providers/deepseek.ts
2232
// Fenced code that stands in for a tool call: a fence labelled
// sql/bash/shell/powershell/sh, or an unlabelled fence whose first token is a
// SQL verb or `psql`.
var CODE_BLOCK_PATTERNS = [
  /```(?:sql|bash|shell|powershell|sh)\s*\n/i,
  /```\s*\n\s*(?:SELECT|INSERT|UPDATE|DELETE|psql)\b/i
];

/**
 * Report whether `content` contains a code block matching CODE_BLOCK_PATTERNS.
 *
 * @param {string} content - assistant response text.
 * @returns {boolean} true when any pattern matches.
 */
function detectsCodeBlockPseudoCall(content) {
  for (const pattern of CODE_BLOCK_PATTERNS) {
    if (pattern.test(content)) {
      return true;
    }
  }
  return false;
}
2281
- var TEE_FINAL_USER_NUDGE = `\u26A0\uFE0F STOP using tools NOW. The save_last_response tee stream is open and capturing every token of THIS response. Output ONLY the requested document body, in markdown. The very first character of your response must be the document's top-level heading (e.g. "# \u5BA1\u8BA1\u62A5\u544A" / "# Audit Report"). Do NOT print <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, <think>, or any other tool-call markup. Do NOT narrate that you will produce the document \u2014 just produce it. Do NOT pretend to call tools \u2014 there are none in this stream.`;
2282
- var CONTENT_ONLY_STREAM_REMINDER = `
2239
+ var DEEPSEEK_CODE_BLOCK_CORRECTION = "You wrote a code block in your response text, but you did NOT actually execute it. Code blocks in text are NOT executed by the system. You MUST use the function calling API to invoke the appropriate tool (e.g., mcp__postgres__query for SQL queries, bash for shell commands). Please call the correct tool NOW to execute the query/command.";
2240
+ var DEEPSEEK_ANTI_HALLUCINATION = `
2283
2241
 
2284
- [\u26A0\uFE0F CONTENT GENERATION MODE]
2285
- You are now in a CONTENT-ONLY streaming pass. The file at the configured path will receive every token of THIS response.
2286
- - Do NOT emit <tool_call>, </tool_call>, <function_calls>, <invoke>, <tool_use>, or any tool-call XML/JSON markup.
2287
- - Do NOT print "I will now call ...", "let me read ...", "<think>" reasoning blocks (the surrounding REPL handles those separately \u2014 they should not enter the saved file).
2288
- - Do NOT pretend to call tools. There are NO tools available in this stream \u2014 only your text output is captured.
2289
- - Produce ONLY the requested document body. Markdown is fine. Code blocks are fine. Tool-call markup is NOT.
2290
- - If you accidentally start a <tool_call>, STOP and produce the document body instead.
2242
+ [CRITICAL: Anti-Hallucination Enforcement \u2014 DeepSeek Specific]
2243
+ You have a known tendency to claim files were "saved" or "created" without actually calling write_file. This is UNACCEPTABLE.
2244
+ Rules you MUST follow:
2245
+ - NEVER type file content into your response text. ALL file content goes through write_file tool calls ONLY.
2246
+ - After calling write_file, do NOT describe the file content again in text \u2014 just confirm the tool call result.
2247
+ - When generating multiple files: call write_file for file 1 \u2192 call write_file for file 2 \u2192 ... \u2192 THEN summarize.
2248
+ - If you catch yourself writing markdown/code that should be a file, STOP and use write_file instead.
2249
+ - The system will detect and reject phantom claims. Each failed detection wastes a round. Be honest.`;
2250
/**
 * DeepSeek provider on top of the OpenAI-compatible base class.
 *
 * Adds two DeepSeek-specific behaviours to `chatWithTools`:
 *   1. appends DEEPSEEK_ANTI_HALLUCINATION to the system prompt, and
 *   2. when the first answer is plain text containing a shell/SQL code block
 *      instead of a tool call, re-asks once with a correction message.
 */
var DeepSeekProvider = class extends OpenAICompatibleProvider {
  defaultBaseUrl = "https://api.deepseek.com/v1";
  /** Streaming tool calls are disabled so that the chatWithTools override (code-block detection) takes effect. */
  enableStreamingToolCalls = false;
  info = {
    id: "deepseek",
    displayName: "DeepSeek",
    defaultModel: "deepseek-v4-flash",
    apiKeyEnvVar: "AICLI_API_KEY_DEEPSEEK",
    requiresApiKey: true,
    baseUrl: this.defaultBaseUrl,
    models: [
      // ── V4 family (2026-04-23+): 1M context, supports both Thinking and Non-Thinking modes ──
      {
        id: "deepseek-v4-pro",
        displayName: "DeepSeek V4 Pro (1.6T MoE, 49B active)",
        contextWindow: 1048576,
        supportsStreaming: true,
        supportsThinking: true
      },
      {
        id: "deepseek-v4-flash",
        displayName: "DeepSeek V4 Flash (284B MoE, 13B active)",
        contextWindow: 1048576,
        supportsStreaming: true,
        supportsThinking: true
      }
      // Legacy aliases (deepseek-chat / deepseek-reasoner) removed in v0.4.140 —
      // they retire 2026-07-24 and DeepSeek already routes them to V4 Flash server-side.
      // Pre-existing sessions were migrated to deepseek-v4-flash; pricing.ts keeps the
      // old ids for any historical cost calculations.
    ]
  };
  /**
   * Override of chatWithTools: detect code-block pseudo tool calls and retry once.
   *
   * With a long system prompt (e.g. after skill injection) DeepSeek sometimes
   * degrades to printing ```sql / ```bash blocks instead of calling a tool.
   * When that is detected, a correction message is injected and the request
   * is retried exactly once.
   *
   * The retry is only attempted when a `bash` tool is offered, no `tool`
   * message exists yet in `request._extraMessages`, and the first result has
   * text content matching detectsCodeBlockPseudoCall.
   *
   * @param request - chat request; `systemPrompt` and `_extraMessages` are read.
   * @param tools - tool definitions offered to the model.
   * @returns the first result, or the retry result with both rounds' input and
   *   output token counts summed when both rounds reported usage.
   */
  async chatWithTools(request, tools) {
    const enhancedRequest = {
      ...request,
      systemPrompt: (request.systemPrompt ?? "") + DEEPSEEK_ANTI_HALLUCINATION
    };
    const result = await super.chatWithTools(enhancedRequest, tools);
    const hasBashTool = tools.some((t) => t.name === "bash");
    const extraMsgs = request._extraMessages ?? [];
    const alreadyUsedTools = extraMsgs.some((m) => m?.role === "tool");
    if (hasBashTool && !alreadyUsedTools && "content" in result && result.content && detectsCodeBlockPseudoCall(result.content)) {
      process.stderr.write(
        "[deepseek] \u26A0 Detected code block pseudo-tool-call (DeepSeek wrote code in text instead of calling a tool). Forcing retry...\n"
      );
      // Build the retry from enhancedRequest, not the raw request, so the
      // retry round keeps the same anti-hallucination system prompt that the
      // first round was sent with.
      const retryRequest = {
        ...enhancedRequest,
        _extraMessages: [
          ...extraMsgs,
          // DeepSeek V4 thinking mode: assistant messages must carry
          // reasoning_content, even when it is empty.
          { role: "assistant", content: result.content, reasoning_content: "" },
          { role: "user", content: DEEPSEEK_CODE_BLOCK_CORRECTION }
        ]
      };
      const retryResult = await super.chatWithTools(retryRequest, tools);
      if (result.usage && "usage" in retryResult && retryResult.usage) {
        // Report the cost of both rounds to the caller.
        retryResult.usage = {
          inputTokens: result.usage.inputTokens + retryResult.usage.inputTokens,
          outputTokens: result.usage.outputTokens + retryResult.usage.outputTokens
        };
      }
      return retryResult;
    }
    return result;
  }
};
2291
2325
 
2292
- The file is closed and named when this stream ends. If your output contains pseudo-tool-call markup, the save will be REJECTED and you will be asked to retry.`;
2326
+ // src/providers/zhipu.ts
2327
/**
 * Zhipu (GLM) provider: an OpenAI-compatible provider configured with the
 * BigModel endpoint, a longer default timeout and the GLM model catalogue.
 */
var ZhipuProvider = class extends OpenAICompatibleProvider {
  defaultBaseUrl = "https://open.bigmodel.cn/api/paas/v4";
  // Deep-thinking models such as GLM-5 / GLM-5.1 need a long time to generate
  // long content, so the default timeout is 5 minutes (300000 ms).
  defaultTimeout = 3e5;
  info = {
    id: "zhipu",
    displayName: "Zhipu (GLM)",
    // Default to GLM-4.6: the most consistently well-regarded for Chinese
    // writing, 200K context, and roughly half the price of 5.1.
    // Switch manually with `/model glm-5.1` for long-running agent work or code engineering.
    defaultModel: "glm-4.6",
    apiKeyEnvVar: "AICLI_API_KEY_ZHIPU",
    requiresApiKey: true,
    baseUrl: this.defaultBaseUrl,
    models: [
      // ── GLM-5.1 family (2026-04 flagship, aimed at long-horizon agents + code engineering) ──
      {
        id: "glm-5.1",
        displayName: "GLM-5.1 (2026 Flagship, 200K, Agent+Code)",
        contextWindow: 204800,
        supportsStreaming: true,
        supportsThinking: true
      },
      {
        id: "glm-5.1-reasoning",
        displayName: "GLM-5.1 Reasoning (Deep Thinking)",
        contextWindow: 204800,
        supportsStreaming: true,
        supportsThinking: true
      },
      {
        id: "glm-5.1-air",
        displayName: "GLM-5.1 Air (Lightweight 5.1)",
        contextWindow: 204800,
        supportsStreaming: true,
        supportsThinking: true
      },
      // ── GLM-5 family (2026-02) ──
      {
        id: "glm-5",
        displayName: "GLM-5 (Flagship, Deep Thinking)",
        contextWindow: 131072,
        supportsStreaming: true,
        supportsThinking: true
      },
      // ── GLM-4.6 family (2025-09, best reputation for Chinese writing) ──
      {
        id: "glm-4.6",
        displayName: "GLM-4.6 (200K, \u4E2D\u6587\u5199\u4F5C\u63A8\u8350)",
        contextWindow: 204800,
        supportsStreaming: true
      },
      {
        id: "glm-4.6v",
        displayName: "GLM-4.6V (Vision + Thinking)",
        contextWindow: 131072,
        supportsStreaming: true,
        supportsThinking: true
      },
      // ── GLM-Z1 reasoning family ──
      {
        id: "glm-z1",
        displayName: "GLM-Z1 (Reasoning Flagship)",
        contextWindow: 131072,
        supportsStreaming: true,
        supportsThinking: true
      },
      {
        id: "glm-z1-air",
        displayName: "GLM-Z1 Air (Lightweight Reasoning)",
        contextWindow: 131072,
        supportsStreaming: true,
        supportsThinking: true
      },
      {
        id: "glm-z1-flash",
        displayName: "GLM-Z1 Flash (Free Reasoning)",
        contextWindow: 128e3,
        supportsStreaming: true,
        supportsThinking: true
      },
      // ── GLM-4 family (stable, low price) ──
      {
        id: "glm-4-plus",
        displayName: "GLM-4 Plus",
        contextWindow: 128e3,
        supportsStreaming: true
      },
      {
        id: "glm-4-air",
        displayName: "GLM-4 Air",
        contextWindow: 128e3,
        supportsStreaming: true
      },
      {
        id: "glm-4-flash",
        displayName: "GLM-4 Flash (Free)",
        contextWindow: 128e3,
        supportsStreaming: true
      }
    ]
  };
};
2293
2429
 
2294
2430
  // src/providers/kimi.ts
2295
2431
  var KIMI_XML_REMINDER = `
@@ -2986,26 +3122,9 @@ var ProviderRegistry = class {
2986
3122
  };
2987
3123
 
2988
3124
  export {
2989
- ThinkTagFilter,
2990
- consumeToolCallStream,
2991
- FreeRoundTracker,
2992
- BudgetWarner,
2993
- EmptyResponseGuard,
2994
- ContextPressureMonitor,
2995
- accumulateUsage,
2996
- buildRoundBudgetHint,
2997
- buildRoundsExhaustedPrompt,
2998
- buildUserStopMessage,
2999
- summarizeRecentTools,
3000
- detectsHallucinatedFileOp,
3001
- hadPreviousWriteToolCalls,
3002
3125
  TOOL_CALL_REMINDER,
3003
3126
  buildWriteRoundReminder,
3004
- HALLUCINATION_CORRECTION_MESSAGE,
3005
3127
  extractWrittenFilePaths,
3006
- extractBashCommands,
3007
- findPhantomClaims,
3008
- buildPhantomCorrectionMessage,
3009
3128
  detectPseudoToolCalls,
3010
3129
  stripPseudoToolCalls,
3011
3130
  detectMetaNarration,
@@ -3013,5 +3132,10 @@ export {
3013
3132
  stripToolCallReminder,
3014
3133
  TEE_FINAL_USER_NUDGE,
3015
3134
  CONTENT_ONLY_STREAM_REMINDER,
3135
+ ThinkTagFilter,
3136
+ consumeToolCallStream,
3137
+ accumulateUsage,
3138
+ buildRoundBudgetHint,
3139
+ runAgentLoop,
3016
3140
  ProviderRegistry
3017
3141
  };