minimal-agent 0.1.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +383 -122
- package/package.json +19 -12
- package/plugins/HOW-TO-WRITE-A-PLUGIN.md +186 -0
- package/plugins/ralph-wiggum/commands/ralph-loop.md +6 -16
- package/plugins/ralph-wiggum/plugin.js +205 -0
- package/plugins/ralph-wiggum/src/goalState.js +260 -0
- package/plugins/ralph-wiggum/src/sentinels.js +21 -0
- package/plugins/ralph-wiggum/src/stopHookRunner.js +104 -0
- package/plugins/ralph-wiggum/src/verificationGate.js +202 -0
- package/plugins/workflow-runner/.claude-plugin/plugin.json +5 -0
- package/plugins/workflow-runner/commands/workflow.md +15 -0
- package/plugins/workflow-runner/commands/workflows.md +8 -0
- package/plugins/workflow-runner/plugin.js +36 -0
- package/plugins/workflow-runner/src/expressions.js +369 -0
- package/plugins/workflow-runner/src/index.js +174 -0
- package/plugins/workflow-runner/src/loader.js +183 -0
- package/plugins/workflow-runner/src/runner.js +290 -0
- package/plugins/workflow-runner/src/stepExecutors/assert.js +28 -0
- package/plugins/workflow-runner/src/stepExecutors/llm.js +44 -0
- package/plugins/workflow-runner/src/stepExecutors/skill.js +103 -0
- package/plugins/workflow-runner/src/stepExecutors/tool.js +35 -0
- package/plugins/workflow-runner/src/types.js +59 -0
- package/plugins/workflow-runner/src/workflowState.js +46 -0
- package/skills/image-gen-openrouter/SKILL.md +121 -0
- package/skills/subtitle-srt/SKILL.md +134 -0
- package/skills/tts-zh/SKILL.md +137 -0
- package/skills/video-compose/SKILL.md +139 -0
- package/src/bootstrap/cwdArg.js +22 -0
- package/src/bootstrap/workingDir.js +31 -0
- package/src/cli/configWizard.js +272 -0
- package/src/cli/print.js +192 -0
- package/src/config/configFile.js +78 -0
- package/src/config.js +118 -0
- package/src/context/compact.js +357 -0
- package/src/context/microCompactLite.js +151 -0
- package/src/context/persistContext.js +109 -0
- package/src/context/reactiveCompact.js +121 -0
- package/src/context/sessionPath.js +58 -0
- package/src/context/snipCompact.js +112 -0
- package/src/context/tokenCounter.js +66 -0
- package/src/llm/client.js +182 -0
- package/src/loop.js +230 -0
- package/src/main.js +116 -0
- package/src/plugin-sdk.js +24 -0
- package/src/plugins/commandRouter.js +169 -0
- package/src/plugins/hookEngine.js +258 -0
- package/src/plugins/pluginApi.js +23 -0
- package/src/plugins/pluginLoader.js +71 -0
- package/src/plugins/pluginRunner.js +65 -0
- package/src/plugins/transcript.js +171 -0
- package/src/prompts/projectInstructions.js +48 -0
- package/src/prompts/skillList.js +126 -0
- package/src/prompts/system.js +155 -0
- package/src/session/runTurn.js +41 -0
- package/src/session/sessionState.js +19 -0
- package/src/tools/bash/bash.js +352 -0
- package/src/tools/bash/semantics.js +85 -0
- package/src/tools/bash/warnings.js +98 -0
- package/src/tools/edit/edit.js +253 -0
- package/src/tools/edit/multi-edit.js +155 -0
- package/src/tools/glob/glob.js +97 -0
- package/src/tools/grep/grep.js +185 -0
- package/src/tools/grep/rgPath.js +173 -0
- package/src/tools/index.js +94 -0
- package/src/tools/read/read.js +209 -0
- package/src/tools/shared/fileState.js +61 -0
- package/src/tools/shared/fileUtils.js +281 -0
- package/src/tools/shared/schemas.js +16 -0
- package/src/tools/types.js +21 -0
- package/src/tools/webbrowser/browser.js +55 -0
- package/src/tools/webbrowser/webbrowser.js +194 -0
- package/src/tools/webfetch/preapproved.js +267 -0
- package/src/tools/webfetch/webfetch.js +317 -0
- package/src/tools/websearch/websearch.js +161 -0
- package/src/tools/write/write.js +125 -0
- package/src/types/turndown.d.ts +23 -0
- package/src/types.js +16 -0
- package/src/ui/App.js +37 -0
- package/src/ui/InputBox.js +240 -0
- package/src/ui/MessageList.js +28 -0
- package/src/ui/Root.js +70 -0
- package/src/ui/StatusLine.js +41 -0
- package/src/ui/ToolStatus.js +11 -0
- package/src/ui/hooks/useChat.js +234 -0
- package/src/ui/hooks/usePasteHandler.js +137 -0
- package/src/ui/hooks/useTextBuffer.js +55 -0
- package/src/ui/hooks/useTokenUsage.js +30 -0
- package/src/ui/textBuffer.js +217 -0
- package/src/utils/packageRoot.js +37 -0
- package/src/utils/resourcePaths.js +49 -0
- package/src/utils/zodToJson.js +29 -0
- package/workflows/book-review-short.yaml +99 -0
- package/workflows/e2e-write-greet.yaml +27 -0
- package/workflows/schema.json +74 -0
- package/workflows/youtube-shorts.yaml +171 -0
- package/dist/main.js +0 -5936
- package/plugins/ralph-wiggum/scripts/setup-ralph-loop.sh +0 -203
package/src/config.js
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ============================================================
|
|
3
|
+
* config.ts —— 全局配置加载(从 .env 读取)
|
|
4
|
+
* ------------------------------------------------------------
|
|
5
|
+
* 只从环境变量读取,集中管理所有配置。
|
|
6
|
+
*
|
|
7
|
+
* 必需的环境变量:
|
|
8
|
+
* MINIMAL_AGENT_BASE_URL - API 地址,如 https://api.minimax.chat/v1
|
|
9
|
+
* MINIMAL_AGENT_API_KEY - API key
|
|
10
|
+
* MINIMAL_AGENT_MODEL - 模型名,如 MiniMax-M2.7
|
|
11
|
+
*
|
|
12
|
+
* 可选的环境变量:
|
|
13
|
+
* MINIMAL_AGENT_PROVIDER - 友好名称,默认 "env"
|
|
14
|
+
* MINIMAL_AGENT_CONTEXT_WINDOW - 上下文窗口,默认 128000
|
|
15
|
+
* ============================================================
|
|
16
|
+
*/
|
|
17
|
+
import { readSavedConfig } from './config/configFile.js';
|
|
18
|
+
/** Context window size used when neither the environment nor the saved config supplies a usable value. */
const DEFAULT_CONTEXT_WINDOW = 128_000;
|
|
19
|
+
/**
 * Load the provider configuration from environment variables only.
 *
 * Required: MINIMAL_AGENT_BASE_URL, MINIMAL_AGENT_API_KEY, MINIMAL_AGENT_MODEL.
 * Optional: MINIMAL_AGENT_PROVIDER (display name, defaults to 'env') and
 * MINIMAL_AGENT_CONTEXT_WINDOW (positive integer, defaults to DEFAULT_CONTEXT_WINDOW).
 *
 * @returns {Promise<{name: string, baseURL: string, apiKey: string, model: string, contextWindow: number}>}
 * @throws {Error} When any required variable is missing or empty; the message
 *   lists the missing names in declaration order.
 */
export async function loadProvider() {
    const required = {
        MINIMAL_AGENT_BASE_URL: process.env.MINIMAL_AGENT_BASE_URL,
        MINIMAL_AGENT_API_KEY: process.env.MINIMAL_AGENT_API_KEY,
        MINIMAL_AGENT_MODEL: process.env.MINIMAL_AGENT_MODEL,
    };
    // Empty strings count as missing, exactly like unset variables.
    const missing = Object.keys(required).filter((key) => !required[key]);
    if (missing.length > 0) {
        throw new Error(`缺少必需的环境变量:${missing.join(', ')}\n\n` +
            `请在 .env 中配置:\n` +
            ` MINIMAL_AGENT_BASE_URL=https://api.example.com/v1\n` +
            ` MINIMAL_AGENT_API_KEY=your-api-key\n` +
            ` MINIMAL_AGENT_MODEL=your-model\n\n` +
            `参考 .env.example`);
    }
    // Accept only a plain decimal integer. parseInt() would silently read
    // "128k" as 128 and "1e6" as 1, producing an absurdly small window.
    const rawWindow = (process.env.MINIMAL_AGENT_CONTEXT_WINDOW ?? '').trim();
    const parsedWindow = /^\d+$/.test(rawWindow) ? Number(rawWindow) : Number.NaN;
    const contextWindow = Number.isSafeInteger(parsedWindow) && parsedWindow > 0
        ? parsedWindow
        : DEFAULT_CONTEXT_WINDOW;
    return {
        // `||` rather than `??`: an empty MINIMAL_AGENT_PROVIDER= line is treated as unset.
        name: process.env.MINIMAL_AGENT_PROVIDER || 'env',
        baseURL: required.MINIMAL_AGENT_BASE_URL,
        apiKey: required.MINIMAL_AGENT_API_KEY,
        model: required.MINIMAL_AGENT_MODEL,
        contextWindow,
    };
}
|
|
58
|
+
/**
 * Layered provider loading: environment variables first, then the saved
 * config returned by readSavedConfig() for whatever the environment lacks.
 *
 * Differences from loadProvider():
 *   - loadProvider() reads the environment only and throws when incomplete.
 *   - this function falls back silently and returns null when the required
 *     fields cannot be assembled, so the caller decides what to do next.
 *
 * Precedence is per field: an env value wins for that field only, and the
 * remaining fields are filled from the saved config. Empty env values are
 * treated as unset. The saved config is read only when at least one of the
 * three required env values is missing.
 *
 * Context window resolution: valid env value, else valid saved value, else
 * DEFAULT_CONTEXT_WINDOW. "Valid" means a positive safe integer (a number, or
 * a string of decimal digits).
 *
 * @returns {Promise<{name: string, baseURL: string, apiKey: string, model: string, contextWindow: number} | null>}
 */
export async function loadProviderLayered() {
    const env = process.env;
    const envBaseURL = env.MINIMAL_AGENT_BASE_URL;
    const envApiKey = env.MINIMAL_AGENT_API_KEY;
    const envModel = env.MINIMAL_AGENT_MODEL;
    const envComplete = Boolean(envBaseURL && envApiKey && envModel);
    const saved = envComplete ? null : await readSavedConfig();
    // `||`, not `??`: an empty env value (e.g. `MINIMAL_AGENT_BASE_URL=`) must
    // fall through to the saved value instead of shadowing it with ''.
    const baseURL = envBaseURL || saved?.baseURL;
    const apiKey = envApiKey || saved?.apiKey;
    const model = envModel || saved?.model;
    if (!baseURL || !apiKey || !model) {
        return null;
    }
    // Strict parse: rejects "128k", "1e6", negatives, zero and non-integers.
    const toPositiveInt = (value) => {
        const candidate = typeof value === 'string' && /^\d+$/.test(value.trim())
            ? Number(value.trim())
            : value;
        return typeof candidate === 'number' && Number.isSafeInteger(candidate) && candidate > 0
            ? candidate
            : null;
    };
    const contextWindow = toPositiveInt(env.MINIMAL_AGENT_CONTEXT_WINDOW)
        ?? toPositiveInt(saved?.contextWindow)
        ?? DEFAULT_CONTEXT_WINDOW;
    return {
        name: env.MINIMAL_AGENT_PROVIDER || saved?.provider || 'env',
        baseURL,
        apiKey,
        model,
        contextWindow,
    };
}
|
|
101
|
+
/**
 * Copy tool API keys from the saved config into process.env, never
 * overwriting a value the environment already provides.
 *
 * Currently handles a single key: saved.tavilyApiKey -> TAVILY_API_KEY.
 * Does nothing when readSavedConfig() yields no config or no key.
 *
 * Per the original author's note, this exists for global npm installs where
 * no .env file is loaded automatically, and should run early in main() so
 * the key is in place before any tool reads it.
 *
 * @returns {Promise<void>}
 */
export async function applyToolKeysToEnv() {
    const savedKey = (await readSavedConfig())?.tavilyApiKey;
    if (!savedKey) {
        return;
    }
    if (process.env.TAVILY_API_KEY) {
        return;
    }
    process.env.TAVILY_API_KEY = savedKey;
}
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ============================================================
|
|
3
|
+
* src/context/compact.ts —— 自动压缩逻辑(v0.2 修复版)
|
|
4
|
+
* ============================================================
|
|
5
|
+
* 做的事:
|
|
6
|
+
* 1. 估算当前历史的 token 数
|
|
7
|
+
* 2. 若超过阈值(contextWindow - AUTOCOMPACT_BUFFER_TOKENS),触发压缩
|
|
8
|
+
* 3. 压缩流程:
|
|
9
|
+
* a. 把整段历史 + 9 段式压缩 prompt 发给 LLM(非流式)
|
|
10
|
+
* b. 提取 <summary> 块
|
|
11
|
+
* c. 用"会话延续"包装把摘要做成 user 消息,紧跟最后 N 条原消息
|
|
12
|
+
*
|
|
13
|
+
* v0.2 修复说明(2026-05-08):
|
|
14
|
+
* - 🚨 修复致命 bug:/compact 后 tool_call / tool_result 配对关系破坏
|
|
15
|
+
* 导致 MiniMax API 返回 400 "tool id not found" 错误
|
|
16
|
+
* - ✅ 新增 findTailWithCompleteToolChains() 函数
|
|
17
|
+
* 智能截取尾部消息,保证每条 tool 消息都有对应的 tool_call 父消息
|
|
18
|
+
* - ✅ 不再使用固定的 KEEP_RECENT_MESSAGES = 4 简单切片
|
|
19
|
+
* - 改为动态扫描算法:从末尾向前检查完整性,必要时自动扩展
|
|
20
|
+
*
|
|
21
|
+
* 为什么保留最后 N 条?
|
|
22
|
+
* 模型刚执行完工具的 tool_result 不能丢,否则后续推理失去依据。
|
|
23
|
+
* 但现在不是固定 N 条,而是动态保证 tool 消息完整性。
|
|
24
|
+
*
|
|
25
|
+
* 为什么用"延续"包装而不是直接塞摘要?
|
|
26
|
+
* 借鉴 kakadeai:在摘要前后加上"this session is continued from..."
|
|
27
|
+
* + "Recent messages are preserved verbatim" + "Continue from where it
|
|
28
|
+
* left off without asking" 三句话,可以让模型直接接着干活而不是把摘要
|
|
29
|
+
* 当成新任务"重新介绍一遍"。
|
|
30
|
+
*
|
|
31
|
+
* ---- prompt 模板内联自 src/prompts/compact.ts ----
|
|
32
|
+
*/
|
|
33
|
+
import { chat } from '../llm/client.js';
|
|
34
|
+
import { countMessagesTokens } from './tokenCounter.js';
|
|
35
|
+
/** 压缩这一轮严禁调工具——前置警告 */
|
|
36
|
+
const NO_TOOLS_PREAMBLE = `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.
|
|
37
|
+
|
|
38
|
+
- Do NOT use Read, Bash, Grep, Glob, Edit, Write, or ANY other tool.
|
|
39
|
+
- You already have all the context you need in the conversation above.
|
|
40
|
+
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
|
|
41
|
+
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.
|
|
42
|
+
|
|
43
|
+
`;
|
|
44
|
+
/** The same no-tools warning, repeated at the very end of the prompt; per the author's note, adaptive-thinking models tend to drift at the tail of a long prompt. */
|
|
45
|
+
const NO_TOOLS_TRAILER = '\n\nREMINDER: Do NOT call any tools. Respond with plain text only — ' +
|
|
46
|
+
'an <analysis> block followed by a <summary> block. ' +
|
|
47
|
+
'Tool calls will be rejected and you will fail the task.';
|
|
48
|
+
/**
|
|
49
|
+
* 9 段式压缩模板主体——UP_TO 变体。
|
|
50
|
+
*/
|
|
51
|
+
const COMPACT_PROMPT_BODY = `Your task is to create a detailed summary of this conversation. This summary will be placed at the start of a continuing session; newer messages that build on this context will follow after your summary. Summarize thoroughly so that someone reading only your summary and then the newer messages can fully understand what happened and continue the work.
|
|
52
|
+
|
|
53
|
+
Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
|
|
54
|
+
|
|
55
|
+
1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
|
|
56
|
+
- The user's explicit requests and intents
|
|
57
|
+
- Your approach to addressing the user's requests
|
|
58
|
+
- Key decisions, technical concepts and code patterns
|
|
59
|
+
- Specific details like file names, full code snippets, function signatures, file edits
|
|
60
|
+
- Errors that you ran into and how you fixed them
|
|
61
|
+
- Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
|
|
62
|
+
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.
|
|
63
|
+
|
|
64
|
+
Your summary should include the following sections:
|
|
65
|
+
|
|
66
|
+
1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
|
|
67
|
+
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
|
|
68
|
+
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Include FULL code snippets where applicable and include a summary of why this file read or edit is important. (Code snippets are NOT compressed — preserve them verbatim.)
|
|
69
|
+
4. Errors and fixes: List all errors that you ran into, and how you fixed them. Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
|
|
70
|
+
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
|
|
71
|
+
6. All user messages: List ALL user messages that are not tool results. These are critical for understanding the users' feedback and changing intent. (User messages are NOT compressed — preserve them verbatim.)
|
|
72
|
+
7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
|
|
73
|
+
8. Work Completed: Describe what was accomplished by the end of this portion.
|
|
74
|
+
9. Context for Continuing Work: Summarize any context, decisions, or state that would be needed to understand and continue the work in subsequent messages. Include direct quotes from the most recent conversation showing exactly where work was left off — this should be verbatim to ensure there's no drift.
|
|
75
|
+
|
|
76
|
+
Here's an example of how your output should be structured:
|
|
77
|
+
|
|
78
|
+
<example>
|
|
79
|
+
<analysis>
|
|
80
|
+
[Your thought process, ensuring all points are covered thoroughly and accurately]
|
|
81
|
+
</analysis>
|
|
82
|
+
|
|
83
|
+
<summary>
|
|
84
|
+
1. Primary Request and Intent:
|
|
85
|
+
[Detailed description]
|
|
86
|
+
|
|
87
|
+
2. Key Technical Concepts:
|
|
88
|
+
- [Concept 1]
|
|
89
|
+
- [...]
|
|
90
|
+
|
|
91
|
+
3. Files and Code Sections:
|
|
92
|
+
- [File Name 1]
|
|
93
|
+
- [Summary of why this file is important]
|
|
94
|
+
- [Important Code Snippet]
|
|
95
|
+
- [...]
|
|
96
|
+
|
|
97
|
+
4. Errors and fixes:
|
|
98
|
+
- [Detailed description of error 1]:
|
|
99
|
+
- [How you fixed the error]
|
|
100
|
+
- [User feedback on the error if any]
|
|
101
|
+
- [...]
|
|
102
|
+
|
|
103
|
+
5. Problem Solving:
|
|
104
|
+
[Description of solved problems and ongoing troubleshooting]
|
|
105
|
+
|
|
106
|
+
6. All user messages:
|
|
107
|
+
- [Detailed non tool use user message]
|
|
108
|
+
- [...]
|
|
109
|
+
|
|
110
|
+
7. Pending Tasks:
|
|
111
|
+
- [Task 1]
|
|
112
|
+
- [...]
|
|
113
|
+
|
|
114
|
+
8. Work Completed:
|
|
115
|
+
[What was accomplished]
|
|
116
|
+
|
|
117
|
+
9. Context for Continuing Work:
|
|
118
|
+
[Key context, decisions, or state needed to continue]
|
|
119
|
+
|
|
120
|
+
</summary>
|
|
121
|
+
</example>
|
|
122
|
+
|
|
123
|
+
Please provide your summary following this structure, ensuring precision and thoroughness in your response.`;
|
|
124
|
+
/**
 * Assemble the full compaction prompt:
 * preamble + 9-section template + optional custom instructions + trailer.
 *
 * @param {string} [customInstructions] Extra instructions; ignored when
 *   absent or whitespace-only, otherwise trimmed and appended under an
 *   "Additional Instructions:" heading.
 * @returns {string} The prompt text.
 */
export function buildCompactPrompt(customInstructions) {
    const extra = customInstructions ? customInstructions.trim() : '';
    const sections = [NO_TOOLS_PREAMBLE, COMPACT_PROMPT_BODY];
    if (extra !== '') {
        sections.push(`\n\nAdditional Instructions:\n${extra}`);
    }
    sections.push(NO_TOOLS_TRAILER);
    return sections.join('');
}
|
|
135
|
+
/**
 * Backward-compatible alias kept under the old name: the prompt produced by
 * buildCompactPrompt() with no custom instructions, computed once at module load.
 */
|
|
138
|
+
export const BASE_COMPACT_PROMPT = buildCompactPrompt();
|
|
139
|
+
/**
 * Wrap a summary in "session continuation" instructions so the model resumes
 * the work instead of re-introducing it.
 *
 * @param {{summary: string, recentMessagesPreserved: boolean}} args
 *   summary — the summary text to embed;
 *   recentMessagesPreserved — when truthy, adds a line saying the most recent
 *   messages follow verbatim.
 * @returns {string} The user-message content; paragraphs are separated by blank lines.
 */
export function buildContinuationUserMessage(args) {
    const paragraphs = [
        `This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion of the conversation.`,
        `${args.summary}`,
    ];
    if (args.recentMessagesPreserved) {
        paragraphs.push(`Recent messages are preserved verbatim.`);
    }
    paragraphs.push(`Continue the conversation from where it left off without asking the user any further questions. Resume directly — do not acknowledge the summary, do not recap what was happening, do not preface with "I'll continue" or similar. Pick up the last task as if the break never happened.`);
    return paragraphs.join('\n\n');
}
|
|
153
|
+
/**
 * Extract the summary text from the model's compaction response.
 *
 * Resolution order:
 *   1. the content of the first complete <summary>...</summary> block;
 *   2. otherwise, with any <analysis> blocks removed, the text following an
 *      unclosed <summary> tag (output cut off before the closing tag);
 *   3. otherwise the response with <analysis> blocks removed;
 *   4. otherwise the trimmed raw response.
 *
 * @param {string} rawResponse Full text returned by the model. null/undefined
 *   are treated as an empty string; other non-strings are stringified.
 * @returns {string} Trimmed summary text (may be empty).
 */
export function formatCompactSummary(rawResponse) {
    const text = typeof rawResponse === 'string' ? rawResponse : String(rawResponse ?? '');
    const closed = text.match(/<summary>([\s\S]*?)<\/summary>/);
    if (closed) {
        return closed[1].trim();
    }
    // Degraded path: drop the scratchpad <analysis> block(s), if any.
    const withoutAnalysis = text.replace(/<analysis>[\s\S]*?<\/analysis>/g, '').trim();
    // A truncated response may open <summary> without closing it; keep what
    // follows the tag instead of leaking the literal tag into the summary.
    const OPEN_TAG = '<summary>';
    const openAt = withoutAnalysis.indexOf(OPEN_TAG);
    if (openAt !== -1) {
        const partial = withoutAnalysis.slice(openAt + OPEN_TAG.length).trim();
        if (partial) {
            return partial;
        }
    }
    return withoutAnalysis || text.trim();
}
|
|
164
|
+
/**
 * Safety buffer subtracted from the context window to obtain the
 * auto-compaction trigger threshold (see getCompactThreshold).
 *
 * Design intent (author's figures): before the context window fills up,
 * reserve two chunks that must still fit:
 *   - the next complete LLM output (assistant message plus several tool_calls), roughly 12K
 *   - headroom for the 9-section template prompt sent with the compaction call, roughly 13K
 *
 * For comparison, the kakadeai main project uses ~33K (20K output + 13K
 * buffer) and benefits from prompt caching; there is no cache here, and 25K
 * was chosen as a conservative compromise.
 *
 * Proactive compaction is preferred over the reactive fallback: the buffer is
 * kept generous so that auto-compaction summarizes the context before the
 * LLM actually overflows, and the reactive path should almost never fire.
 */
|
|
177
|
+
export const AUTOCOMPACT_BUFFER_TOKENS = 25_000;
|
|
178
|
+
/**
 * Minimum number of most-recent messages kept verbatim after compaction,
 * even when no tool calls are involved. Default for the minKeep parameter of
 * findTailWithCompleteToolChains().
 */
|
|
181
|
+
export const MIN_KEEP_RECENT_MESSAGES = 4;
|
|
182
|
+
/**
 * Token count at which auto-compaction triggers for the given provider:
 * the context window minus AUTOCOMPACT_BUFFER_TOKENS, floored at 1000.
 *
 * @param {{contextWindow: number}} provider
 * @returns {number}
 */
export function getCompactThreshold(provider) {
    const { contextWindow } = provider;
    const headroom = contextWindow - AUTOCOMPACT_BUFFER_TOKENS;
    return Math.max(1000, headroom);
}
|
|
188
|
+
/**
 * Pick the tail of the history to keep verbatim, guaranteeing that every
 * kept tool message has its announcing assistant tool_call in the result.
 *
 * Background (from the original author): the OpenAI-style protocol requires a
 * tool message's tool_call_id to match an id in some assistant message's
 * tool_calls; keeping the result while dropping the call makes the API
 * reject the request ("tool id not found").
 *
 * Algorithm:
 *   1. Start with the last `minKeep` messages.
 *   2. Collect the tool_call ids announced by assistant messages in the tail.
 *   3. While some tool message in the tail refers to an id that is not yet
 *      announced in the tail, extend the tail backwards by one message.
 *   4. Tool messages whose id is announced nowhere in `messages` can never be
 *      repaired by extending; they are ignored while extending and removed
 *      from the result. (Previously one such message forced the whole history
 *      to be kept, which defeated compaction and still sent the orphan.)
 *
 * Runs in a single forward pass plus a single backward walk.
 *
 * @param {Array<object>} messages Non-system messages, oldest first.
 * @param {number} [minKeep=MIN_KEEP_RECENT_MESSAGES] Minimum number of
 *   trailing messages to keep.
 * @returns {Array<object>} A new array (never the input array itself).
 *
 * @example
 *   // input:   [user, assistant(tool:A), tool(id:A), user, assistant]
 *   // minKeep: 3
 *   // output:  [assistant(tool:A), tool(id:A), user, assistant]
 */
export function findTailWithCompleteToolChains(messages, minKeep = MIN_KEEP_RECENT_MESSAGES) {
    const announcedIds = (msg) => (msg.role === 'assistant' && msg.tool_calls ? msg.tool_calls.map((tc) => tc.id) : []);

    // Every id announced anywhere: tells repairable orphans from hopeless ones.
    const allCallIds = new Set(messages.flatMap(announcedIds));
    const isHopelessOrphan = (msg) => msg.role === 'tool' && Boolean(msg.tool_call_id) && !allCallIds.has(msg.tool_call_id);
    const withoutHopelessOrphans = (list) => list.filter((msg) => !isHopelessOrphan(msg));

    if (messages.length <= minKeep) {
        return withoutHopelessOrphans(messages);
    }

    let tailStart = messages.length - minKeep;

    // Ids announced inside the current tail.
    const knownIds = new Set();
    for (let i = tailStart; i < messages.length; i++) {
        for (const id of announcedIds(messages[i])) {
            knownIds.add(id);
        }
    }

    // Ids referenced by tool messages in the tail but announced further back.
    const pendingIds = new Set();
    const notePending = (msg) => {
        if (msg.role !== 'tool' || !msg.tool_call_id) {
            return;
        }
        if (allCallIds.has(msg.tool_call_id) && !knownIds.has(msg.tool_call_id)) {
            pendingIds.add(msg.tool_call_id);
        }
    };
    for (let i = tailStart; i < messages.length; i++) {
        notePending(messages[i]);
    }

    // Walk backwards until every pending id has found its announcing message.
    while (pendingIds.size > 0 && tailStart > 0) {
        tailStart--;
        const added = messages[tailStart];
        for (const id of announcedIds(added)) {
            knownIds.add(id);
            pendingIds.delete(id);
        }
        // The newly included message may itself be a tool result needing a parent.
        notePending(added);
    }

    return withoutHopelessOrphans(messages.slice(tailStart));
}
|
|
261
|
+
/**
 * Compact the history only if its token count has reached the threshold.
 *
 * @param {Array<object>} messages Current full history.
 * @param {object} provider Current provider (its contextWindow determines the threshold).
 * @returns {Promise<{messages: Array<object>, compacted: boolean, before: number, after: number}>}
 *   Below the threshold, `messages` is the very same array that was passed in
 *   and `compacted` is false; otherwise `messages` is the array produced by
 *   runCompaction(). `before` / `after` are token counts.
 */
export async function autoCompactIfNeeded(messages, provider) {
    const tokensBefore = countMessagesTokens(messages);
    const limit = getCompactThreshold(provider);
    const underLimit = tokensBefore < limit;
    if (!underLimit) {
        const shrunk = await runCompaction(messages, provider);
        return {
            messages: shrunk,
            compacted: true,
            before: tokensBefore,
            after: countMessagesTokens(shrunk),
        };
    }
    return { messages, compacted: false, before: tokensBefore, after: tokensBefore };
}
|
|
281
|
+
/**
 * Run one compaction unconditionally, ignoring the threshold. Intended for
 * the manual `/compact` command.
 *
 * Shares runCompaction() with autoCompactIfNeeded(); unlike that function,
 * the result carries no `compacted` flag.
 *
 * @param {Array<object>} messages Current full history.
 * @param {object} provider Current provider.
 * @returns {Promise<{messages: Array<object>, before: number, after: number}>}
 *   The compacted history with token counts before and after.
 */
export async function forceCompact(messages, provider) {
    const tokensBefore = countMessagesTokens(messages);
    const shrunk = await runCompaction(messages, provider);
    const tokensAfter = countMessagesTokens(shrunk);
    return {
        messages: shrunk,
        before: tokensBefore,
        after: tokensAfter,
    };
}
|
|
293
|
+
/**
 * Perform the actual compaction with one LLM call made without tools.
 *
 * Input:  the original history.
 * Output: [first system message (if any),
 *          one user message carrying the continuation-wrapped summary,
 *          ...the verbatim tail chosen by findTailWithCompleteToolChains()].
 *
 * The whole history is sent to the model together with the compaction
 * prompt — including the tail that is kept verbatim — so the model can see
 * where work stopped when it writes the summary.
 *
 * Degraded path: when the LLM call throws, or the extracted summary is
 * empty, the summary message is replaced by a short note saying compaction
 * failed and older history was truncated. The same tail is kept either way.
 *
 * @param {Array<object>} messages Full history.
 * @param {object} provider Provider passed through to chat().
 * @returns {Promise<Array<object>>} The new, shorter history (input is not modified).
 */
async function runCompaction(messages, provider) {
    // Keep the first system message so the agent retains its instructions.
    const systemMsg = messages.find((m) => m.role === 'system');
    const systemPart = systemMsg ? [systemMsg] : [];
    const nonSystem = messages.filter((m) => m.role !== 'system');

    // The verbatim tail is identical on the success and failure paths.
    const recentTail = findTailWithCompleteToolChains(nonSystem);
    const assemble = (content) => [...systemPart, { role: 'user', content }, ...recentTail];
    const degraded = (reason) => assemble(`(自动压缩失败:${reason},已直接截断旧历史)`);

    const compactRequest = [
        ...systemPart,
        ...nonSystem,
        { role: 'user', content: buildCompactPrompt() },
    ];

    let rawSummary = '';
    try {
        // No tools are offered, so the model has nothing to call.
        for await (const ev of chat({ provider, messages: compactRequest, tools: [] })) {
            if (ev.type === 'text_delta') {
                rawSummary += ev.delta;
            }
        }
    }
    catch (e) {
        // Thrown values are not guaranteed to be Error instances.
        const reason = typeof e?.message === 'string' ? e.message : String(e);
        return degraded(reason);
    }

    const cleanSummary = formatCompactSummary(rawSummary);
    if (!cleanSummary) {
        // An empty summary would silently replace the history with nothing;
        // say so explicitly instead of pretending a summary exists.
        return degraded('模型返回了空摘要');
    }

    return assemble(buildContinuationUserMessage({
        summary: cleanSummary,
        recentMessagesPreserved: recentTail.length > 0,
    }));
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ============================================================
|
|
3
|
+
* src/context/microCompactLite.ts —— 工具结果微压缩(v2)
|
|
4
|
+
* ------------------------------------------------------------
|
|
5
|
+
* 对齐 kakadeai 的 microCompact 设计,纯规则实现,零 LLM 调用。
|
|
6
|
+
*
|
|
7
|
+
* 核心思路:
|
|
8
|
+
* 工具执行结果在写入 history 前经过一道"单向检查门",
|
|
9
|
+
* 用规则压缩冗余内容,减少 token 消耗但不丢失关键信息。
|
|
10
|
+
*
|
|
11
|
+
* 对齐 kakadeai 的两条有效路径:
|
|
12
|
+
* Path A: Cached MC → 本方案用 SHA-1 进程缓存 + 计数淘汰实现
|
|
13
|
+
* Path B: Time-based MC → 本方案用轮次衰减实现
|
|
14
|
+
*
|
|
15
|
+
* kakadeai 已删除 Legacy LLM path → 本方案不实现 LLM 压缩
|
|
16
|
+
* ============================================================
|
|
17
|
+
*/
|
|
18
|
+
import { createHash } from 'node:crypto';
|
|
19
|
+
// ==================== 配置常量 ====================
|
|
20
|
+
/** How many times identical content is passed through before later occurrences are replaced by a short reference marker. */
|
|
21
|
+
const MAX_REPEAT_COUNT = 3;
|
|
22
|
+
/** Length (in characters) above which a result is truncated; applies to whitelisted tools only. */
|
|
23
|
+
const MAX_RESULT_SIZE = 4000;
|
|
24
|
+
/** Number of leading characters kept when truncating. */
|
|
25
|
+
const HEAD_KEEP_CHARS = 2000;
|
|
26
|
+
/** Number of trailing characters kept when truncating. */
|
|
27
|
+
const TAIL_KEEP_CHARS = 1000;
|
|
28
|
+
/** Time decay: a cache entry is eligible for removal once more than this many turns have passed since it was first seen. */
|
|
29
|
+
const MAX_KEEP_ROUNDS = 10;
|
|
30
|
+
/** Short-content threshold: content no longer than this is passed through without any processing. */
|
|
31
|
+
const SHORT_CONTENT_THRESHOLD = 200;
|
|
32
|
+
/**
 * Create a fresh micro-compaction state.
 *
 * @returns {{turn: number, cache: Map<string, object>}} `turn` starts at 0;
 *   `cache` is an empty Map keyed by content hash.
 */
export function createMicroCompactState() {
    const cache = new Map();
    return { turn: 0, cache };
}

/** Module-level state used by every function here when no explicit state is passed. */
const defaultState = createMicroCompactState();
|
|
36
|
+
/**
 * Advance the turn counter by one.
 *
 * @param {{turn: number}} [state=defaultState] State to mutate.
 * @returns {void}
 */
export function incrementTurn(state = defaultState) {
    ++state.turn;
}
|
|
39
|
+
/**
 * Reset the turn counter to zero; the cache is left untouched.
 *
 * @param {{turn: number}} [state=defaultState] State to mutate.
 * @returns {void}
 */
export function resetTurn(state = defaultState) {
    const start = 0;
    state.turn = start;
}
|
|
42
|
+
/**
 * Summarize the current cache contents.
 *
 * @param {{turn: number, cache: Map<string, object>}} [state=defaultState]
 * @returns {{totalEntries: number, repeatedCount: number, compressedCount: number,
 *            expiredCount: number, truncatedCount: number, totalCharsSaved: number}}
 *   totalEntries    — number of cached hashes;
 *   repeatedCount   — entries seen more than once;
 *   compressedCount — entries seen more than MAX_REPEAT_COUNT times;
 *   expiredCount    — entries first seen more than MAX_KEEP_ROUNDS turns ago;
 *   truncatedCount  — entries flagged `truncated === true` (entries without
 *                     the flag are not counted);
 *   totalCharsSaved — rough estimate: a flat 500 characters per occurrence
 *                     beyond MAX_REPEAT_COUNT.
 */
export function getCacheStats(state = defaultState) {
    // Flat per-omission estimate; entries do not record the content length.
    const ESTIMATED_CHARS_PER_OMISSION = 500;
    const stats = {
        totalEntries: state.cache.size,
        repeatedCount: 0,
        compressedCount: 0,
        expiredCount: 0,
        truncatedCount: 0,
        totalCharsSaved: 0,
    };
    for (const entry of state.cache.values()) {
        if (entry.count > 1) {
            stats.repeatedCount++;
        }
        if (entry.count > MAX_REPEAT_COUNT) {
            stats.compressedCount++;
            stats.totalCharsSaved += (entry.count - MAX_REPEAT_COUNT) * ESTIMATED_CHARS_PER_OMISSION;
        }
        if (state.turn - entry.firstSeenTurn > MAX_KEEP_ROUNDS) {
            stats.expiredCount++;
        }
        // Previously this counter was declared but never updated.
        if (entry.truncated === true) {
            stats.truncatedCount++;
        }
    }
    return stats;
}
|
|
62
|
+
/**
 * Empty the cache and reset the turn counter (per the original note: for the
 * /new command or for tests).
 *
 * @param {{turn: number, cache: Map<string, object>}} [state=defaultState]
 * @returns {void}
 */
export function clearCache(state = defaultState) {
    const { cache } = state;
    cache.clear();
    state.turn = 0;
}
|
|
67
|
+
// ==================== 工具白名单 ====================
|
|
68
|
+
/**
 * Tools whose oversized results may be length-truncated by microCompact().
 * Membership gates truncation only; microCompact()'s hash-based
 * de-duplication does not consult this set.
 *
 * Author's rationale for what is left out:
 *   - Read/Edit/Write: their output is already short and has its own size limits.
 *   - WebSearch: search results are a one-off knowledge injection that must be
 *     shown to the model in full the first time; repeats are still handled by
 *     de-duplication, but the content is never truncated.
 */
|
|
75
|
+
const COMPRESSIBLE_TOOLS = new Set([
|
|
76
|
+
'Grep',
|
|
77
|
+
'Bash',
|
|
78
|
+
'WebFetch',
|
|
79
|
+
'Glob',
|
|
80
|
+
]);
|
|
81
|
+
// ==================== 核心函数 ====================
|
|
82
|
+
/**
 * Micro-compact a single tool result before it is written into the history.
 * Pure rules, no LLM call.
 *
 * Pipeline, in priority order:
 *   Rule 0  empty content, content starting with 'Error:' or '错误', and
 *           content no longer than SHORT_CONTENT_THRESHOLD pass through untouched.
 *   Rule 1  de-duplication by SHA-1 of the content: once the same content has
 *           been seen more than MAX_REPEAT_COUNT times, a short reference
 *           marker is returned instead.
 *   Rule 3  truncation: content longer than MAX_RESULT_SIZE from a tool in
 *           COMPRESSIBLE_TOOLS is reduced to head + omission marker + tail.
 *           This now applies to every pass-through occurrence, not only the
 *           first; previously the 2nd and 3rd occurrences of an oversized
 *           result were returned in full.
 *   (Rule 2, time decay, is carried out separately by expireOldEntries().)
 *
 * Side effects: creates or updates the cache entry for the content hash
 * (count, firstToolName, firstSeenTurn, truncated).
 *
 * @param {string} toolName Name of the tool that produced the result.
 * @param {string} content Raw tool output.
 * @param {{turn: number, cache: Map<string, object>}} [state=defaultState]
 * @returns {string} The content as-is, truncated, or replaced by a marker.
 */
export function microCompact(toolName, content, state = defaultState) {
    if (!content) {
        return content;
    }
    // Rule 0: error messages are kept verbatim.
    if (content.startsWith('Error:') || content.startsWith('错误')) {
        return content;
    }
    // Rule 0b: short content is not worth tracking.
    if (content.length <= SHORT_CONTENT_THRESHOLD) {
        return content;
    }
    const shouldTruncate = content.length > MAX_RESULT_SIZE && COMPRESSIBLE_TOOLS.has(toolName);

    // Rule 1: de-duplicate by content hash.
    const hash = sha1(content);
    const existing = state.cache.get(hash);
    if (existing) {
        existing.count++;
        if (existing.count > MAX_REPEAT_COUNT) {
            return `[↑ ${existing.firstToolName} 结果已重复出现 ${existing.count} 次(相同内容已省略)]`;
        }
        if (shouldTruncate) {
            existing.truncated = true;
        }
    }
    else {
        // First sighting of this content.
        state.cache.set(hash, {
            count: 1,
            firstToolName: toolName,
            firstSeenTurn: state.turn,
            truncated: shouldTruncate,
        });
    }

    // Rule 3: truncate oversized output from whitelisted tools.
    return shouldTruncate ? truncateContent(content) : content;
}
|
|
127
|
+
/**
 * Remove stale cache entries (intended to be called before each turn).
 *
 * An entry is stale when more than MAX_KEEP_ROUNDS turns have elapsed since
 * it was first seen. Once removed, the same content counts as new the next
 * time it appears.
 *
 * @param {{turn: number, cache: Map<string, object>}} [state=defaultState]
 * @returns {number} How many entries were removed.
 */
export function expireOldEntries(state = defaultState) {
    const staleHashes = [];
    state.cache.forEach((entry, hash) => {
        const age = state.turn - entry.firstSeenTurn;
        if (age > MAX_KEEP_ROUNDS) {
            staleHashes.push(hash);
        }
    });
    for (const hash of staleHashes) {
        state.cache.delete(hash);
    }
    return staleHashes.length;
}
|
|
142
|
+
// ==================== 内部函数 ====================
|
|
143
|
+
/**
 * Hex-encoded SHA-1 digest of a string; used as a cache key for
 * de-duplication, not for security.
 *
 * @param {string} str
 * @returns {string} 40-character lowercase hex digest.
 */
function sha1(str) {
    const hasher = createHash('sha1');
    hasher.update(str);
    return hasher.digest('hex');
}
|
|
146
|
+
/**
 * Shorten content to its first HEAD_KEEP_CHARS and last TAIL_KEEP_CHARS
 * characters, joined by a marker stating how many characters were omitted
 * (formatted with toLocaleString()).
 *
 * @param {string} content Text to shorten; callers pass content longer than
 *   HEAD_KEEP_CHARS + TAIL_KEEP_CHARS.
 * @returns {string} head + blank line + marker + blank line + tail.
 */
function truncateContent(content) {
    const head = content.slice(0, HEAD_KEEP_CHARS);
    const tail = content.slice(-TAIL_KEEP_CHARS);
    const omitted = content.length - HEAD_KEEP_CHARS - TAIL_KEEP_CHARS;
    const marker = `[... 省略了 ${omitted.toLocaleString()} 字符 ...]`;
    return [head, marker, tail].join('\n\n');
}
|