minimal-agent 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +54 -72
  2. package/package.json +18 -13
  3. package/plugins/ralph-wiggum/plugin.js +205 -0
  4. package/plugins/ralph-wiggum/src/goalState.js +260 -0
  5. package/plugins/ralph-wiggum/src/{sentinels.ts → sentinels.js} +4 -7
  6. package/plugins/ralph-wiggum/src/stopHookRunner.js +104 -0
  7. package/plugins/ralph-wiggum/src/verificationGate.js +202 -0
  8. package/plugins/workflow-runner/commands/workflow.md +13 -3
  9. package/plugins/workflow-runner/{plugin.ts → plugin.js} +20 -26
  10. package/plugins/workflow-runner/src/expressions.js +369 -0
  11. package/plugins/workflow-runner/src/index.js +216 -0
  12. package/plugins/workflow-runner/src/loader.js +183 -0
  13. package/plugins/workflow-runner/src/runner.js +290 -0
  14. package/plugins/workflow-runner/src/stepExecutors/assert.js +28 -0
  15. package/plugins/workflow-runner/src/stepExecutors/llm.js +44 -0
  16. package/plugins/workflow-runner/src/stepExecutors/skill.js +103 -0
  17. package/plugins/workflow-runner/src/stepExecutors/{tool.ts → tool.js} +19 -25
  18. package/plugins/workflow-runner/src/types.js +59 -0
  19. package/plugins/workflow-runner/src/{workflowState.ts → workflowState.js} +21 -40
  20. package/src/bootstrap/cwdArg.js +22 -0
  21. package/src/bootstrap/workingDir.js +31 -0
  22. package/src/cli/configWizard.js +272 -0
  23. package/src/cli/print.js +197 -0
  24. package/src/config/configFile.js +78 -0
  25. package/src/config.js +118 -0
  26. package/src/context/compact.js +357 -0
  27. package/src/context/microCompactLite.js +151 -0
  28. package/src/context/persistContext.js +109 -0
  29. package/src/context/reactiveCompact.js +121 -0
  30. package/src/context/sessionPath.js +58 -0
  31. package/src/context/snipCompact.js +112 -0
  32. package/src/context/tokenCounter.js +66 -0
  33. package/src/llm/client.js +182 -0
  34. package/src/loop.js +230 -0
  35. package/src/main.js +116 -0
  36. package/src/plugin-sdk.js +24 -0
  37. package/src/plugins/commandRouter.js +169 -0
  38. package/src/plugins/hookEngine.js +258 -0
  39. package/src/plugins/pluginApi.js +23 -0
  40. package/src/plugins/pluginLoader.js +71 -0
  41. package/src/plugins/pluginRunner.js +65 -0
  42. package/src/plugins/transcript.js +171 -0
  43. package/src/prompts/projectInstructions.js +48 -0
  44. package/src/prompts/skillList.js +126 -0
  45. package/src/prompts/system.js +155 -0
  46. package/src/session/runTurn.js +41 -0
  47. package/src/session/sessionState.js +19 -0
  48. package/src/tools/bash/bash.js +352 -0
  49. package/src/tools/bash/semantics.js +85 -0
  50. package/src/tools/bash/warnings.js +98 -0
  51. package/src/tools/edit/edit.js +253 -0
  52. package/src/tools/edit/multi-edit.js +155 -0
  53. package/src/tools/glob/glob.js +97 -0
  54. package/src/tools/grep/grep.js +185 -0
  55. package/src/tools/grep/rgPath.js +173 -0
  56. package/src/tools/index.js +94 -0
  57. package/src/tools/read/read.js +209 -0
  58. package/src/tools/shared/fileState.js +61 -0
  59. package/src/tools/shared/fileUtils.js +281 -0
  60. package/src/tools/shared/schemas.js +16 -0
  61. package/src/tools/types.js +21 -0
  62. package/src/tools/webbrowser/browser.js +55 -0
  63. package/src/tools/webbrowser/webbrowser.js +194 -0
  64. package/src/tools/webfetch/preapproved.js +267 -0
  65. package/src/tools/webfetch/webfetch.js +317 -0
  66. package/src/tools/websearch/websearch.js +161 -0
  67. package/src/tools/write/write.js +125 -0
  68. package/src/types/turndown.d.ts +23 -0
  69. package/src/types.js +16 -0
  70. package/src/ui/App.js +37 -0
  71. package/src/ui/InputBox.js +240 -0
  72. package/src/ui/MessageList.js +28 -0
  73. package/src/ui/Root.js +70 -0
  74. package/src/ui/StatusLine.js +41 -0
  75. package/src/ui/ToolStatus.js +11 -0
  76. package/src/ui/hooks/useChat.js +234 -0
  77. package/src/ui/hooks/usePasteHandler.js +137 -0
  78. package/src/ui/hooks/useTextBuffer.js +55 -0
  79. package/src/ui/hooks/useTokenUsage.js +30 -0
  80. package/src/ui/textBuffer.js +217 -0
  81. package/src/utils/packageRoot.js +37 -0
  82. package/src/utils/resourcePaths.js +49 -0
  83. package/src/utils/zodToJson.js +29 -0
  84. package/dist/main.js +0 -5315
  85. package/plugins/ralph-wiggum/plugin.ts +0 -275
  86. package/plugins/ralph-wiggum/scripts/setup-ralph-loop.sh +0 -203
  87. package/plugins/ralph-wiggum/src/goalState.ts +0 -310
  88. package/plugins/ralph-wiggum/src/stopHookRunner.ts +0 -136
  89. package/plugins/ralph-wiggum/src/verificationGate.ts +0 -252
  90. package/plugins/ralph-wiggum/test/goalState.test.ts +0 -410
  91. package/plugins/ralph-wiggum/test/verificationGate.test.ts +0 -122
  92. package/plugins/workflow-runner/src/expressions.ts +0 -371
  93. package/plugins/workflow-runner/src/index.ts +0 -194
  94. package/plugins/workflow-runner/src/loader.ts +0 -193
  95. package/plugins/workflow-runner/src/runner.ts +0 -313
  96. package/plugins/workflow-runner/src/stepExecutors/assert.ts +0 -30
  97. package/plugins/workflow-runner/src/stepExecutors/llm.ts +0 -54
  98. package/plugins/workflow-runner/src/stepExecutors/skill.ts +0 -115
  99. package/plugins/workflow-runner/src/types.ts +0 -183
  100. package/plugins/workflow-runner/test/cli.e2e.test.ts +0 -114
  101. package/plugins/workflow-runner/test/e2e.test.ts +0 -268
  102. package/plugins/workflow-runner/test/expressions.test.ts +0 -140
  103. package/plugins/workflow-runner/test/fixtures/cli-e2e.yaml +0 -27
  104. package/plugins/workflow-runner/test/fixtures/hello-workflow.yaml +0 -49
  105. package/plugins/workflow-runner/test/graceful.test.ts +0 -139
  106. package/plugins/workflow-runner/test/loader.test.ts +0 -216
  107. package/plugins/workflow-runner/test/pluginRunner.isolation.test.ts +0 -230
  108. package/plugins/workflow-runner/test/runner.test.ts +0 -511
@@ -0,0 +1,121 @@
1
+ /**
2
+ * ============================================================
3
+ * src/context/reactiveCompact.ts —— 反应式压缩(错误自救)
4
+ * ------------------------------------------------------------
5
+ * 对齐 kakadeai 主项目 services/compact/reactiveCompact.ts:
6
+ * 当 API 返回 "prompt too long" 类错误时,自动触发一次压缩重试。
7
+ *
8
+ * 典型场景:
9
+ * 用户灌了一大段上下文 → 调 LLM → 返回 400 prompt_too_long
10
+ * → 系统不直接报错,而是先调 forceCompact 把上下文摘要一遍
11
+ * → 摘要失败再用 snipCompact 砍头兜底
12
+ * → 把新上下文交还给调用方,调用方重试一次 chat()
13
+ *
14
+ * 防爆约束:
15
+ * 每个 session 最多自救一次(attemptedThisSession 标志位)。
16
+ * 用户 /new 重启会话后才能再次自救。这避免"压缩→还是太长→再压缩"
17
+ * 的死循环。
18
+ * ============================================================
19
+ */
20
+ import { forceCompact } from './compact.js';
21
+ import { snipCompactIfNeeded } from './snipCompact.js';
22
+ import { countMessagesTokens } from './tokenCounter.js';
23
/**
 * Builds a fresh reactive-compaction state record.
 *
 * @returns {{ attempted: boolean }} A new object whose `attempted` flag is `false`.
 */
export function createReactiveCompactState() {
  const freshState = { attempted: false };
  return freshState;
}

/** Module-level state used whenever a caller does not supply its own state object. */
const defaultState = createReactiveCompactState();

/**
 * Clears the `attempted` flag so a later session may self-rescue again
 * (intended to be called when the user starts over with /new).
 *
 * @param {{ attempted: boolean }} [state] State to reset; defaults to the module-level state.
 */
export function resetReactiveCompactState(state = defaultState) {
  state.attempted = false;
}

/**
 * Test / debug helper: reports whether a rescue has already been attempted.
 *
 * @param {{ attempted: boolean }} [state] State to inspect; defaults to the module-level state.
 * @returns {boolean} The current value of the `attempted` flag.
 */
export function hasAttemptedReactiveCompact(state = defaultState) {
  const { attempted } = state;
  return attempted;
}
35
+ // ==================== 错误识别 ====================
36
/**
 * Phrases that unambiguously mean "the request does not fit the model's
 * context", checked against the lower-cased error message.
 */
const PROMPT_OVERFLOW_PATTERN = /context_length_exceeded|maximum context length|prompt is too long|exceeds? the context window|request too large/;

/**
 * Phrases that identify throttling / quota errors. These often mention
 * "token" and "limit" too, but shrinking the conversation does not fix them.
 */
const RATE_LIMIT_PATTERN = /rate[ _-]?limit|too many requests|quota|tokens? per (?:min|minute|day)|\b(?:tpm|rpm)\b|\[429\b/;

/**
 * Decides whether an error is a "prompt too long" style failure.
 *
 * Recognised provider message formats:
 *   - OpenAI:    "This model's maximum context length is X tokens..."
 *   - Anthropic: "prompt is too long"
 *   - MiniMax:   "input length exceeds the context window"
 *   - DeepSeek:  "context_length_exceeded"
 *   - Generic:   mentions prompt / context / token / input length AND
 *                too long / exceed / limit / maximum / over
 *
 * Rate-limit and quota errors (for example "Rate limit reached ... tokens per
 * min") are rejected before the generic match. They satisfy the loose
 * heuristic, but compacting the history cannot resolve them and would only
 * waste the one self-rescue attempt allowed per session.
 *
 * @param {unknown} error Error instance, string, or object carrying a `message`.
 * @returns {boolean} `true` when the message looks like a context-overflow error.
 */
export function isPromptTooLongError(error) {
  const msg = errorMessage(error).toLowerCase();
  if (!msg) {
    return false;
  }
  // Explicit overflow wording wins even if the text also mentions throttling.
  if (PROMPT_OVERFLOW_PATTERN.test(msg)) {
    return true;
  }
  // Throttling / quota errors must not trigger a compaction.
  if (RATE_LIMIT_PATTERN.test(msg)) {
    return false;
  }
  // Loose match: mentions the subject AND a size-related complaint.
  const hasSubject = /prompt|context|token|input length/.test(msg);
  const hasIssue = /too long|exceed|limit|maximum|over/.test(msg);
  return hasSubject && hasIssue;
}

/**
 * Extracts a message string from whatever was thrown.
 *
 * @param {unknown} error A string, an Error, an object with `message`, or anything else.
 * @returns {string} The message text; empty string for `null` / `undefined`.
 */
function errorMessage(error) {
  if (typeof error === 'string') {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }
  if (error && typeof error === 'object' && 'message' in error) {
    return String(error.message ?? '');
  }
  return String(error ?? '');
}
65
/**
 * If `error` is a prompt-too-long error and no rescue has been attempted for
 * this state yet, runs one recovery pass: LLM compaction first, then a
 * rule-based snip as the fallback.
 *
 * The `attempted` flag is set before any work starts, so a failed rescue still
 * counts as the single allowed attempt and cannot be retriggered in a loop.
 *
 * @param messages Current history; the array passed in is returned untouched
 *                 whenever no recovery happens.
 * @param provider Current provider, forwarded to `forceCompact`.
 * @param error    The error that was just thrown.
 * @param state    Rescue state; defaults to the module-level state.
 * @returns An object with `recovered`, `messages`, `reason`, and - on success -
 *          `before` / `after` token figures.
 */
export async function reactiveCompactIfApplicable(messages, provider, error, state = defaultState) {
  if (!isPromptTooLongError(error)) {
    return { recovered: false, messages, reason: 'not a prompt-too-long error' };
  }
  if (state.attempted) {
    return {
      recovered: false,
      messages,
      reason: 'already attempted this session — use /new or /compact manually',
    };
  }
  // Mark as used up-front: even if everything below fails, do not retrigger.
  state.attempted = true;

  // Step 1: try a full LLM compaction.
  let compactFailure = '';
  try {
    const r = await forceCompact(messages, provider);
    return {
      recovered: true,
      messages: r.messages,
      reason: `LLM 压缩成功(${r.before} → ${r.after} tokens)`,
      before: r.before,
      after: r.after,
    };
  } catch (compactErr) {
    // Keep the cause so a total failure can be diagnosed instead of being
    // silently discarded; then fall through to the snip fallback.
    compactFailure = errorMessage(compactErr);
  }

  // Step 2: snip fallback (more aggressive, 40%).
  const beforeSnip = countMessagesTokens(messages);
  const snipped = snipCompactIfNeeded(messages, {
    force: true,
    snipPercent: 0.4,
  });
  if (snipped.messagesRemoved > 0) {
    const afterSnip = countMessagesTokens(snipped.messages);
    return {
      recovered: true,
      messages: snipped.messages,
      reason: `snip 兜底成功(删除 ${snipped.messagesRemoved} 条最老消息,释放 ~${snipped.tokensFreed} tokens)`,
      before: beforeSnip,
      after: afterSnip,
    };
  }

  const failureReason = '反应式压缩失败:LLM 压缩抛错且 snip 也没东西可砍';
  return {
    recovered: false,
    messages,
    reason: compactFailure ? `${failureReason}(LLM 压缩错误:${compactFailure})` : failureReason,
  };
}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * ============================================================
3
+ * src/context/sessionPath.ts —— 按目录隔离的会话文件路径
4
+ * ------------------------------------------------------------
5
+ * 做的事:
6
+ * 把当前工作目录编码为 ~/.minimal-agent/sessions/<encoded>.json
7
+ * 的文件名,让不同项目目录的对话历史互不覆盖。
8
+ *
9
+ * 命名规则:
10
+ * <sanitized-path>-<6-hex-hash>.json
11
+ * sanitized:路径归一化后只保留 [a-z0-9-],最长 80 字符
12
+ * hash :原始路径 sha1 前 6 位,防 sanitize 后碰撞
13
+ *
14
+ * 迁移:
15
+ * 旧版 ~/.minimal-agent/last-context.json 启动时一次性 rename 到
16
+ * 当前 cwd 对应的会话文件。失败静默——丢迁移好过崩进程。
17
+ * ============================================================
18
+ */
19
+ import { createHash } from 'node:crypto';
20
+ import { mkdir, rename, stat } from 'node:fs/promises';
21
+ import { homedir } from 'node:os';
22
+ import { dirname, join, resolve } from 'node:path';
23
/**
 * Encodes a working directory as the absolute path of its session file.
 *
 * The directory is resolved, backslashes become forward slashes, and the
 * result is lower-cased. The file name is a readable slug of that string
 * (colons dropped, every other run of non `[a-z0-9]` characters collapsed to
 * `-`, edge dashes trimmed, capped at 80 characters) followed by the first six
 * hex digits of the SHA-1 of the same lower-cased string.
 *
 * @param {string} cwd Directory to encode.
 * @returns {string} `<home>/.minimal-agent/sessions/<slug>-<hash>.json`
 */
export function sessionFileFor(cwd) {
  const canonical = resolve(cwd).split('\\').join('/').toLowerCase();

  const shortHash = createHash('sha1')
    .update(canonical)
    .digest('hex')
    .slice(0, 6);

  const withoutColons = canonical.split(':').join('');
  const dashed = withoutColons.replace(/[^a-z0-9]+/g, '-');
  const slug = dashed.replace(/^-+|-+$/g, '').slice(0, 80);

  const sessionsDir = join(homedir(), '.minimal-agent', 'sessions');
  return join(sessionsDir, `${slug}-${shortHash}.json`);
}
34
/**
 * One-time migration of the legacy ~/.minimal-agent/last-context.json file to
 * the session file that belongs to `cwd`.
 *
 * Quiet by design - losing a migration is preferable to crashing on startup:
 *   - legacy file missing (or not stat-able)  -> return immediately
 *   - target session file already exists      -> return, legacy file stays put
 *   - mkdir / rename fails (permissions etc.) -> ignored, legacy file stays put
 *
 * The target is checked explicitly because `rename` replaces an existing
 * destination instead of failing; without the check an existing per-directory
 * session would be overwritten by the legacy file.
 *
 * @param {string} cwd Working directory whose session file receives the legacy data.
 * @returns {Promise<void>}
 */
export async function migrateLegacyContext(cwd) {
  const legacy = join(homedir(), '.minimal-agent', 'last-context.json');
  try {
    await stat(legacy);
  } catch {
    // Nothing to migrate.
    return;
  }

  const target = sessionFileFor(cwd);
  try {
    await stat(target);
    // A session already exists for this directory - never clobber it.
    return;
  } catch (err) {
    // Only a definite "does not exist" makes it safe to move the file there.
    if (err?.code !== 'ENOENT') {
      return;
    }
  }

  try {
    await mkdir(dirname(target), { recursive: true });
    await rename(legacy, target);
  } catch {
    // Deliberately silent: the legacy file stays where it is for a later start.
  }
}
@@ -0,0 +1,112 @@
1
+ /**
2
+ * ============================================================
3
+ * src/context/snipCompact.ts —— 轻量"砍头"压缩(Path 3)
4
+ * ------------------------------------------------------------
5
+ * 对齐 kakadeai 主项目 services/compact/snipCompact.ts 的设计:
6
+ * 纯规则、零 LLM 调用,直接砍掉最老的 N% 消息,用作 LLM 压缩失败时
7
+ * 的兜底("穷人的压缩")。
8
+ *
9
+ * 压缩强度光谱:
10
+ * micro —— 工具结果级别(SHA-1 + 长度截断,每条消息内部)
11
+ * snip —— 对话级别(按比例砍最老消息,无 LLM) ← 本文件
12
+ * auto —— LLM 全量重写摘要(9 段式模板)
13
+ *
14
+ * 设计要点:
15
+ * 1. system 消息(messages[0])永远不动
16
+ * 2. 默认 < 10 条历史不动(除非 force=true)
17
+ * 3. 砍头时严格保护 tool_call / tool_result 配对:切口落在 tool
18
+ * 消息上会自动向后微调,避免造成"孤儿 tool"导致 API 400
19
+ * 4. 替换为一条 user marker 消息,告诉模型"前面有 N 条被自动删除"
20
+ * ============================================================
21
+ */
22
+ import { countMessagesTokens } from './tokenCounter.js';
23
+ // ==================== 配置常量 ====================
24
/** Fraction of the non-system history proposed for removal by default. */
const DEFAULT_SNIP_PERCENT = 0.2;
/** Default number of messages the proposed cut must leave in place. */
const DEFAULT_MIN_KEEP = 5;
/** Default upper bound on the number of messages proposed for removal. */
const DEFAULT_MAX_SNIP = 50;
/** Histories shorter than this are left alone unless `force` is set. */
const DEFAULT_THRESHOLD = 10;

// ==================== Core function ====================

/**
 * Reads a numeric option, falling back when it is missing or not a number.
 * Infinity is allowed on purpose (e.g. "no cap" for the snip limit).
 */
function numericOption(value, fallback) {
  const parsed = Number(value ?? fallback);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Drops the oldest N% of the messages and puts a single user "marker" message
 * in their place.
 *
 * Cut protection: the proposed cut index is passed through
 * `findHeadCutpoint`, and the index it returns is the one actually used.
 * The result is left unchanged when that index is 0 or reaches the end of
 * the history.
 *
 * A leading `system` message (index 0) is never removed. Numeric options that
 * are NaN / non-numeric fall back to their defaults, and the proposed count is
 * floored, so the result never reports a NaN or fractional `messagesRemoved`.
 *
 * @param messages Conversation history; never mutated.
 * @param options  Optional settings:
 *                 `force` - ignore the minimum-length threshold;
 *                 `snipPercent` - fraction of non-system messages to remove;
 *                 `minMessagesToKeep` - messages the proposal must leave;
 *                 `maxMessagesToSnip` - cap on the proposal.
 * @returns `{ messages, messagesRemoved, tokensFreed }`; `messages` is always a
 *          new array.
 */
export function snipCompactIfNeeded(messages, options = {}) {
  const force = options.force ?? false;
  const snipPercent = numericOption(options.snipPercent, DEFAULT_SNIP_PERCENT);
  const minKeep = numericOption(options.minMessagesToKeep, DEFAULT_MIN_KEEP);
  const maxSnip = numericOption(options.maxMessagesToSnip, DEFAULT_MAX_SNIP);

  if (messages.length === 0) {
    return { messages: [], messagesRemoved: 0, tokensFreed: 0 };
  }

  // Split off the system message (only possible at index 0; at most one).
  const systemMsg = messages[0].role === 'system' ? messages[0] : null;
  const rest = systemMsg ? messages.slice(1) : messages.slice();

  // Threshold check: short histories are untouched unless forced.
  if (!force && rest.length < DEFAULT_THRESHOLD) {
    return { messages: messages.slice(), messagesRemoved: 0, tokensFreed: 0 };
  }

  // Work out how many messages to propose for removal.
  const byPercent = Math.floor(rest.length * snipPercent);
  const proposedSnipCount = Math.floor(
    Math.min(byPercent, maxSnip, Math.max(0, rest.length - minKeep)),
  );
  if (proposedSnipCount <= 0) {
    return { messages: messages.slice(), messagesRemoved: 0, tokensFreed: 0 };
  }

  // Cut protection: never leave an orphaned tool message at the head.
  const actualCut = findHeadCutpoint(rest, proposedSnipCount);
  if (actualCut === 0 || actualCut >= rest.length) {
    return { messages: messages.slice(), messagesRemoved: 0, tokensFreed: 0 };
  }

  const snipped = rest.slice(0, actualCut);
  const remaining = rest.slice(actualCut);
  const tokensFreed = countMessagesTokens(snipped);
  const marker = {
    role: 'user',
    content: `[已自动删除最早的 ${actualCut} 条对话以节省 token(约释放 ${tokensFreed.toLocaleString()} tokens)。` +
      `如需完整历史请用 /new 重启会话或 /compact 触发 LLM 摘要压缩。]`,
  };
  return {
    messages: [...(systemMsg ? [systemMsg] : []), marker, ...remaining],
    messagesRemoved: actualCut,
    tokensFreed,
  };
}
77
/**
 * Finds a safe head cut index: starting at `proposedCut`, moves forward one
 * position at a time until the kept region `messages[cut:]` contains no
 * orphaned tool message.
 *
 * A tool message is an orphan when no assistant message earlier in the kept
 * region lists its `tool_call_id` among its `tool_calls`.
 *
 * For every candidate cut the kept region is rescanned from scratch, so the
 * worst case is quadratic in the number of messages.
 *
 * @param messages    Message list to inspect (not modified).
 * @param proposedCut Desired cut index; clamped into `[0, messages.length]`.
 * @returns The first index at or after the clamped proposal whose kept region
 *          has no orphan, or `messages.length` when none exists.
 */
export function findHeadCutpoint(messages, proposedCut) {
  /** True when the region starting at `start` holds a tool message without a parent. */
  const keepsOrphanFrom = (start) => {
    const parentIds = new Set();
    for (let idx = start; idx < messages.length; idx += 1) {
      const current = messages[idx];
      const isAssistantWithCalls = current.role === 'assistant' && current.tool_calls;
      if (isAssistantWithCalls) {
        current.tool_calls.forEach((call) => parentIds.add(call.id));
        continue;
      }
      if (current.role === 'tool' && !parentIds.has(current.tool_call_id)) {
        return true;
      }
    }
    return false;
  };

  let cut = Math.max(0, Math.min(proposedCut, messages.length));
  // Advance until the kept region is orphan-free or nothing is left to keep.
  while (cut < messages.length && keepsOrphanFrom(cut)) {
    cut += 1;
  }
  return cut;
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * ============================================================
3
+ * src/context/tokenCounter.ts —— Token 估算
4
+ * ------------------------------------------------------------
5
+ * 我们用一个**粗略估算**(chars / 4)作为 token 数。
6
+ *
7
+ * 为何不用 tiktoken / @anthropic-ai/tokenizer?
8
+ * - 真实 tokenizer 对每个模型都不同,且会增加几 MB 的依赖
9
+ * - 我们只用估算来决定"要不要触发压缩",误差 ±20% 完全可以接受
10
+ * - chars/4 对英文很准;对中文偏低(中文 1 char ≈ 1 token)
11
+ *
12
+ * 如何换成精确版?
13
+ * 1. npm i tiktoken
14
+ * 2. 把 countTokens() 实现替换为 tiktoken 调用
15
+ * 3. 不改外部签名即可
16
+ * ============================================================
17
+ */
18
/**
 * Rough token estimate for a piece of text.
 *
 * Rule used here:
 *   - ASCII code units (code < 128) count as one token per four characters,
 *     rounded up;
 *   - every other UTF-16 code unit counts as one token.
 *
 * @param {string} text Text to measure; any falsy value yields 0.
 * @returns {number} Estimated token count.
 */
export function countTextTokens(text) {
  if (!text) {
    return 0;
  }
  let narrow = 0;
  let wide = 0;
  let pos = 0;
  // Walk UTF-16 code units (not code points), so a surrogate pair counts twice.
  while (pos < text.length) {
    if (text.charCodeAt(pos) < 128) {
      narrow += 1;
    } else {
      wide += 1;
    }
    pos += 1;
  }
  const asciiTokens = Math.ceil(narrow / 4);
  return asciiTokens + wide;
}
42
/**
 * Estimates the token count of a whole history, including a fixed allowance
 * for each message and each tool call.
 *
 * Per message: 4 tokens of overhead, plus the text estimate of a string
 * `content` (non-string content, such as `null` on an assistant message that
 * only carries tool calls, adds nothing). Assistant messages add 8 tokens per
 * tool call plus the estimates of the function name and arguments. Tool
 * messages add the estimate of their `tool_call_id`.
 *
 * @param messages History to measure.
 * @returns {number} Estimated total token count.
 */
export function countMessagesTokens(messages) {
  const MESSAGE_OVERHEAD = 4;
  const TOOL_CALL_OVERHEAD = 8;

  let sum = 0;
  for (const message of messages) {
    sum += MESSAGE_OVERHEAD;

    if (typeof message.content === 'string') {
      sum += countTextTokens(message.content);
    }

    const { role } = message;
    if (role === 'assistant' && message.tool_calls) {
      for (const call of message.tool_calls) {
        sum += TOOL_CALL_OVERHEAD;
        sum += countTextTokens(call.function.name);
        sum += countTextTokens(call.function.arguments);
      }
    }

    if (role === 'tool') {
      sum += countTextTokens(message.tool_call_id);
    }
  }
  return sum;
}
@@ -0,0 +1,182 @@
1
+ /**
2
+ * ============================================================
3
+ * src/llm/client.ts —— OpenAI 兼容 LLM 客户端(流式)
4
+ * ------------------------------------------------------------
5
+ * 这个文件做一件事:把"消息历史 + 可用工具"发给 LLM,把"流式响应"逐 chunk
6
+ * yield 回上层。它是整个 agent 系统里唯一会 fetch() 远端 API 的地方。
7
+ *
8
+ * 我们用 OpenAI 的 Chat Completions 协议,因为:
9
+ * - DeepSeek、月之暗面、智谱、火山、OpenRouter 都按它兼容
10
+ * - 协议简单:POST 一个 JSON,回 SSE
11
+ *
12
+ * SSE(Server-Sent Events)是什么?
13
+ * 一种很简单的流式协议:服务器逐行返回 "data: <json>\n\n",
14
+ * 最后用 "data: [DONE]\n\n" 表示结束。
15
+ *
16
+ * 本文件不依赖任何 npm 包(fetch 是 Bun/Node 20+ 内置)。
17
+ * ============================================================
18
+ */
19
/**
 * Parses one SSE line and yields the stream events it carries.
 *
 * Lines that are empty, do not start with `data:`, or whose payload is not
 * valid JSON (some providers send blank lines or heartbeats) yield nothing.
 * A `finish_reason` found on the chunk is recorded on `state.stopReason`.
 *
 * @param {string} rawLine One line of the response, without its newline.
 * @param {{ stopReason: string }} state Mutable holder for the latest stop reason.
 * @returns {boolean} `true` when the line is the `[DONE]` terminator.
 */
function* sseLineEvents(rawLine, state) {
  const line = rawLine.trim();
  if (!line || !line.startsWith('data:')) {
    return false;
  }
  const dataStr = line.slice(5).trim(); // strip the "data:" prefix
  if (dataStr === '[DONE]') {
    return true;
  }

  let chunk;
  try {
    chunk = JSON.parse(dataStr);
  } catch {
    // Rare: heartbeat or otherwise non-JSON payload - skip it.
    return false;
  }

  // `chunk?.` guards against valid-but-useless JSON such as `null`.
  const choice = chunk?.choices?.[0];
  const delta = choice?.delta;
  const finish = choice?.finish_reason;
  if (finish) {
    state.stopReason =
      finish === 'stop' || finish === 'tool_calls' || finish === 'length'
        ? finish
        : 'unknown';
  }
  if (!delta) {
    return false;
  }

  // Text delta. Models such as MiniMax put <think>...</think> tags straight
  // into `content`; the text is passed through untouched.
  if (typeof delta.content === 'string' && delta.content.length > 0) {
    yield { type: 'text_delta', delta: delta.content };
  }

  // Tool-call deltas (tool calls are streamed in fragments as well).
  if (Array.isArray(delta.tool_calls)) {
    for (const tc of delta.tool_calls) {
      yield {
        type: 'tool_call_delta',
        index: tc.index ?? 0,
        id: tc.id,
        name: tc.function?.name,
        argumentsDelta: tc.function?.arguments,
      };
    }
  }

  // Reasoning deltas. Three field names are handled: `reasoning_content` and
  // `reasoning` are string fragments, `reasoning_details` is an array.
  if (typeof delta.reasoning_content === 'string' && delta.reasoning_content.length > 0) {
    yield {
      type: 'reasoning_delta',
      field: 'reasoning_content',
      delta: delta.reasoning_content,
    };
  }
  if (typeof delta.reasoning === 'string' && delta.reasoning.length > 0) {
    yield {
      type: 'reasoning_delta',
      field: 'reasoning',
      delta: delta.reasoning,
    };
  }
  if (Array.isArray(delta.reasoning_details) && delta.reasoning_details.length > 0) {
    yield {
      type: 'reasoning_delta',
      field: 'reasoning_details',
      items: delta.reasoning_details,
    };
  }
  return false;
}

/**
 * Calls the LLM through the OpenAI-compatible Chat Completions endpoint and
 * yields the streamed response as events.
 *
 * @param args.provider Selected provider (`baseURL`, `apiKey`, `model`).
 * @param args.messages Full conversation history.
 * @param args.tools    Tool list (`name`, `description` string or function,
 *                      `parameters`); may be omitted when no tools are offered.
 * @param args.signal   Optional AbortSignal used to interrupt the request.
 *
 * @yields `text_delta`, `tool_call_delta`, `reasoning_delta`, and exactly one
 *         final `done` event (stop reason `aborted` when the signal was
 *         observed as aborted between reads).
 * @throws {Error} When the HTTP status is not OK or the response has no body.
 *         Errors raised by `fetch` / the stream reader propagate unchanged.
 *
 * Usage:
 * ```
 * for await (const ev of chat({ provider, messages, tools })) {
 *   if (ev.type === 'text_delta') process.stdout.write(ev.delta);
 * }
 * ```
 */
export async function* chat(args) {
  const { provider, messages, tools, signal } = args;

  // 1. Convert the tool list into the protocol's `tools` field.
  const openaiTools = (tools ?? []).map((t) => ({
    type: 'function',
    function: {
      name: t.name,
      description: typeof t.description === 'function' ? t.description() : t.description,
      parameters: t.parameters,
    },
  }));

  // 2. Build the request body.
  const body = {
    model: provider.model,
    messages,
    tools: openaiTools.length > 0 ? openaiTools : undefined,
    stream: true,
    // 'auto' is the default, but some providers require it to be explicit.
    tool_choice: openaiTools.length > 0 ? 'auto' : undefined,
  };

  // 3. Send the request.
  const url = `${provider.baseURL.replace(/\/$/, '')}/chat/completions`;
  const resp = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${provider.apiKey}`,
    },
    body: JSON.stringify(body),
    signal,
  });

  // 4. Turn HTTP failures into a readable error.
  if (!resp.ok) {
    const errText = await resp.text().catch(() => '');
    throw new Error(`LLM 请求失败 [${resp.status} ${resp.statusText}]:${errText.slice(0, 500)}`);
  }
  if (!resp.body) {
    throw new Error('LLM 响应没有 body(provider 可能不支持流式?)');
  }

  // 5. SSE parsing: split the byte stream into lines and parse each one.
  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  /** Carry-over text: network chunks are not aligned to line boundaries. */
  let buffer = '';
  /** Holds the latest stop reason, reported with the final `done` event. */
  const state = { stopReason: 'unknown' };

  try {
    while (true) {
      // Check for abort before each read so the loop exits promptly.
      if (signal?.aborted) {
        yield { type: 'done', stopReason: 'aborted' };
        return;
      }
      const { value, done } = await reader.read();
      if (done) {
        break;
      }
      // Check again after the read in case the abort arrived meanwhile.
      if (signal?.aborted) {
        yield { type: 'done', stopReason: 'aborted' };
        return;
      }
      buffer += decoder.decode(value, { stream: true });

      let lineEnd;
      while ((lineEnd = buffer.indexOf('\n')) !== -1) {
        const line = buffer.slice(0, lineEnd);
        buffer = buffer.slice(lineEnd + 1);
        const sawTerminator = yield* sseLineEvents(line, state);
        if (sawTerminator) {
          yield { type: 'done', stopReason: state.stopReason };
          return;
        }
      }
    }

    // End of stream: flush the decoder and process a final line that was not
    // newline-terminated, so its content / finish_reason is not lost.
    buffer += decoder.decode();
    if (buffer.trim()) {
      yield* sseLineEvents(buffer, state);
    }
    // The stream may end without an explicit [DONE]; always emit `done`.
    yield { type: 'done', stopReason: state.stopReason };
  } finally {
    // Cancel the body so an early exit (abort, [DONE], or the consumer
    // breaking out of the loop) does not leave the response stream open.
    try {
      await reader.cancel?.();
    } catch {
      // The stream already errored (e.g. aborted); nothing left to clean up.
    }
    reader.releaseLock?.();
  }
}