@next-open-ai/openclawx 0.8.40 → 0.8.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +10 -0
  2. package/apps/desktop/renderer/dist/assets/index-M5VGUUpo.js +93 -0
  3. package/apps/desktop/renderer/dist/assets/index-y8oE2q_u.css +10 -0
  4. package/apps/desktop/renderer/dist/index.html +2 -2
  5. package/dist/cli/cli.js +107 -0
  6. package/dist/core/agent/agent-manager.js +13 -2
  7. package/dist/core/agent/proxy/adapters/local-adapter.js +1 -1
  8. package/dist/core/config/desktop-config.d.ts +4 -1
  9. package/dist/core/config/desktop-config.js +108 -21
  10. package/dist/core/config/provider-support-default.js +26 -0
  11. package/dist/core/local-llm-server/download-model.d.ts +16 -0
  12. package/dist/core/local-llm-server/download-model.js +37 -0
  13. package/dist/core/local-llm-server/index.d.ts +32 -0
  14. package/dist/core/local-llm-server/index.js +147 -0
  15. package/dist/core/local-llm-server/llm-context.d.ts +65 -0
  16. package/dist/core/local-llm-server/llm-context.js +242 -0
  17. package/dist/core/local-llm-server/model-resolve.d.ts +27 -0
  18. package/dist/core/local-llm-server/model-resolve.js +90 -0
  19. package/dist/core/local-llm-server/server.d.ts +1 -0
  20. package/dist/core/local-llm-server/server.js +234 -0
  21. package/dist/core/local-llm-server/start-from-config.d.ts +5 -0
  22. package/dist/core/local-llm-server/start-from-config.js +50 -0
  23. package/dist/core/mcp/transport/stdio.d.ts +6 -0
  24. package/dist/core/mcp/transport/stdio.js +107 -27
  25. package/dist/core/memory/local-embedding-llama.js +2 -4
  26. package/dist/core/memory/local-embedding.d.ts +4 -3
  27. package/dist/core/memory/local-embedding.js +43 -3
  28. package/dist/gateway/methods/agent-chat.js +80 -41
  29. package/dist/gateway/server.js +10 -0
  30. package/dist/server/agent-config/agent-config.controller.d.ts +1 -1
  31. package/dist/server/agent-config/agent-config.service.d.ts +2 -0
  32. package/dist/server/agent-config/agent-config.service.js +5 -0
  33. package/dist/server/bootstrap.d.ts +1 -0
  34. package/dist/server/bootstrap.js +3 -0
  35. package/dist/server/config/config.controller.d.ts +81 -4
  36. package/dist/server/config/config.controller.js +185 -3
  37. package/dist/server/config/config.module.js +3 -2
  38. package/dist/server/config/config.service.d.ts +4 -1
  39. package/dist/server/config/config.service.js +62 -9
  40. package/dist/server/config/local-models.service.d.ts +67 -0
  41. package/dist/server/config/local-models.service.js +243 -0
  42. package/package.json +1 -1
  43. package/presets/preset-agents.json +6 -2
  44. package/presets/preset-config.json +24 -6
  45. package/presets/recommended-local-models.json +42 -0
  46. package/apps/desktop/renderer/dist/assets/index-BSfTiTKo.css +0 -10
  47. package/apps/desktop/renderer/dist/assets/index-DgLpQsA-.js +0 -89
  48. package/presets/workspaces/finance-expert/skills/akshare-helper/SKILL.md +0 -9
@@ -0,0 +1,242 @@
1
+ /**
2
+ * node-llama-cpp 模型实例管理。
3
+ * 可只加载 LLM、只加载 Embedding、或两者都加载;有一个就启动一个,不因缺另一个而失败。
4
+ */
5
+ import { LOCAL_LLM_CACHE_DIR } from "./model-resolve.js";
6
+ let llama = null;
7
+ let llmModel = null;
8
+ let embeddingModel = null;
9
+ let embeddingCtx = null;
10
+ /** Context window size; set by initModels and passed to createContext. Defaults to 32768 (author's note: 32K leaves room for a long system prompt plus tool descriptions). */
11
+ let storedContextSize = 32768;
12
+ /** Serial lock: a promise chain so that only one inference request runs at a time. */
13
+ let llmQueue = Promise.resolve();
14
/**
 * Returns the shared node-llama-cpp runtime, creating it on the first call.
 *
 * @param gpuLayers When undefined, no `gpu` option is passed; `0` requests
 *   `gpu: false`; any other value requests `gpu: "auto"`. Only the first
 *   call's value has an effect because the instance is cached afterwards.
 * @returns The cached runtime instance.
 */
async function getLlamaInstance(gpuLayers) {
    if (!llama) {
        const llamaCpp = await import("node-llama-cpp");
        const gpuOption = gpuLayers === undefined
            ? {}
            : { gpu: gpuLayers === 0 ? false : "auto" };
        llama = await llamaCpp.getLlama({
            logLevel: llamaCpp.LlamaLogLevel.warn,
            ...gpuOption,
        });
    }
    return llama;
}
24
/**
 * Loads the chat model and/or the embedding model described by `opts`.
 *
 * Each model is optional: a blank or missing path clears the corresponding
 * module-level handle instead of failing. Also records `opts.contextSize`
 * (default 32768) for contexts created later.
 *
 * @param opts Reads `llmModelPath`, `embeddingModelPath`, `contextSize`, `gpuLayers`.
 */
export async function initModels(opts) {
    storedContextSize = opts.contextSize ?? 32768;
    const llamaCpp = await import("node-llama-cpp");
    const runtime = await getLlamaInstance(opts.gpuLayers);

    const llmPathGiven = Boolean(opts.llmModelPath?.trim());
    if (!llmPathGiven) {
        llmModel = null;
    }
    else {
        console.log("[local-llm] 加载 LLM 模型:", opts.llmModelPath);
        const resolvedLlm = await llamaCpp.resolveModelFile(opts.llmModelPath, LOCAL_LLM_CACHE_DIR);
        llmModel = await runtime.loadModel({ modelPath: resolvedLlm });
    }

    const embeddingPathGiven = Boolean(opts.embeddingModelPath?.trim());
    if (!embeddingPathGiven) {
        embeddingModel = null;
        embeddingCtx = null;
    }
    else {
        console.log("[local-llm] 加载 Embedding 模型:", opts.embeddingModelPath);
        const resolvedEmbedding = await llamaCpp.resolveModelFile(opts.embeddingModelPath, LOCAL_LLM_CACHE_DIR);
        embeddingModel = await runtime.loadModel({ modelPath: resolvedEmbedding });
        embeddingCtx = await embeddingModel.createEmbeddingContext();
    }

    const summary = { llm: !!llmModel, embedding: !!embeddingCtx };
    console.log("[local-llm] 模型加载完成", summary);
}
52
+ /** 将 API 可能传来的 content(string | array 如 [{ type: "text", text: "..." }])规范为 string,避免 node-llama-cpp LlamaText.fromJSON 收到对象抛 "Unknown value type: [object Object]" */
53
+ function contentToString(content) {
54
+ if (content == null)
55
+ return "";
56
+ if (typeof content === "string")
57
+ return content;
58
+ if (!Array.isArray(content))
59
+ return String(content);
60
+ return content
61
+ .filter((part) => part != null && typeof part === "object")
62
+ .map((part) => (part.type === "text" && typeof part.text === "string" ? part.text : ""))
63
+ .join("");
64
+ }
65
/**
 * Maps OpenAI-style chat messages to `{ role, content }` pairs with string content.
 *
 * - `tool` messages become `user` messages prefixed with a "[Tool result for …]" label
 *   (the label is `name`, else `tool_call_id`, else "tool").
 * - `assistant` messages carrying `tool_calls` get the calls appended as JSON text.
 * - Everything else keeps its role; content is normalised via contentToString.
 */
function toLocalMessages(messages) {
    const convert = (message) => {
        const text = contentToString(message.content);
        switch (message.role) {
            case "tool": {
                const label = message.name ?? message.tool_call_id ?? "tool";
                return { role: "user", content: `[Tool result for ${label}]: ${text}` };
            }
            case "assistant": {
                if (message.tool_calls?.length) {
                    const serialized = JSON.stringify(message.tool_calls);
                    return { role: "assistant", content: text + `\n[tool_calls]: ${serialized}` };
                }
                break;
            }
            default:
                break;
        }
        return { role: message.role, content: text };
    };
    return messages.map((message) => convert(message));
}
84
+ /**
85
+ * 将 tools 定义转换为 grammar 约束描述,拼入 system prompt。
86
+ * node-llama-cpp v3 通过 LlamaGrammar 支持 JSON schema 约束输出,
87
+ * 这里用 prompt 方式描述工具,让模型以 JSON 格式输出 tool_calls。
88
+ */
89
+ function buildToolSystemPrompt(tools) {
90
+ if (!tools.length)
91
+ return "";
92
+ const descs = tools.map((t) => {
93
+ const fn = t.function;
94
+ return `- ${fn.name}: ${fn.description ?? ""}\n parameters: ${JSON.stringify(fn.parameters ?? {})}`;
95
+ }).join("\n");
96
+ return `\n\nYou have access to the following tools. When you need to call a tool, respond ONLY with a JSON object in this exact format (no other text):\n{"tool_calls":[{"id":"call_<random>","type":"function","function":{"name":"<tool_name>","arguments":"<json_string>"}}]}\n\nAvailable tools:\n${descs}`;
97
+ }
98
+ /** 尝试从模型输出中解析 tool_calls JSON */
99
+ function parseToolCalls(text) {
100
+ const trimmed = text.trim();
101
+ // 匹配 {"tool_calls":[...]} 格式
102
+ const match = trimmed.match(/\{[\s\S]*"tool_calls"[\s\S]*\}/);
103
+ if (!match)
104
+ return null;
105
+ try {
106
+ const parsed = JSON.parse(match[0]);
107
+ if (Array.isArray(parsed.tool_calls) && parsed.tool_calls.length > 0) {
108
+ return parsed.tool_calls;
109
+ }
110
+ }
111
+ catch {
112
+ // 不是合法 JSON,当普通文本处理
113
+ }
114
+ return null;
115
+ }
116
/**
 * Streaming chat completion.
 *
 * Requests are serialised through `llmQueue`, so only one inference runs at a
 * time. For each request a fresh context is created and is always disposed
 * afterwards, including when generation fails or is aborted.
 *
 * @param messages OpenAI-style chat messages.
 * @param tools    OpenAI-style tool definitions (may be empty or omitted).
 * @param onChunk  Called with `{ content }` for each new piece of text, then once
 *                 with `{ finish_reason: "stop" }` or
 *                 `{ tool_calls, finish_reason: "tool_calls" }`.
 * @param signal   Optional AbortSignal forwarded to the final prompt call.
 * @throws Error when no LLM is loaded; rethrows errors raised by the session.
 */
export async function chatCompletionStream(messages, tools, onChunk, signal) {
    if (!llmModel)
        throw new Error("[local-llm] LLM 模型未初始化");
    const { LlamaChatSession } = await import("node-llama-cpp");
    // Queued below so that requests run strictly one after another.
    const run = async () => {
        const ctx = await llmModel.createContext({ contextSize: storedContextSize });
        try {
            // Split the conversation into history and the last user-role message.
            const localMsgs = toLocalMessages(messages);
            let lastUser = -1;
            for (let i = localMsgs.length - 1; i >= 0; i--) {
                if (localMsgs[i].role === "user") {
                    lastUser = i;
                    break;
                }
            }
            const history = lastUser > 0 ? localMsgs.slice(0, lastUser) : [];
            const userPrompt = lastUser >= 0 ? localMsgs[lastUser].content : "";
            // System prompt (its content may also be a multi-part array) plus the tool description.
            const systemMsg = messages.find((m) => m.role === "system");
            const toolSystemPrompt = buildToolSystemPrompt(tools ?? []);
            const systemContent = contentToString(systemMsg?.content) + toolSystemPrompt;
            const session = new LlamaChatSession({
                contextSequence: ctx.getSequence(),
                systemPrompt: systemContent || undefined,
            });
            // Rebuild history: only user-role entries are re-prompted and the
            // text generated for them is discarded.
            // NOTE(review): prior assistant messages are therefore regenerated,
            // not restored — confirm this is intended.
            for (const msg of history) {
                if (msg.role === "user") {
                    await session.prompt(msg.content, { onTextChunk: () => { } });
                }
            }
            let fullText = "";
            let prevSentLength = 0;
            // Identical consecutive deltas are sent only once (author's mitigation
            // for characters showing up twice when replies are slow).
            // NOTE(review): this also drops legitimately repeated chunks.
            let lastSent = "";
            try {
                await session.prompt(userPrompt, {
                    onTextChunk: (token) => {
                        if (signal?.aborted)
                            return;
                        const s = typeof token === "string" ? token : (token != null ? String(token) : "");
                        if (!s)
                            return;
                        // A chunk may repeat the text produced so far (cumulative
                        // output); in that case replace instead of append so only
                        // the new part is forwarded downstream.
                        if (s.startsWith(fullText)) {
                            fullText = s;
                        }
                        else {
                            fullText += s;
                        }
                        const toSend = fullText.slice(prevSentLength);
                        prevSentLength = fullText.length;
                        if (toSend && toSend !== lastSent) {
                            lastSent = toSend;
                            onChunk({ content: toSend });
                        }
                    },
                    signal,
                });
            }
            catch (e) {
                // Log with the stack, then propagate to the caller.
                const msg = e instanceof Error ? e.message : String(e);
                const stack = e instanceof Error ? e.stack : undefined;
                console.error("[local-llm] chatCompletionStream session.prompt error:", msg);
                if (stack)
                    console.error("[local-llm] stack:", stack);
                throw e;
            }
            // Decide whether the complete output is a tool call.
            const toolCalls = parseToolCalls(fullText);
            if (toolCalls) {
                onChunk({ tool_calls: toolCalls, finish_reason: "tool_calls" });
            }
            else {
                onChunk({ finish_reason: "stop" });
            }
        }
        finally {
            // Release the context on every path; previously it leaked whenever
            // history replay or generation threw (including client aborts).
            await ctx.dispose();
        }
    };
    // `run` is used for both outcomes so a failed request does not block the queue.
    llmQueue = llmQueue.then(run, run);
    await llmQueue;
}
203
/**
 * Non-streaming chat completion, built on top of chatCompletionStream.
 *
 * @returns `{ content, tool_calls, finish_reason }` where `content` is the
 *   concatenation of all streamed text, `tool_calls` is undefined unless the
 *   stream reported some, and `finish_reason` defaults to "stop".
 */
export async function chatCompletion(messages, tools, signal) {
    const result = { content: "", tool_calls: undefined, finish_reason: "stop" };
    const collect = (chunk) => {
        if (chunk.content) {
            result.content += chunk.content;
        }
        if (chunk.tool_calls) {
            result.tool_calls = chunk.tool_calls;
        }
        if (chunk.finish_reason) {
            result.finish_reason = chunk.finish_reason;
        }
    };
    await chatCompletionStream(messages, tools, collect, signal);
    return result;
}
220
/**
 * Computes the embedding of `text` and returns it as an L2-normalised array.
 * An all-zero vector is returned unchanged (the divisor falls back to 1).
 *
 * @throws Error when no embedding context has been initialised.
 */
export async function getEmbedding(text) {
    if (!embeddingCtx) {
        throw new Error("[local-llm] Embedding 模型未初始化");
    }
    const { vector } = await embeddingCtx.getEmbeddingFor(text);
    const values = Array.from(vector);
    let sumOfSquares = 0;
    for (const value of values) {
        sumOfSquares = sumOfSquares + value * value;
    }
    const divisor = Math.sqrt(sumOfSquares) || 1;
    return values.map((value) => value / divisor);
}
231
/** True when at least one of the LLM model or the embedding context is loaded. */
export function isReady() {
    const nothingLoaded = llmModel === null && embeddingCtx === null;
    return !nothingLoaded;
}
235
/** True when an LLM is loaded, i.e. chat/completions can be served. */
export function isLlmReady() {
    return !(llmModel === null);
}
239
/** True when an embedding context is loaded, i.e. embeddings can be served. */
export function isEmbeddingReady() {
    return !(embeddingCtx === null);
}
@@ -0,0 +1,27 @@
1
+ export declare const LOCAL_LLM_CACHE_DIR: string;
2
+ /**
3
+ * Returns the trailing file name of a model URI, used for flexible matching against installed files (author's note: different node-llama-cpp versions may generate different prefixes).
4
+ * Example: hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf → Qwen3-4B-Q4_K_M.gguf
5
+ * Example (author's stated intent for the file-name form): hf_Qwen_Qwen3-4B-GGUF_Qwen3-4B-Q4_K_M.gguf → Qwen3-4B-Q4_K_M.gguf. NOTE(review): verify against the implementation's underscore handling.
6
+ */
7
+ export declare function modelUriBasename(modelUri: string): string;
8
+ /**
9
+ * Converts a model URI (hf:owner/repo/file.gguf) or a file name into the file name used inside the cache directory.
10
+ * Stated by the author to mirror LocalModelsService.predictFilename (not visible here — confirm).
11
+ */
12
+ export declare function modelUriToFilename(modelUri: string): string;
13
+ /**
14
+ * Resolves the path of a model file that actually exists in the cache directory.
15
+ * Tries the exact file name first; otherwise matches files that end with the URI's trailing file name (same rule as the "installed local models" listing, per the author).
16
+ */
17
+ export declare function resolveModelPathInCache(modelIdOrUri: string, cacheDir?: string): string;
18
+ /**
19
+ * Checks whether the given model (URI or file name) already exists in the local cache directory.
20
+ * Accepts an exact file name or a flexible match on the trailing .gguf file name, consistent with the "installed local models" listing (per the author).
21
+ */
22
+ export declare function isModelFileInCache(modelIdOrUri: string, cacheDir?: string): boolean;
23
+ /**
24
+ * Converts the model identifier sent by the front end (an hf: URI or an installed file name) into a path or URI that can be handed to node-llama-cpp.
25
+ * A bare file name (e.g. hf_xxx.gguf) becomes an absolute path inside the cache directory; when the file on disk is named differently from the configured value (e.g. node-llama-cpp naming), the real path is resolved.
26
+ */
27
+ export declare function toModelPathForStart(uriOrFilename: string, cacheDir?: string): string;
@@ -0,0 +1,90 @@
1
+ /**
2
+ * 本地模型路径解析与文件存在性检查。
3
+ * 缓存目录:~/.openbot/.cached_models/,与 openbot 配置同目录便于管理。
4
+ * 与「已安装的本地模型」展示一致:支持精确文件名 或 以末尾 .gguf 文件名结尾的灵活匹配(兼容 node-llama-cpp 不同命名)。
5
+ */
6
+ import { join } from "node:path";
7
+ import { existsSync, readdirSync } from "node:fs";
8
+ import { homedir } from "node:os";
9
+ export const LOCAL_LLM_CACHE_DIR = join(homedir(), ".openbot", ".cached_models");
10
/**
 * Returns the trailing file name of a model URI, used to match installed files
 * flexibly (different node-llama-cpp versions may generate different prefixes).
 *
 * Examples:
 *   hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf  → Qwen3-4B-Q4_K_M.gguf
 *   hf_Qwen_Qwen3-4B-GGUF_Qwen3-4B-Q4_K_M.gguf  → Qwen3-4B-Q4_K_M.gguf
 *
 * For the flattened `hf_<owner>_<repo>_<file>.gguf` form (as produced by
 * modelUriToFilename) the `hf_`, owner and repo segments are stripped. Cutting
 * at the LAST underscore, as was done before, reduced the example above to
 * `M.gguf`, which matches unrelated models when used as a suffix.
 * Limitation: owner/repo names that themselves contain `_` cannot be told
 * apart, so the result may then keep part of the repo name.
 *
 * @param modelUri hf: URI, path, or file name; blank input yields "".
 */
export function modelUriBasename(modelUri) {
    const s = (modelUri || "").trim();
    if (!s)
        return "";
    const parts = s.replace(/\\/g, "/").split("/");
    const last = parts[parts.length - 1] || s;
    // Only the slash-free file-name form can carry a flattened hf_ prefix.
    if (!s.includes("/") && last.startsWith("hf_") && last.endsWith(".gguf")) {
        const segments = last.slice("hf_".length).split("_");
        // Need owner + repo + at least one file segment to strip anything.
        if (segments.length > 2)
            return segments.slice(2).join("_");
    }
    return last;
}
29
/**
 * Converts a model URI (hf:owner/repo/file.gguf) or a file name/path into the
 * file name used inside the cache directory.
 *
 * - `hf:` URIs are flattened: the path segments are joined with `_` and
 *   prefixed with `hf_`.
 * - Anything else is reduced to its basename (both `/` and `\` separators).
 * - Blank input yields "".
 */
export function modelUriToFilename(modelUri) {
    const uri = (modelUri || "").trim();
    if (uri === "") {
        return "";
    }
    if (!uri.startsWith("hf:")) {
        // Already a file name or a path: keep only the basename.
        const segments = uri.replace(/\\/g, "/").split("/");
        return segments.pop() ?? uri;
    }
    const pieces = uri.slice(3).split("/");
    const fileName = pieces[pieces.length - 1];
    const flattenedDirs = pieces.slice(0, -1).join("_");
    return "hf_" + flattenedDirs + "_" + fileName;
}
45
/**
 * Resolves the path of a model file that actually exists in the cache directory.
 *
 * The exact cache file name is tried first; failing that, the directory is
 * scanned for a `.gguf` file whose name ends with the URI's trailing file name.
 *
 * @returns The resolved path, or "" when nothing matches, the name is not a
 *   `.gguf` file, or the directory cannot be read.
 */
export function resolveModelPathInCache(modelIdOrUri, cacheDir = LOCAL_LLM_CACHE_DIR) {
    const expectedName = modelUriToFilename(modelIdOrUri);
    if (!(expectedName && expectedName.endsWith(".gguf"))) {
        return "";
    }
    const direct = join(cacheDir, expectedName);
    if (existsSync(direct)) {
        return direct;
    }
    const tail = modelUriBasename(modelIdOrUri);
    if (!tail) {
        return "";
    }
    try {
        for (const entry of readdirSync(cacheDir)) {
            // An exact match is a special case of "ends with".
            if (entry.endsWith(".gguf") && entry.endsWith(tail)) {
                return join(cacheDir, entry);
            }
        }
        return "";
    }
    catch {
        return "";
    }
}
68
/**
 * Reports whether the given model (URI or file name) is present in the local
 * cache directory, using the same exact-or-suffix matching as
 * resolveModelPathInCache.
 */
export function isModelFileInCache(modelIdOrUri, cacheDir = LOCAL_LLM_CACHE_DIR) {
    const resolvedPath = resolveModelPathInCache(modelIdOrUri, cacheDir);
    return resolvedPath !== "";
}
75
/**
 * Converts the model identifier sent by the front end into something that can
 * be handed to node-llama-cpp.
 *
 * - Blank input yields "".
 * - `hf:` URIs are returned unchanged.
 * - Otherwise the file is looked up in the cache (exact or suffix match); if
 *   it is not found, the cache directory joined with the basename is returned,
 *   falling back to the trimmed input when no basename can be derived.
 */
export function toModelPathForStart(uriOrFilename, cacheDir = LOCAL_LLM_CACHE_DIR) {
    const identifier = (uriOrFilename || "").trim();
    if (identifier === "" || identifier.startsWith("hf:")) {
        return identifier;
    }
    const cachedPath = resolveModelPathInCache(identifier, cacheDir);
    if (cachedPath) {
        return cachedPath;
    }
    const baseName = modelUriToFilename(identifier);
    if (!baseName) {
        return identifier;
    }
    return join(cacheDir, baseName);
}
@@ -0,0 +1 @@
1
+ export declare function createOpenAICompatServer(port: number): Promise<void>;
@@ -0,0 +1,234 @@
1
+ /**
2
+ * OpenAI 兼容 HTTP 服务(严格对齐 [OpenAI Chat Completions / Embeddings API](https://platform.openai.com/docs/api-reference))。
3
+ * 实现:GET /v1/models;POST /v1/chat/completions(流式/非流式,tool_calls);POST /v1/embeddings。
4
+ * - 错误统一为 { error: { message, type } },流式错误以 SSE 事件发送后结束。
5
+ * - 流式 delta 仅含规范字段:role、content(必为 string)、tool_calls(规范结构),避免客户端解析到未知类型。
6
+ */
7
+ import { createServer } from "node:http";
8
+ import { randomUUID } from "node:crypto";
9
+ import { chatCompletionStream, chatCompletion, getEmbedding, isLlmReady, isEmbeddingReady, } from "./llm-context.js";
10
+ const LLM_MODEL_ID = process.env.LOCAL_LLM_MODEL_ID ?? "local-llm";
11
+ const EMB_MODEL_ID = process.env.LOCAL_EMB_MODEL_ID ?? "local-embedding";
12
+ function readBody(req) {
13
+ return new Promise((resolve, reject) => {
14
+ let data = "";
15
+ req.on("data", (chunk) => (data += chunk));
16
+ req.on("end", () => {
17
+ try {
18
+ resolve(data ? JSON.parse(data) : {});
19
+ }
20
+ catch {
21
+ reject(new Error("Invalid JSON body"));
22
+ }
23
+ });
24
+ req.on("error", reject);
25
+ });
26
+ }
27
+ function sendJson(res, status, body) {
28
+ const json = JSON.stringify(body);
29
+ res.writeHead(status, { "Content-Type": "application/json" });
30
+ res.end(json);
31
+ }
32
/**
 * Sends an OpenAI-style error body: `{ error: { message, type } }`.
 * `type` defaults to "server_error" for 5xx statuses and to
 * "invalid_request_error" otherwise.
 */
function sendError(res, status, message, type = status >= 500 ? "server_error" : "invalid_request_error") {
    const error = { message: String(message), type };
    sendJson(res, status, { error });
}
36
+ /** 构造 OpenAI 格式的 chat completion 响应对象 */
37
+ function buildCompletionResponse(content, tool_calls, finish_reason, model) {
38
+ const message = { role: "assistant", content: tool_calls ? null : content };
39
+ if (tool_calls?.length)
40
+ message.tool_calls = tool_calls;
41
+ return {
42
+ id: `chatcmpl-${randomUUID()}`,
43
+ object: "chat.completion",
44
+ created: Math.floor(Date.now() / 1000),
45
+ model,
46
+ choices: [{ index: 0, message, finish_reason, logprobs: null }],
47
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
48
+ };
49
+ }
50
+ /** 构造 SSE delta chunk,仅含 OpenAI 流式规范字段,不包含 logprobs 避免下游解析异常 */
51
+ function buildStreamChunk(id, model, delta, finish_reason) {
52
+ const choice = { index: 0, delta, finish_reason };
53
+ const chunk = {
54
+ id,
55
+ object: "chat.completion.chunk",
56
+ created: Math.floor(Date.now() / 1000),
57
+ model,
58
+ choices: [choice],
59
+ };
60
+ return `data: ${JSON.stringify(chunk)}\n\n`;
61
+ }
62
/**
 * Handles POST /v1/chat/completions (streaming and non-streaming).
 *
 * Validation failures are answered with an OpenAI-style error body. In
 * streaming mode errors are sent as an SSE `data:` event and the stream ends.
 *
 * Client disconnects abort the running inference. The abort is wired to the
 * response's "close" event and guarded by `writableEnded`: the request's
 * "close" event fires as soon as the body has been consumed on current Node
 * versions, so it cannot be used to detect a dropped connection.
 */
async function handleChatCompletions(req, res) {
    let body;
    try {
        body = await readBody(req);
    }
    catch {
        return sendError(res, 400, "Invalid JSON body");
    }
    if (!isLlmReady())
        return sendError(res, 503, "LLM 模型未加载,请先启动本地模型服务并选择 LLM 模型", "server_error");
    // A JSON body such as `null` or `42` parses fine but has no fields.
    if (body == null || typeof body !== "object" || !Array.isArray(body.messages)) {
        return sendError(res, 400, "Missing or invalid 'messages' (must be an array)", "invalid_request_error");
    }
    if (body.messages.length === 0) {
        return sendError(res, 400, "'messages' must contain at least one message", "invalid_request_error");
    }
    const messages = body.messages;
    const tools = Array.isArray(body.tools) ? body.tools : [];
    const stream = body.stream === true;
    const model = typeof body.model === "string" && body.model.trim() ? body.model.trim() : LLM_MODEL_ID;
    const abortCtrl = new AbortController();
    res.on("close", () => {
        // "close" also fires after a normal end(); only abort on premature close.
        if (!res.writableEnded)
            abortCtrl.abort();
    });
    if (stream) {
        res.writeHead(200, {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            Connection: "keep-alive",
        });
        const id = `chatcmpl-${randomUUID()}`;
        // First frame: role plus an empty content placeholder, for clients that
        // validate deltas strictly.
        res.write(buildStreamChunk(id, model, { role: "assistant", content: "" }, null));
        let pendingToolCalls;
        let finishReason = "stop";
        try {
            await chatCompletionStream(messages, tools, (chunk) => {
                if (abortCtrl.signal.aborted)
                    return;
                if (chunk.content != null && chunk.content !== "") {
                    const text = typeof chunk.content === "string" ? chunk.content : String(chunk.content);
                    res.write(buildStreamChunk(id, model, { content: text }, null));
                }
                if (chunk.tool_calls?.length) {
                    pendingToolCalls = chunk.tool_calls;
                }
                if (chunk.finish_reason) {
                    finishReason = chunk.finish_reason;
                }
            }, abortCtrl.signal);
        }
        catch (e) {
            if (!abortCtrl.signal.aborted) {
                const errMsg = e instanceof Error ? e.message : String(e);
                const stack = e instanceof Error ? e.stack : undefined;
                console.error("[local-llm] stream error:", errMsg);
                if (stack)
                    console.error("[local-llm] stream stack:", stack);
                res.write(`data: ${JSON.stringify({ error: { message: errMsg, type: "server_error" } })}\n\n`);
            }
            res.end();
            return;
        }
        if (abortCtrl.signal.aborted) {
            // The client is gone; nothing more to send.
            res.end();
            return;
        }
        // Tool calls are sent as a single delta carrying index/id/type/function.
        if (pendingToolCalls?.length) {
            const deltaToolCalls = pendingToolCalls.map((tc, i) => {
                const rawArgs = tc.function?.arguments;
                return {
                    index: i,
                    id: typeof tc.id === "string" ? tc.id : `call_${i}`,
                    type: "function",
                    function: {
                        name: typeof tc.function?.name === "string" ? tc.function.name : "",
                        // Models often emit arguments as an object; serialise
                        // it instead of silently replacing it with "".
                        arguments: typeof rawArgs === "string"
                            ? rawArgs
                            : rawArgs == null ? "" : JSON.stringify(rawArgs),
                    },
                };
            });
            res.write(buildStreamChunk(id, model, { tool_calls: deltaToolCalls }, null));
            finishReason = "tool_calls";
        }
        res.write(buildStreamChunk(id, model, {}, finishReason));
        res.write("data: [DONE]\n\n");
        res.end();
    }
    else {
        try {
            const result = await chatCompletion(messages, tools, abortCtrl.signal);
            sendJson(res, 200, buildCompletionResponse(result.content, result.tool_calls, result.finish_reason, model));
        }
        catch (e) {
            // After a client disconnect there is nobody left to answer.
            if (abortCtrl.signal.aborted)
                return;
            const msg = e instanceof Error ? e.message : String(e);
            sendError(res, 500, msg, "server_error");
        }
    }
}
152
/**
 * Handles POST /v1/embeddings.
 *
 * `input` must be a string or a non-empty array of strings; each item is
 * embedded and returned in request order in the OpenAI list format. Token
 * usage is always reported as 0.
 */
async function handleEmbeddings(req, res) {
    let body;
    try {
        body = await readBody(req);
    }
    catch {
        return sendError(res, 400, "Invalid JSON body", "invalid_request_error");
    }
    if (!isEmbeddingReady()) {
        return sendError(res, 503, "Embedding 模型未加载,请先启动本地模型服务并选择 Embedding 模型", "server_error");
    }
    const { input } = body;
    if (input == null) {
        return sendError(res, 400, "Missing 'input' (string or array of strings)", "invalid_request_error");
    }
    const texts = Array.isArray(input) ? input : [input];
    const allStrings = texts.length > 0 && texts.every((item) => typeof item === "string");
    if (!allStrings) {
        return sendError(res, 400, "'input' must be a non-empty string or array of strings", "invalid_request_error");
    }
    try {
        const embedOne = async (text, index) => ({
            object: "embedding",
            index,
            embedding: await getEmbedding(text),
        });
        const data = await Promise.all(texts.map((text, index) => embedOne(text, index)));
        const payload = {
            object: "list",
            data,
            model: body.model ?? EMB_MODEL_ID,
            usage: { prompt_tokens: 0, total_tokens: 0 },
        };
        sendJson(res, 200, payload);
    }
    catch (e) {
        sendError(res, 500, e instanceof Error ? e.message : String(e), "server_error");
    }
}
188
/**
 * Handles GET /v1/models: lists the LLM and/or embedding model ids, each only
 * when the corresponding model is loaded.
 */
function handleModels(_req, res) {
    const describe = (id) => ({ id, object: "model", created: 0, owned_by: "local" });
    const data = [
        ...(isLlmReady() ? [describe(LLM_MODEL_ID)] : []),
        ...(isEmbeddingReady() ? [describe(EMB_MODEL_ID)] : []),
    ];
    sendJson(res, 200, { object: "list", data });
}
196
/**
 * Starts the OpenAI-compatible HTTP server on 127.0.0.1.
 *
 * Routes: GET /v1/models, POST /v1/chat/completions, POST /v1/embeddings.
 * Routing uses the path only, so a query string no longer turns a valid
 * route into a 404.
 *
 * @param port TCP port to listen on.
 * @returns Promise that resolves once the server is listening and rejects on
 *   a server "error" event (e.g. the port is already in use).
 */
export function createOpenAICompatServer(port) {
    return new Promise((resolve, reject) => {
        const server = createServer(async (req, res) => {
            const url = req.url ?? "";
            const method = req.method ?? "";
            // Ignore any query string when matching routes.
            const path = url.split("?")[0];
            // CORS.
            // NOTE(review): a wildcard origin lets any web page opened on this
            // machine call the local model server — confirm this is intended.
            res.setHeader("Access-Control-Allow-Origin", "*");
            res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization");
            if (method === "OPTIONS") {
                res.writeHead(204);
                res.end();
                return;
            }
            try {
                if (method === "GET" && path === "/v1/models") {
                    handleModels(req, res);
                }
                else if (method === "POST" && path === "/v1/chat/completions") {
                    await handleChatCompletions(req, res);
                }
                else if (method === "POST" && path === "/v1/embeddings") {
                    await handleEmbeddings(req, res);
                }
                else {
                    sendError(res, 404, `Not found: ${method} ${url}`, "invalid_request_error");
                }
            }
            catch (e) {
                if (!res.headersSent) {
                    sendError(res, 500, String(e));
                }
                else if (!res.writableEnded) {
                    // Headers already sent (e.g. mid-stream): close the
                    // response so the client is not left hanging.
                    res.end();
                }
            }
        });
        server.on("error", reject);
        server.listen(port, "127.0.0.1", () => {
            console.log(`[local-llm] OpenAI 兼容服务已启动: http://127.0.0.1:${port}/v1`);
            resolve();
        });
    });
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Tries to start the local model service from the saved configuration (the default agent's local model and context size).
3
+ * Starts only when the downloaded model file exists; never throws; on failure sets process.env.LOCAL_LLM_START_FAILED and logs a message.
4
+ */
5
+ export declare function tryStartLocalModelFromSavedConfig(): Promise<void>;
@@ -0,0 +1,50 @@
1
+ /**
2
+ * 通用「按当前已保存配置启动本地模型服务」逻辑。
3
+ * 供网关启动与 API 复用:读取默认智能体配置,尝试启动;失败只设 env 与日志,不抛错、不影响主进程。
4
+ * LLM/Embedding 任一存在即可启动,只提示不报错。
5
+ */
6
+ import { loadDesktopAgentConfig } from "../config/desktop-config.js";
7
+ import { startLocalLlmServer } from "./index.js";
8
+ import { resolveModelPathInCache, LOCAL_LLM_CACHE_DIR } from "./model-resolve.js";
9
/**
 * Tries to start the local model service from the saved configuration of the
 * "default" agent.
 *
 * Nothing is started unless that agent uses the `local` provider and its model
 * file resolves inside the cache directory. The context size comes from
 * LOCAL_LLM_CONTEXT_MAX when set (falling back to 32768 if it does not parse),
 * otherwise from the agent config (default 32768).
 *
 * Never throws: every failure is recorded in process.env.LOCAL_LLM_START_FAILED
 * and logged. The server start itself is not awaited; on success
 * LOCAL_LLM_BASE_URL is set and the failure marker is cleared.
 */
export async function tryStartLocalModelFromSavedConfig() {
    const markFailed = (reason) => {
        process.env.LOCAL_LLM_START_FAILED = reason;
    };
    const toMessage = (err) => (err instanceof Error ? err.message : String(err));
    try {
        const agent = await loadDesktopAgentConfig("default");
        const usesLocalModel = agent && agent.provider === "local" && agent.model?.trim();
        if (!usesLocalModel) {
            markFailed("未配置默认本地模型,请在「模型配置」中选择 LLM 后点击「启动本地模型服务」");
            console.log("[local-llm] 提示:未配置默认本地模型,跳过启动。");
            return;
        }
        const llmResolved = resolveModelPathInCache(agent.model.trim(), LOCAL_LLM_CACHE_DIR);
        if (!llmResolved) {
            markFailed("缺省模型文件未下载,请先在「模型管理」中下载后点击「启动本地模型服务」");
            console.log("[local-llm] 提示:缺省模型文件未下载,跳过启动。");
            return;
        }
        let contextSize;
        const envContextMax = process.env.LOCAL_LLM_CONTEXT_MAX;
        if (envContextMax != null && String(envContextMax).trim() !== "") {
            contextSize = parseInt(envContextMax, 10) || 32768;
        }
        else {
            contextSize = agent.contextSize ?? 32768;
        }
        const onReady = (handle) => {
            process.env.LOCAL_LLM_BASE_URL = handle.baseUrl;
            delete process.env.LOCAL_LLM_START_FAILED;
            console.log("[local-llm] 已就绪:", handle.baseUrl);
        };
        const onFailure = (e) => {
            const msg = toMessage(e);
            markFailed(msg);
            console.log("[local-llm] 提示:启动未成功(如模型未下载请先在「模型管理」中下载)。", msg);
        };
        // Deliberately not awaited: loading the model must not block the caller.
        startLocalLlmServer({ llmModelPath: llmResolved, contextSize })
            .then(onReady)
            .catch(onFailure);
    }
    catch (e) {
        const msg = toMessage(e);
        markFailed(msg);
        console.log("[local-llm] 提示:启动时发生异常,已跳过。", msg);
    }
}
@@ -23,10 +23,16 @@ export declare class StdioTransport {
23
23
  private nextId;
24
24
  private pending;
25
25
  private buffer;
26
+ private stderrBuffer;
27
+ private static pendingKey;
26
28
  constructor(config: McpServerConfigStdio, options?: StdioTransportOptions);
27
29
  /** 启动子进程并完成 MCP initialize 握手 */
28
30
  start(): Promise<void>;
31
+ /** 从一行中解析 JSON-RPC 响应:整行即 JSON,或从第一个 { 开始提取到匹配的 }(兼容 npx/uvx 等前缀输出) */
32
+ private static parseJsonRpcResponse;
29
33
  private flushLines;
34
+ private flushStderrLines;
35
+ private flushLinesFromBuffer;
30
36
  private rejectAll;
31
37
  private initialize;
32
38
  private sendNotification;