@next-open-ai/openbot 0.6.16 → 0.6.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +4 -4
  2. package/apps/desktop/renderer/dist/assets/index-CxDZnMBH.css +10 -0
  3. package/apps/desktop/renderer/dist/assets/index-k47Qiokg.js +93 -0
  4. package/apps/desktop/renderer/dist/index.html +2 -2
  5. package/dist/cli/cli.js +136 -0
  6. package/dist/cli/extension-cmd.d.ts +15 -0
  7. package/dist/cli/extension-cmd.js +107 -0
  8. package/dist/core/agent/agent-dir.d.ts +6 -0
  9. package/dist/core/agent/agent-dir.js +8 -0
  10. package/dist/core/agent/agent-manager.d.ts +13 -0
  11. package/dist/core/agent/agent-manager.js +88 -7
  12. package/dist/core/agent/proxy/adapters/claude-code-adapter.d.ts +2 -0
  13. package/dist/core/agent/proxy/adapters/claude-code-adapter.js +186 -0
  14. package/dist/core/agent/proxy/adapters/local-adapter.js +3 -1
  15. package/dist/core/agent/proxy/adapters/openclawx-adapter.js +3 -3
  16. package/dist/core/agent/proxy/adapters/opencode-adapter.js +65 -29
  17. package/dist/core/agent/proxy/adapters/opencode-local-runner.js +9 -0
  18. package/dist/core/agent/proxy/index.js +2 -0
  19. package/dist/core/agent/token-usage-log-extension.d.ts +14 -0
  20. package/dist/core/agent/token-usage-log-extension.js +61 -0
  21. package/dist/core/config/agent-reload-pending.js +3 -2
  22. package/dist/core/config/desktop-config.d.ts +29 -6
  23. package/dist/core/config/desktop-config.js +188 -27
  24. package/dist/core/config/provider-support-default.js +27 -0
  25. package/dist/core/extensions/index.d.ts +1 -0
  26. package/dist/core/extensions/index.js +1 -0
  27. package/dist/core/extensions/load.d.ts +11 -0
  28. package/dist/core/extensions/load.js +101 -0
  29. package/dist/core/local-llm-server/download-model.d.ts +16 -0
  30. package/dist/core/local-llm-server/download-model.js +37 -0
  31. package/dist/core/local-llm-server/index.d.ts +32 -0
  32. package/dist/core/local-llm-server/index.js +152 -0
  33. package/dist/core/local-llm-server/llm-context.d.ts +66 -0
  34. package/dist/core/local-llm-server/llm-context.js +270 -0
  35. package/dist/core/local-llm-server/model-resolve.d.ts +27 -0
  36. package/dist/core/local-llm-server/model-resolve.js +90 -0
  37. package/dist/core/local-llm-server/server.d.ts +1 -0
  38. package/dist/core/local-llm-server/server.js +234 -0
  39. package/dist/core/local-llm-server/start-from-config.d.ts +5 -0
  40. package/dist/core/local-llm-server/start-from-config.js +50 -0
  41. package/dist/core/mcp/adapter.d.ts +4 -2
  42. package/dist/core/mcp/adapter.js +10 -4
  43. package/dist/core/mcp/index.d.ts +2 -0
  44. package/dist/core/mcp/index.js +1 -0
  45. package/dist/core/mcp/operator.d.ts +11 -0
  46. package/dist/core/mcp/operator.js +41 -7
  47. package/dist/core/mcp/transport/stdio.d.ts +6 -0
  48. package/dist/core/mcp/transport/stdio.js +125 -28
  49. package/dist/core/memory/local-embedding-llama.js +8 -6
  50. package/dist/core/memory/local-embedding.d.ts +4 -3
  51. package/dist/core/memory/local-embedding.js +43 -3
  52. package/dist/core/tools/index.d.ts +1 -0
  53. package/dist/core/tools/index.js +1 -0
  54. package/dist/core/tools/truncate-result.d.ts +14 -0
  55. package/dist/core/tools/truncate-result.js +27 -0
  56. package/dist/core/tools/web-search/create-web-search-tool.d.ts +17 -0
  57. package/dist/core/tools/web-search/create-web-search-tool.js +87 -0
  58. package/dist/core/tools/web-search/index.d.ts +4 -0
  59. package/dist/core/tools/web-search/index.js +2 -0
  60. package/dist/core/tools/web-search/providers/brave.d.ts +2 -0
  61. package/dist/core/tools/web-search/providers/brave.js +87 -0
  62. package/dist/core/tools/web-search/providers/duck-duck-scrape.d.ts +2 -0
  63. package/dist/core/tools/web-search/providers/duck-duck-scrape.js +47 -0
  64. package/dist/core/tools/web-search/providers/index.d.ts +5 -0
  65. package/dist/core/tools/web-search/providers/index.js +13 -0
  66. package/dist/core/tools/web-search/types.d.ts +35 -0
  67. package/dist/core/tools/web-search/types.js +4 -0
  68. package/dist/gateway/methods/agent-chat.js +110 -42
  69. package/dist/gateway/methods/run-scheduled-task.js +2 -0
  70. package/dist/gateway/server.js +60 -13
  71. package/dist/server/agent-config/agent-config.controller.d.ts +9 -1
  72. package/dist/server/agent-config/agent-config.controller.js +11 -0
  73. package/dist/server/agent-config/agent-config.service.d.ts +29 -5
  74. package/dist/server/agent-config/agent-config.service.js +41 -1
  75. package/dist/server/agents/agents.gateway.js +1 -1
  76. package/dist/server/bootstrap.d.ts +1 -0
  77. package/dist/server/bootstrap.js +19 -2
  78. package/dist/server/config/config.controller.d.ts +107 -4
  79. package/dist/server/config/config.controller.js +185 -3
  80. package/dist/server/config/config.module.js +3 -2
  81. package/dist/server/config/config.service.d.ts +18 -1
  82. package/dist/server/config/config.service.js +68 -9
  83. package/dist/server/config/local-models.service.d.ts +67 -0
  84. package/dist/server/config/local-models.service.js +242 -0
  85. package/package.json +3 -1
  86. package/presets/preset-agents.json +125 -91
  87. package/presets/preset-config.json +24 -6
  88. package/presets/preset-providers.json +7 -0
  89. package/presets/recommended-local-models.json +36 -0
  90. package/presets/workspaces/download-assistant/skills/downloader/SKILL.md +2 -2
  91. package/presets/workspaces/office-automation/skills/rpa-helper/SKILL.md +9 -0
  92. package/presets/workspaces/self-media-bot/skills/self-media-tools/SKILL.md +9 -0
  93. package/skills/url-bookmark/SKILL.md +12 -12
  94. package/apps/desktop/renderer/dist/assets/index-BxqMW-uy.css +0 -10
  95. package/apps/desktop/renderer/dist/assets/index-DJs-wX3R.js +0 -89
package/dist/core/local-llm-server/index.js
@@ -0,0 +1,152 @@
+ /**
+  * local-llm-server entry point.
+  *
+  * Two run modes:
+  * 1. Child-process mode (--child): loads the models and starts the HTTP server directly; invoked via fork from the main process.
+  * 2. Main-process mode (default export): forks the child process, manages its lifecycle, and exposes the baseUrl to callers.
+  *
+  * The main process starts the service via startLocalLlmServer(), which returns { baseUrl, stop }.
+  * Once ready, the child process notifies the main process over IPC with { type: "ready" }.
+  */
+ import { fileURLToPath } from "node:url";
+ // ─── Child-process mode ─────────────────────────────────────────────────────
+ async function runChildProcess() {
+     const port = parseInt(process.env.LOCAL_LLM_PORT ?? "11435", 10);
+     const llmModelPath = process.env.LOCAL_LLM_MODEL?.trim() || undefined;
+     const embModelPath = process.env.LOCAL_EMB_MODEL?.trim() || undefined;
+     let contextSize = process.env.LOCAL_LLM_CONTEXT_SIZE != null ? parseInt(process.env.LOCAL_LLM_CONTEXT_SIZE, 10) : undefined;
+     if (contextSize == null && process.env.LOCAL_LLM_CONTEXT_MAX != null && String(process.env.LOCAL_LLM_CONTEXT_MAX).trim() !== '') {
+         contextSize = parseInt(process.env.LOCAL_LLM_CONTEXT_MAX, 10) || undefined;
+     }
+     if (!llmModelPath && !embModelPath) {
+         console.error("[local-llm] No LLM or embedding model path specified; at least one is required");
+         if (process.send)
+             process.send({ type: "error", message: "At least one of LOCAL_LLM_MODEL or LOCAL_EMB_MODEL must be set" });
+         process.exit(1);
+     }
+     const { initModels } = await import("./llm-context.js");
+     const { createOpenAICompatServer } = await import("./server.js");
+     try {
+         await initModels({
+             ...(llmModelPath ? { llmModelPath } : {}),
+             ...(embModelPath ? { embeddingModelPath: embModelPath } : {}),
+             contextSize: contextSize ?? 32768,
+         });
+         await createOpenAICompatServer(port);
+         if (process.send) {
+             process.send({ type: "ready", port });
+         }
+     }
+     catch (e) {
+         console.error("[local-llm] Child process failed to start:", e);
+         if (process.send) {
+             process.send({ type: "error", message: String(e) });
+         }
+         process.exit(1);
+     }
+ }
+ let serverHandle = null;
+ /**
+  * Stop the local LLM child-process service (if running). Used to stop before restarting when switching models.
+  */
+ export function stopLocalLlmServer() {
+     if (serverHandle) {
+         serverHandle.stop();
+         serverHandle = null;
+     }
+ }
+ /**
+  * Start the local LLM child-process service.
+  * If already running, returns the existing handle (singleton). Call stop first to restart with new options.
+  */
+ export async function startLocalLlmServer(opts = {}) {
+     if (serverHandle)
+         return serverHandle;
+     const { fork } = await import("node:child_process");
+     const port = opts.port ?? 11435;
+     const readyTimeoutMs = opts.readyTimeoutMs ?? 300_000;
+     const env = {
+         ...process.env,
+         LOCAL_LLM_PORT: String(port),
+         LOCAL_LLM_CHILD: "1",
+     };
+     if (opts.llmModelPath)
+         env.LOCAL_LLM_MODEL = opts.llmModelPath;
+     if (opts.embeddingModelPath)
+         env.LOCAL_EMB_MODEL = opts.embeddingModelPath;
+     if (opts.contextSize != null)
+         env.LOCAL_LLM_CONTEXT_SIZE = String(opts.contextSize);
+     const metaUrl = (new Function("return typeof import.meta !== 'undefined' ? import.meta.url : ''")());
+     const childPath = metaUrl
+         ? fileURLToPath(metaUrl)
+         : (typeof globalThis.__filename !== "undefined"
+             ? globalThis.__filename
+             : process.argv[1] || ".");
+     const child = fork(childPath, ["--child"], {
+         env,
+         stdio: ["ignore", "inherit", "inherit", "ipc"],
+         execArgv: [],
+     });
+     await new Promise((resolve, reject) => {
+         const timer = setTimeout(() => {
+             child.kill();
+             reject(new Error(`[local-llm] Child process startup timed out (${readyTimeoutMs}ms)`));
+         }, readyTimeoutMs);
+         child.on("message", (msg) => {
+             if (msg?.type === "ready") {
+                 clearTimeout(timer);
+                 resolve();
+             }
+             else if (msg?.type === "error") {
+                 clearTimeout(timer);
+                 reject(new Error(`[local-llm] Child process error: ${msg.message}`));
+             }
+         });
+         child.on("exit", (code) => {
+             clearTimeout(timer);
+             if (code !== 0)
+                 reject(new Error(`[local-llm] Child process exited unexpectedly, code=${code}`));
+         });
+         child.on("error", (e) => {
+             clearTimeout(timer);
+             reject(e);
+         });
+     });
+     // Clean up the child process when the main process exits
+     const cleanup = () => { try {
+         child.kill();
+     }
+     catch { /* ignore */ } };
+     process.on("exit", cleanup);
+     process.on("SIGINT", cleanup);
+     process.on("SIGTERM", cleanup);
+     serverHandle = {
+         baseUrl: `http://127.0.0.1:${port}/v1`,
+         stop: () => {
+             serverHandle = null;
+             try {
+                 child.kill();
+             }
+             catch { /* ignore */ }
+         },
+     };
+     // If the child exits unexpectedly (crash, OOM, etc.), clear the handle and env so later requests don't keep hitting a dead service and fail with "Connection error"
+     const onChildExit = (code, signal) => {
+         if (serverHandle)
+             serverHandle = null;
+         process.env.LOCAL_LLM_START_FAILED = "The local model service has exited. Please click 'Start local model service' again.";
+         delete process.env.LOCAL_LLM_BASE_URL;
+         console.warn("[local-llm] Child process exited code=%s signal=%s; please restart the local model service", code, signal);
+     };
+     child.on("exit", onChildExit);
+     console.log(`[local-llm] Local service ready: ${serverHandle.baseUrl}`);
+     return serverHandle;
+ }
+ // ─── Entry-point selection ───────────────────────────────────────────────────
+ // Child-process mode: forked with the --child argument or with the LOCAL_LLM_CHILD env var set
+ if (process.argv.includes("--child") || process.env.LOCAL_LLM_CHILD === "1") {
+     runChildProcess().catch((e) => {
+         console.error("[local-llm] Fatal error:", e);
+         process.exit(1);
+     });
+ }
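
A usage sketch of the main-process API above; `startLocalLlmServer` and its option names come straight from this file, while the model URI is only a placeholder:

    // Sketch: starting and stopping the local LLM service from the main process.
    import { startLocalLlmServer, stopLocalLlmServer } from "./index.js";

    const handle = await startLocalLlmServer({
        llmModelPath: "hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf", // placeholder model URI
        contextSize: 32768,
        port: 11435,
    });
    console.log(handle.baseUrl); // http://127.0.0.1:11435/v1 — point an OpenAI-compatible client here

    // Before switching models, stop the child process and start again with new options:
    stopLocalLlmServer();
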
package/dist/core/local-llm-server/llm-context.d.ts
@@ -0,0 +1,66 @@
+ export interface LlmContextOptions {
+     /** Path or hf: URI of the LLM inference model; optional — if omitted, only embedding is served */
+     llmModelPath?: string;
+     /** Path or hf: URI of the embedding model; optional — if omitted, only chat is served */
+     embeddingModelPath?: string;
+     /** GPU layers; -1 offloads all layers to GPU (Metal), 0 runs CPU-only */
+     gpuLayers?: number;
+     /** Context window size, defaults to 32768 (32K) */
+     contextSize?: number;
+ }
+ export interface ChatMessage {
+     role: "system" | "user" | "assistant" | "tool";
+     content: string | null;
+     /** tool_calls (when the assistant initiates tool calls) */
+     tool_calls?: ToolCall[];
+     /** tool_call_id (when role=tool, identifies which tool_call this answers) */
+     tool_call_id?: string;
+     /** Function name for tool messages */
+     name?: string;
+ }
+ export interface ToolDefinition {
+     type: "function";
+     function: {
+         name: string;
+         description?: string;
+         parameters?: Record<string, unknown>;
+     };
+ }
+ export interface ToolCall {
+     id: string;
+     type: "function";
+     function: {
+         name: string;
+         arguments: string;
+     };
+ }
+ export interface ChatCompletionChunk {
+     content?: string;
+     tool_calls?: ToolCall[];
+     finish_reason?: "stop" | "tool_calls" | "length";
+ }
+ export declare function initModels(opts: LlmContextOptions): Promise<void>;
+ /**
+  * Streaming chat completion.
+  * onChunk is invoked for each new token; the final finish_reason is emitted when the stream ends.
+  * If the local model emits <think>...</think> blocks, they are filtered from the stream so the reasoning is not shown.
+  */
+ export declare function chatCompletionStream(messages: ChatMessage[], tools: ToolDefinition[], onChunk: (chunk: ChatCompletionChunk) => void, signal?: AbortSignal): Promise<void>;
+ /**
+  * Non-streaming chat completion (reuses the streaming implementation internally).
+  */
+ export declare function chatCompletion(messages: ChatMessage[], tools: ToolDefinition[], signal?: AbortSignal): Promise<{
+     content: string;
+     tool_calls?: ToolCall[];
+     finish_reason: string;
+ }>;
+ /**
+  * Text embedding; returns an L2-normalized vector.
+  */
+ export declare function getEmbedding(text: string): Promise<number[]>;
+ /** Whether at least one model (LLM or embedding) is loaded */
+ export declare function isReady(): boolean;
+ /** Whether an LLM is available to serve chat/completions */
+ export declare function isLlmReady(): boolean;
+ /** Whether an embedding model is available to serve embeddings */
+ export declare function isEmbeddingReady(): boolean;
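
A minimal sketch of calling this in-process API as declared above; the model paths are placeholders, and `getEmbedding` only works if an embedding model was passed to `initModels`:

    import { initModels, chatCompletionStream, getEmbedding } from "./llm-context.js";

    await initModels({
        llmModelPath: "/path/to/chat-model.gguf",      // placeholder
        embeddingModelPath: "/path/to/embedding.gguf", // placeholder
        contextSize: 32768,
    });

    await chatCompletionStream(
        [{ role: "user", content: "Hello" }],
        [], // no tools
        (chunk) => {
            if (chunk.content) process.stdout.write(chunk.content);
            if (chunk.finish_reason) console.log("\nfinish_reason:", chunk.finish_reason);
        },
    );

    const vec = await getEmbedding("some text"); // L2-normalized number[]
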
package/dist/core/local-llm-server/llm-context.js
@@ -0,0 +1,270 @@
+ /**
+  * node-llama-cpp model instance management.
+  * Can load only the LLM, only the embedding model, or both; whichever is present is started, and a missing one does not fail the other.
+  */
+ import { LOCAL_LLM_CACHE_DIR } from "./model-resolve.js";
+ let llama = null;
+ let llmModel = null;
+ let embeddingModel = null;
+ let embeddingCtx = null;
+ /** Context window size, set by initModels and used in createContext; defaults to 32K to fit longer system prompts + tools */
+ let storedContextSize = 32768;
+ /** Serialization lock: the same model handles only one inference request at a time */
+ let llmQueue = Promise.resolve();
+ async function getLlamaInstance(gpuLayers) {
+     if (llama)
+         return llama;
+     const { getLlama, LlamaLogLevel } = await import("node-llama-cpp");
+     llama = await getLlama({
+         logLevel: LlamaLogLevel.warn,
+         ...(gpuLayers !== undefined ? { gpu: gpuLayers === 0 ? false : "auto" } : {}),
+     });
+     return llama;
+ }
+ export async function initModels(opts) {
+     storedContextSize = opts.contextSize ?? 32768;
+     const { resolveModelFile } = await import("node-llama-cpp");
+     const instance = await getLlamaInstance(opts.gpuLayers);
+     const cacheDir = LOCAL_LLM_CACHE_DIR;
+     if (opts.llmModelPath?.trim()) {
+         console.log("[local-llm] Loading LLM model:", opts.llmModelPath);
+         const llmPath = await resolveModelFile(opts.llmModelPath, cacheDir);
+         llmModel = await instance.loadModel({ modelPath: llmPath });
+     }
+     else {
+         llmModel = null;
+     }
+     if (opts.embeddingModelPath?.trim()) {
+         console.log("[local-llm] Loading embedding model:", opts.embeddingModelPath);
+         const embPath = await resolveModelFile(opts.embeddingModelPath, cacheDir);
+         embeddingModel = await instance.loadModel({ modelPath: embPath });
+         embeddingCtx = await embeddingModel.createEmbeddingContext();
+     }
+     else {
+         embeddingModel = null;
+         embeddingCtx = null;
+     }
+     console.log("[local-llm] Models loaded", {
+         llm: !!llmModel,
+         embedding: !!embeddingCtx,
+     });
+ }
+ /** Normalize content from the API (string | array such as [{ type: "text", text: "..." }]) to a string, so node-llama-cpp's LlamaText.fromJSON never receives an object and throws "Unknown value type: [object Object]" */
+ function contentToString(content) {
+     if (content == null)
+         return "";
+     if (typeof content === "string")
+         return content;
+     if (!Array.isArray(content))
+         return String(content);
+     return content
+         .filter((part) => part != null && typeof part === "object")
+         .map((part) => (part.type === "text" && typeof part.text === "string" ? part.text : ""))
+         .join("");
+ }
+ /**
+  * Convert ChatMessage[] to node-llama-cpp LlamaChatMessage[].
+  * tool_calls are serialized into the assistant content; tool results are fed back as user content.
+  * Incoming content may use the OpenAI multi-part format (content: [{ type: "text", text: "..." }]) and must be normalized to a string.
+  */
+ function toLocalMessages(messages) {
+     return messages.map((m) => {
+         const rawContent = m.content;
+         const content = contentToString(rawContent);
+         if (m.role === "tool") {
+             return { role: "user", content: `[Tool result for ${m.name ?? m.tool_call_id ?? "tool"}]: ${content}` };
+         }
+         if (m.role === "assistant" && m.tool_calls?.length) {
+             const calls = JSON.stringify(m.tool_calls);
+             return { role: "assistant", content: content + `\n[tool_calls]: ${calls}` };
+         }
+         return { role: m.role, content };
+     });
+ }
+ /**
+  * Convert tool definitions into a grammar-style description appended to the system prompt.
+  * node-llama-cpp v3 supports JSON-schema-constrained output via LlamaGrammar;
+  * here the tools are described in the prompt so the model emits tool_calls as JSON.
+  */
+ function buildToolSystemPrompt(tools) {
+     if (!tools.length)
+         return "";
+     const descs = tools.map((t) => {
+         const fn = t.function;
+         return `- ${fn.name}: ${fn.description ?? ""}\n parameters: ${JSON.stringify(fn.parameters ?? {})}`;
+     }).join("\n");
+     return `\n\nYou have access to the following tools. When you need to call a tool, respond ONLY with a JSON object in this exact format (no other text):\n{"tool_calls":[{"id":"call_<random>","type":"function","function":{"name":"<tool_name>","arguments":"<json_string>"}}]}\n\nAvailable tools:\n${descs}`;
+ }
+ /** Try to parse tool_calls JSON from the model output */
+ function parseToolCalls(text) {
+     const trimmed = text.trim();
+     // Match the {"tool_calls":[...]} shape
+     const match = trimmed.match(/\{[\s\S]*"tool_calls"[\s\S]*\}/);
+     if (!match)
+         return null;
+     try {
+         const parsed = JSON.parse(match[0]);
+         if (Array.isArray(parsed.tool_calls) && parsed.tool_calls.length > 0) {
+             return parsed.tool_calls;
+         }
+     }
+     catch {
+         // Not valid JSON; treat it as plain text
+     }
+     return null;
+ }
+ /**
+  * Remove <think>...</think> blocks from the accumulated text, keeping only the externally visible body (hides the local model's reasoning output).
+  * If a <think> is left unclosed, everything from <think> to the end is treated as reasoning and suppressed.
+  */
+ function getVisibleWithoutThinking(text) {
+     let out = "";
+     let i = 0;
+     const openTag = "<think>";
+     const closeTag = "</think>";
+     while (i < text.length) {
+         const open = text.indexOf(openTag, i);
+         if (open === -1) {
+             out += text.slice(i);
+             break;
+         }
+         out += text.slice(i, open);
+         const close = text.indexOf(closeTag, open + openTag.length);
+         if (close === -1) {
+             // Unclosed think block; suppress the rest
+             break;
+         }
+         i = close + closeTag.length;
+     }
+     return out;
+ }
+ /**
+  * Streaming chat completion.
+  * onChunk is invoked for each new token; the final finish_reason is emitted when the stream ends.
+  * If the local model emits <think>...</think> blocks, they are filtered from the stream so the reasoning is not shown.
+  */
+ export async function chatCompletionStream(messages, tools, onChunk, signal) {
+     if (!llmModel)
+         throw new Error("[local-llm] LLM model not initialized");
+     const { LlamaChatSession } = await import("node-llama-cpp");
+     // Queue requests serially
+     const run = async () => {
+         const ctx = await llmModel.createContext({ contextSize: storedContextSize });
+         // Replay the history (everything before the last user message)
+         const localMsgs = toLocalMessages(messages);
+         let lastUser = -1;
+         for (let i = localMsgs.length - 1; i >= 0; i--) {
+             if (localMsgs[i].role === "user") {
+                 lastUser = i;
+                 break;
+             }
+         }
+         const history = lastUser > 0 ? localMsgs.slice(0, lastUser) : [];
+         const userPrompt = lastUser >= 0 ? localMsgs[lastUser].content : "";
+         // Find the system prompt and append the tool description (system content may also be an array and must be normalized to a string)
+         const systemMsg = messages.find((m) => m.role === "system");
+         const toolSystemPrompt = buildToolSystemPrompt(tools);
+         const systemContent = contentToString(systemMsg?.content) + toolSystemPrompt;
+         // Create a session with the systemPrompt and rebuild the history
+         const session = new LlamaChatSession({
+             contextSequence: ctx.getSequence(),
+             systemPrompt: systemContent || undefined,
+         });
+         for (const msg of history) {
+             if (msg.role === "user") {
+                 await session.prompt(msg.content, { onTextChunk: () => { } });
+             }
+         }
+         let fullText = "";
+         let prevSentVisibleLength = 0;
+         let lastSent = ""; // Send identical consecutive deltas only once, to mitigate "every character shows up twice" when replies are slow
+         try {
+             await session.prompt(userPrompt, {
+                 onTextChunk: (token) => {
+                     if (signal?.aborted)
+                         return;
+                     const s = typeof token === "string" ? token : (token != null ? String(token) : "");
+                     if (!s)
+                         return;
+                     // node-llama-cpp may return context-bearing fragments (with a repeated prefix) from detokenize(tokens, false, tokenTrail),
+                     // unlike DeepSeek and others that return only the increment. Send only the delta downstream to avoid duplicated output like "HelloHello!!I amI am...".
+                     if (s.startsWith(fullText)) {
+                         fullText = s;
+                     }
+                     else {
+                         fullText += s;
+                     }
+                     // Filter <think>...</think> blocks so the reasoning is not sent to the client
+                     const visibleText = getVisibleWithoutThinking(fullText);
+                     const toSend = visibleText.slice(prevSentVisibleLength);
+                     prevSentVisibleLength = visibleText.length;
+                     if (toSend && toSend !== lastSent) {
+                         lastSent = toSend;
+                         onChunk({ content: toSend });
+                     }
+                 },
+                 signal,
+             });
+         }
+         catch (e) {
+             // node-llama-cpp may pass an object to LlamaText.fromJSON while parsing model output (e.g. segment/tool_call), causing "Unknown value type: [object Object]"
+             const msg = e instanceof Error ? e.message : String(e);
+             const stack = e instanceof Error ? e.stack : undefined;
+             console.error("[local-llm] chatCompletionStream session.prompt error:", msg);
+             if (stack)
+                 console.error("[local-llm] stack:", stack);
+             throw e;
+         }
+         // Check whether the output is a tool_calls payload
+         const toolCalls = parseToolCalls(fullText);
+         if (toolCalls) {
+             onChunk({ tool_calls: toolCalls, finish_reason: "tool_calls" });
+         }
+         else {
+             onChunk({ finish_reason: "stop" });
+         }
+         await ctx.dispose();
+     };
+     llmQueue = llmQueue.then(run, run);
+     await llmQueue;
+ }
+ /**
+  * Non-streaming chat completion (reuses the streaming implementation internally).
+  */
+ export async function chatCompletion(messages, tools, signal) {
+     let content = "";
+     let toolCalls;
+     let finishReason = "stop";
+     await chatCompletionStream(messages, tools, (chunk) => {
+         if (chunk.content)
+             content += chunk.content;
+         if (chunk.tool_calls)
+             toolCalls = chunk.tool_calls;
+         if (chunk.finish_reason)
+             finishReason = chunk.finish_reason;
+     }, signal);
+     return { content, tool_calls: toolCalls, finish_reason: finishReason };
+ }
+ /**
+  * Text embedding; returns an L2-normalized vector.
+  */
+ export async function getEmbedding(text) {
+     if (!embeddingCtx)
+         throw new Error("[local-llm] Embedding model not initialized");
+     const result = await embeddingCtx.getEmbeddingFor(text);
+     const vec = Array.from(result.vector);
+     const norm = Math.sqrt(vec.reduce((s, v) => s + v * v, 0)) || 1;
+     return vec.map((v) => v / norm);
+ }
+ /** Whether at least one model (LLM or embedding) is loaded */
+ export function isReady() {
+     return llmModel !== null || embeddingCtx !== null;
+ }
+ /** Whether an LLM is available to serve chat/completions */
+ export function isLlmReady() {
+     return llmModel !== null;
+ }
+ /** Whether an embedding model is available to serve embeddings */
+ export function isEmbeddingReady() {
+     return embeddingCtx !== null;
+ }
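
Since the local model has no native tool-calling, the round trip works entirely through the prompt: `buildToolSystemPrompt` asks the model to reply with a single JSON object, and `parseToolCalls` recovers it. Concretely (the tool name and arguments below are illustrative, not from this diff):

    // What the model is instructed to emit when it wants to call a tool:
    // {"tool_calls":[{"id":"call_abc123","type":"function","function":{"name":"web_search","arguments":"{\"query\":\"openbot\"}"}}]}
    //
    // parseToolCalls extracts that array, chatCompletionStream then emits
    // onChunk({ tool_calls: [...], finish_reason: "tool_calls" }), and when the caller sends the
    // result back as a role:"tool" message, toLocalMessages rewrites it on the next turn to
    // { role: "user", content: "[Tool result for web_search]: ..." }.
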
package/dist/core/local-llm-server/model-resolve.d.ts
@@ -0,0 +1,27 @@
+ export declare const LOCAL_LLM_CACHE_DIR: string;
+ /**
+  * Get the trailing filename of a modelUri (used for flexible matching against installed files: different node-llama-cpp versions may generate different prefixes).
+  * Example: hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf → Qwen3.5-4B-Q5_K_M.gguf
+  * Example: hf_unsloth_Qwen3.5-4B-GGUF_Qwen3.5-4B-Q5_K_M.gguf → Qwen3.5-4B-Q5_K_M.gguf (for the filename form, take the segment after the last _)
+  */
+ export declare function modelUriBasename(modelUri: string): string;
+ /**
+  * Convert a modelUri (hf:owner/repo/file.gguf) or filename to the filename used inside the cache directory.
+  * Mirrors the LocalModelsService.predictFilename logic.
+  */
+ export declare function modelUriToFilename(modelUri: string): string;
+ /**
+  * Resolve the path of a model file that actually exists in the cache directory.
+  * Tries the exact filename first; otherwise matches files ending with the modelUri's trailing filename (same logic as the "installed local models" view).
+  */
+ export declare function resolveModelPathInCache(modelIdOrUri: string, cacheDir?: string): string;
+ /**
+  * Check whether the given model (URI or filename) already exists in the local cache directory.
+  * Supports an exact filename or flexible matching on the trailing .gguf filename, consistent with the "installed local models" view.
+  */
+ export declare function isModelFileInCache(modelIdOrUri: string, cacheDir?: string): boolean;
+ /**
+  * Convert a model identifier from the frontend (hf: URI or installed filename) into a path or URI that can be passed to node-llama-cpp.
+  * A bare filename (e.g. hf_xxx.gguf) resolves to an absolute path under the cache directory; if the on-disk filename differs from the configured one (e.g. node-llama-cpp naming), the real path is resolved.
+  */
+ export declare function toModelPathForStart(uriOrFilename: string, cacheDir?: string): string;
package/dist/core/local-llm-server/model-resolve.js
@@ -0,0 +1,90 @@
+ /**
+  * Local model path resolution and file-existence checks.
+  * Cache directory: ~/.openbot/.cached_models/, alongside the openbot config for easier management.
+  * Consistent with the "installed local models" view: supports an exact filename or flexible matching on the trailing .gguf filename (compatible with node-llama-cpp's varying naming).
+  */
+ import { join } from "node:path";
+ import { existsSync, readdirSync } from "node:fs";
+ import { homedir } from "node:os";
+ export const LOCAL_LLM_CACHE_DIR = join(homedir(), ".openbot", ".cached_models");
+ /**
+  * Get the trailing filename of a modelUri (used for flexible matching against installed files: different node-llama-cpp versions may generate different prefixes).
+  * Example: hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf → Qwen3.5-4B-Q5_K_M.gguf
+  * Example: hf_unsloth_Qwen3.5-4B-GGUF_Qwen3.5-4B-Q5_K_M.gguf → Qwen3.5-4B-Q5_K_M.gguf (for the filename form, take the segment after the last _)
+  */
+ export function modelUriBasename(modelUri) {
+     const s = (modelUri || "").trim();
+     if (!s)
+         return "";
+     const parts = s.replace(/\\/g, "/").split("/");
+     const last = parts[parts.length - 1] || s;
+     // Only for the filename form without "/" (e.g. hf_X_Y_Z.gguf), take the segment after the last _, to match the short filenames node-llama-cpp may generate
+     if (!s.includes("/") && last.includes("_") && last.endsWith(".gguf")) {
+         const fromUnderscore = last.slice(last.lastIndexOf("_") + 1);
+         if (fromUnderscore.endsWith(".gguf"))
+             return fromUnderscore;
+     }
+     return last;
+ }
+ /**
+  * Convert a modelUri (hf:owner/repo/file.gguf) or filename to the filename used inside the cache directory.
+  * Mirrors the LocalModelsService.predictFilename logic.
+  */
+ export function modelUriToFilename(modelUri) {
+     const s = (modelUri || "").trim();
+     if (!s)
+         return "";
+     if (s.startsWith("hf:")) {
+         const parts = s.slice(3).split("/");
+         return "hf_" + parts.slice(0, -1).join("_") + "_" + parts[parts.length - 1];
+     }
+     // Already a filename or path; take only the basename
+     const last = s.replace(/\\/g, "/").split("/").pop();
+     return last ?? s;
+ }
+ /**
+  * Resolve the path of a model file that actually exists in the cache directory.
+  * Tries the exact filename first; otherwise matches files ending with the modelUri's trailing filename (same logic as the "installed local models" view).
+  */
+ export function resolveModelPathInCache(modelIdOrUri, cacheDir = LOCAL_LLM_CACHE_DIR) {
+     const filename = modelUriToFilename(modelIdOrUri);
+     if (!filename || !filename.endsWith(".gguf"))
+         return "";
+     const exactPath = join(cacheDir, filename);
+     if (existsSync(exactPath))
+         return exactPath;
+     const suffix = modelUriBasename(modelIdOrUri);
+     if (!suffix)
+         return "";
+     try {
+         const files = readdirSync(cacheDir);
+         const found = files.find((f) => f.endsWith(".gguf") && (f === suffix || f.endsWith(suffix)));
+         return found ? join(cacheDir, found) : "";
+     }
+     catch {
+         return "";
+     }
+ }
+ /**
+  * Check whether the given model (URI or filename) already exists in the local cache directory.
+  * Supports an exact filename or flexible matching on the trailing .gguf filename, consistent with the "installed local models" view.
+  */
+ export function isModelFileInCache(modelIdOrUri, cacheDir = LOCAL_LLM_CACHE_DIR) {
+     return resolveModelPathInCache(modelIdOrUri, cacheDir) !== "";
+ }
+ /**
+  * Convert a model identifier from the frontend (hf: URI or installed filename) into a path or URI that can be passed to node-llama-cpp.
+  * A bare filename (e.g. hf_xxx.gguf) resolves to an absolute path under the cache directory; if the on-disk filename differs from the configured one (e.g. node-llama-cpp naming), the real path is resolved.
+  */
+ export function toModelPathForStart(uriOrFilename, cacheDir = LOCAL_LLM_CACHE_DIR) {
+     const s = (uriOrFilename || "").trim();
+     if (!s)
+         return "";
+     if (s.startsWith("hf:"))
+         return s;
+     const resolved = resolveModelPathInCache(s, cacheDir);
+     if (resolved)
+         return resolved;
+     const filename = modelUriToFilename(s);
+     return filename ? join(cacheDir, filename) : s;
+ }
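
A sketch of the resulting mappings, grounded in the functions and docstring examples above:

    import { modelUriToFilename, modelUriBasename, toModelPathForStart } from "./model-resolve.js";

    modelUriToFilename("hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf");
    // → "hf_unsloth_Qwen3.5-4B-GGUF_Qwen3.5-4B-Q5_K_M.gguf" (predicted cache filename)

    modelUriBasename("hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf");
    // → "Qwen3.5-4B-Q5_K_M.gguf" (suffix used for flexible matching against installed files)

    toModelPathForStart("hf:unsloth/Qwen3.5-4B-GGUF/Qwen3.5-4B-Q5_K_M.gguf");
    // → the hf: URI unchanged (node-llama-cpp downloads it); a bare .gguf filename would instead
    //   resolve to an absolute path under ~/.openbot/.cached_models/
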
package/dist/core/local-llm-server/server.d.ts
@@ -0,0 +1 @@
+ export declare function createOpenAICompatServer(port: number): Promise<void>;
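
server.js itself (+234 lines in the file list) is not expanded in this diff, so the exact routes are not shown here. Given the `/v1` baseUrl returned by startLocalLlmServer, a client would presumably talk to it like any OpenAI-compatible endpoint; the following is only a sketch with the route and request shape assumed, not taken from this diff:

    const res = await fetch("http://127.0.0.1:11435/v1/chat/completions", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            model: "local", // model name assumed; the server serves whatever GGUF model was loaded
            messages: [{ role: "user", content: "Hello" }],
            stream: false,
        }),
    });
    const data = await res.json();
    console.log(data.choices?.[0]?.message?.content);
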