@next-open-ai/openclawx 0.8.40 → 0.8.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +10 -0
  2. package/apps/desktop/renderer/dist/assets/index-M5VGUUpo.js +93 -0
  3. package/apps/desktop/renderer/dist/assets/index-y8oE2q_u.css +10 -0
  4. package/apps/desktop/renderer/dist/index.html +2 -2
  5. package/dist/cli/cli.js +107 -0
  6. package/dist/core/agent/agent-manager.js +13 -2
  7. package/dist/core/agent/proxy/adapters/local-adapter.js +1 -1
  8. package/dist/core/config/desktop-config.d.ts +4 -1
  9. package/dist/core/config/desktop-config.js +108 -21
  10. package/dist/core/config/provider-support-default.js +26 -0
  11. package/dist/core/local-llm-server/download-model.d.ts +16 -0
  12. package/dist/core/local-llm-server/download-model.js +37 -0
  13. package/dist/core/local-llm-server/index.d.ts +32 -0
  14. package/dist/core/local-llm-server/index.js +147 -0
  15. package/dist/core/local-llm-server/llm-context.d.ts +65 -0
  16. package/dist/core/local-llm-server/llm-context.js +242 -0
  17. package/dist/core/local-llm-server/model-resolve.d.ts +27 -0
  18. package/dist/core/local-llm-server/model-resolve.js +90 -0
  19. package/dist/core/local-llm-server/server.d.ts +1 -0
  20. package/dist/core/local-llm-server/server.js +234 -0
  21. package/dist/core/local-llm-server/start-from-config.d.ts +5 -0
  22. package/dist/core/local-llm-server/start-from-config.js +50 -0
  23. package/dist/core/mcp/transport/stdio.d.ts +6 -0
  24. package/dist/core/mcp/transport/stdio.js +107 -27
  25. package/dist/core/memory/local-embedding-llama.js +2 -4
  26. package/dist/core/memory/local-embedding.d.ts +4 -3
  27. package/dist/core/memory/local-embedding.js +43 -3
  28. package/dist/gateway/methods/agent-chat.js +80 -41
  29. package/dist/gateway/server.js +10 -0
  30. package/dist/server/agent-config/agent-config.controller.d.ts +1 -1
  31. package/dist/server/agent-config/agent-config.service.d.ts +2 -0
  32. package/dist/server/agent-config/agent-config.service.js +5 -0
  33. package/dist/server/bootstrap.d.ts +1 -0
  34. package/dist/server/bootstrap.js +3 -0
  35. package/dist/server/config/config.controller.d.ts +81 -4
  36. package/dist/server/config/config.controller.js +185 -3
  37. package/dist/server/config/config.module.js +3 -2
  38. package/dist/server/config/config.service.d.ts +4 -1
  39. package/dist/server/config/config.service.js +62 -9
  40. package/dist/server/config/local-models.service.d.ts +67 -0
  41. package/dist/server/config/local-models.service.js +243 -0
  42. package/package.json +1 -1
  43. package/presets/preset-agents.json +6 -2
  44. package/presets/preset-config.json +24 -6
  45. package/presets/recommended-local-models.json +42 -0
  46. package/apps/desktop/renderer/dist/assets/index-BSfTiTKo.css +0 -10
  47. package/apps/desktop/renderer/dist/assets/index-DgLpQsA-.js +0 -89
  48. package/presets/workspaces/finance-expert/skills/akshare-helper/SKILL.md +0 -9
@@ -209,8 +209,8 @@ export async function loadDesktopAgentConfig(agentId) {
209
209
  }
210
210
  }
211
211
  const resolvedAgentId = agentId === "default" ? "default" : agentId;
212
- let provider = config.defaultProvider ?? "deepseek";
213
- let model = config.defaultModel ?? "deepseek-chat";
212
+ let provider = config.defaultProvider ?? "ollama";
213
+ let model = config.defaultModel ?? "qwen3:4b";
214
214
  if (config.defaultModelItemCode && Array.isArray(config.configuredModels)) {
215
215
  const configured = config.configuredModels.find((m) => m.modelItemCode === config.defaultModelItemCode);
216
216
  if (configured) {
@@ -218,11 +218,14 @@ export async function loadDesktopAgentConfig(agentId) {
218
218
  model = configured.modelId;
219
219
  }
220
220
  }
221
+ /** 是否从当前智能体自己的配置得到了模型(有 modelItemCode 或 provider/model);若否,则使用的是全局默认 */
222
+ let agentHadOwnModel = false;
221
223
  let workspaceName = resolvedAgentId;
222
224
  let mcpServers;
223
225
  let mcpMaxResultTokens;
224
226
  let systemPrompt;
225
227
  let useLongMemory = true;
228
+ let contextSize;
226
229
  if (existsSync(agentsPath)) {
227
230
  try {
228
231
  const raw = await readFile(agentsPath, "utf-8");
@@ -237,6 +240,9 @@ export async function loadDesktopAgentConfig(agentId) {
237
240
  if (agent.mcpMaxResultTokens != null && typeof agent.mcpMaxResultTokens === "number" && agent.mcpMaxResultTokens > 0) {
238
241
  mcpMaxResultTokens = agent.mcpMaxResultTokens;
239
242
  }
243
+ if (agent.contextSize != null && typeof agent.contextSize === "number" && agent.contextSize > 0) {
244
+ contextSize = agent.contextSize;
245
+ }
240
246
  if (agent.mcpServers != null) {
241
247
  if (Array.isArray(agent.mcpServers) || (typeof agent.mcpServers === "object" && !Array.isArray(agent.mcpServers))) {
242
248
  mcpServers = agent.mcpServers;
@@ -252,19 +258,28 @@ export async function loadDesktopAgentConfig(agentId) {
252
258
  if (configured) {
253
259
  provider = configured.provider;
254
260
  model = configured.modelId;
261
+ agentHadOwnModel = true;
255
262
  }
256
263
  else {
257
- if (agent.provider)
264
+ if (agent.provider) {
258
265
  provider = agent.provider;
259
- if (agent.model)
266
+ agentHadOwnModel = true;
267
+ }
268
+ if (agent.model) {
260
269
  model = agent.model;
270
+ agentHadOwnModel = true;
271
+ }
261
272
  }
262
273
  }
263
274
  else {
264
- if (agent.provider)
275
+ if (agent.provider) {
265
276
  provider = agent.provider;
266
- if (agent.model)
277
+ agentHadOwnModel = true;
278
+ }
279
+ if (agent.model) {
267
280
  model = agent.model;
281
+ agentHadOwnModel = true;
282
+ }
268
283
  }
269
284
  }
270
285
  }
@@ -272,6 +287,11 @@ export async function loadDesktopAgentConfig(agentId) {
272
287
  // ignore
273
288
  }
274
289
  }
290
+ // 本地 LLM 可用且当前智能体未配置自己的模型时,使用本地推理作为缺省,使所有智能体“拥有”该配置
291
+ if (!agentHadOwnModel && process.env.LOCAL_LLM_BASE_URL?.trim()) {
292
+ provider = "local";
293
+ model = "local-llm";
294
+ }
275
295
  const provConfig = config.providers?.[provider];
276
296
  const apiKey = provConfig?.apiKey && typeof provConfig.apiKey === "string" && provConfig.apiKey.trim()
277
297
  ? provConfig.apiKey.trim()
@@ -427,6 +447,7 @@ export async function loadDesktopAgentConfig(agentId) {
427
447
  claudeCode,
428
448
  useLongMemory,
429
449
  webSearch,
450
+ contextSize,
430
451
  };
431
452
  }
432
453
  function ensureDesktopDir() {
@@ -624,22 +645,54 @@ export async function ensureProviderSupportFile() {
624
645
  await writeFile(path, JSON.stringify(presetProviders, null, 2), "utf-8");
625
646
  }
626
647
  }
627
- /** config.json 不存在则用 preset-config.json 初始化,若存在则浅合并补充新基础键值 */
648
+ /** 预装本地推理缺省:推荐列表第一个 LLM(Qwen3-4B)对应的本地文件名,与 modelUriToFilename 一致 */
649
+ const DEFAULT_LOCAL_LLM_MODEL_ID = "hf_Qwen_Qwen3-4B-GGUF_Qwen3-4B-Q4_K_M.gguf";
650
+ const DEFAULT_LOCAL_MODEL_ITEM_CODE = "local-qwen3-4b";
651
+ /** 代码内建默认:local provider + 本地 Qwen3-4B,首次与合并时优先保证存在 */
652
+ const BUILTIN_DEFAULT_CONFIG = {
653
+ defaultProvider: "local",
654
+ defaultModel: DEFAULT_LOCAL_LLM_MODEL_ID,
655
+ defaultModelItemCode: DEFAULT_LOCAL_MODEL_ITEM_CODE,
656
+ defaultAgentId: DEFAULT_AGENT_ID,
657
+ maxAgentSessions: DEFAULT_MAX_AGENT_SESSIONS,
658
+ providers: {
659
+ local: { baseUrl: "http://127.0.0.1:11435/v1" },
660
+ },
661
+ configuredModels: [
662
+ {
663
+ provider: "local",
664
+ modelId: DEFAULT_LOCAL_LLM_MODEL_ID,
665
+ type: "llm",
666
+ alias: "Qwen3 4B Q4_K_M",
667
+ modelItemCode: DEFAULT_LOCAL_MODEL_ITEM_CODE,
668
+ },
669
+ {
670
+ provider: "local",
671
+ modelId: "hf_ggml-org_embeddinggemma-300M-GGUF_embeddinggemma-300M-Q8_0.gguf",
672
+ type: "embedding",
673
+ alias: "EmbeddingGemma 300M Q8 (768维)",
674
+ modelItemCode: "local-embeddinggemma-300m",
675
+ },
676
+ ],
677
+ };
678
+ /** 若 config.json 不存在则用 preset-config.json 初始化,若存在则浅合并补充新基础键值。预装 local provider + 本地 Qwen3-4B 模型并设为缺省;preset 与代码默认合并,保证 local 一定存在。 */
628
679
  async function ensureConfigJsonInitialized() {
629
680
  const presetPath = join(getPresetsDir(), "preset-config.json");
630
- let presetConfig = {
631
- defaultProvider: "deepseek",
632
- defaultModel: "deepseek-chat",
633
- defaultAgentId: DEFAULT_AGENT_ID,
634
- maxAgentSessions: DEFAULT_MAX_AGENT_SESSIONS,
635
- providers: {},
636
- configuredModels: [],
637
- };
681
+ let presetConfig = { ...BUILTIN_DEFAULT_CONFIG };
638
682
  if (existsSync(presetPath)) {
639
683
  try {
640
684
  const data = JSON.parse(await readFile(presetPath, "utf-8"));
641
- if (data.config)
642
- presetConfig = data.config;
685
+ if (data.config && typeof data.config === "object") {
686
+ presetConfig = { ...BUILTIN_DEFAULT_CONFIG, ...data.config };
687
+ presetConfig.providers = { ...BUILTIN_DEFAULT_CONFIG.providers, ...(presetConfig.providers || {}) };
688
+ const hasLocalModel = (presetConfig.configuredModels || []).some((m) => m?.provider === "local" && (m?.modelId === DEFAULT_LOCAL_LLM_MODEL_ID || m?.modelItemCode === DEFAULT_LOCAL_MODEL_ITEM_CODE));
689
+ if (!hasLocalModel) {
690
+ presetConfig.configuredModels = [
691
+ ...(BUILTIN_DEFAULT_CONFIG.configuredModels || []),
692
+ ...(presetConfig.configuredModels || []),
693
+ ];
694
+ }
695
+ }
643
696
  }
644
697
  catch { }
645
698
  }
@@ -708,18 +761,46 @@ async function ensureAgentsJsonInitialized() {
708
761
  }
709
762
  }
710
763
  }
764
+ // 所有未单独配置模型的智能体使用 config 的缺省模型(预装为 local + Qwen3-4B)
765
+ const configPath = join(getDesktopDir(), "config.json");
766
+ if (existsSync(configPath)) {
767
+ try {
768
+ const configRaw = await readFile(configPath, "utf-8");
769
+ const configData = JSON.parse(configRaw);
770
+ const defProvider = configData.defaultProvider?.trim();
771
+ const defModel = configData.defaultModel?.trim();
772
+ const defCode = configData.defaultModelItemCode?.trim();
773
+ if (defProvider && defModel) {
774
+ for (const agent of currentData.agents) {
775
+ const hasOwn = (agent.provider && String(agent.provider).trim()) || (agent.model && String(agent.model).trim()) || (agent.modelItemCode && String(agent.modelItemCode).trim());
776
+ if (!hasOwn) {
777
+ agent.provider = defProvider;
778
+ agent.model = defModel;
779
+ if (defCode)
780
+ agent.modelItemCode = defCode;
781
+ changed = true;
782
+ }
783
+ }
784
+ }
785
+ }
786
+ catch { /* ignore */ }
787
+ }
711
788
  if (changed || !existsSync(agentsPath)) {
712
789
  await writeFile(agentsPath, JSON.stringify(currentData, null, 2), "utf-8");
713
790
  }
714
791
  }
715
792
  /**
716
- * CLI / Gateway 运行时调用,确保 config.json、provider-support.json、agents.json 均完成初始化。
793
+ * CLI / Gateway 运行时调用,确保 config.json、provider-support.json、agents.json 均完成初始化,
794
+ * 并同步到 agent 目录 models.json,供 pi ModelRegistry 解析 local 等模型与凭证。
717
795
  */
718
796
  export async function ensureDesktopConfigInitialized() {
719
797
  ensureDesktopDir();
720
798
  await ensureProviderSupportFile();
721
799
  await ensureConfigJsonInitialized();
722
800
  await ensureAgentsJsonInitialized();
801
+ await syncDesktopConfigToModelsJson().catch((err) => {
802
+ console.warn("[ensureDesktopConfigInitialized] syncDesktopConfigToModelsJson failed:", err);
803
+ });
723
804
  }
724
805
  /**
725
806
  * 取某 provider 在 provider-support 中的第一个 llm 模型 id;若无则返回第一个模型 id。
@@ -770,6 +851,10 @@ const SYNC_DEFAULTS = {
770
851
  "openai-custom": { baseUrl: "", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
771
852
  nvidia: { baseUrl: "https://integrate.api.nvidia.com/v1", apiKey: "NVIDIA_API_KEY", api: "openai-completions" },
772
853
  kimi: { baseUrl: "https://api.moonshot.cn/v1", apiKey: "MOONSHOT_API_KEY", api: "openai-completions" },
854
+ /** 本地 Ollama,无需真实 API Key */
855
+ ollama: { baseUrl: "http://localhost:11434/v1", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
856
+ /** 内置本地推理(node-llama-cpp),无需 API Key,baseUrl 指向本地子进程服务 */
857
+ local: { baseUrl: "http://127.0.0.1:11435/v1", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
773
858
  };
774
859
  const DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
775
860
  const DEFAULT_CONTEXT_WINDOW = 64000;
@@ -806,10 +891,12 @@ export async function syncDesktopConfigToModelsJson() {
806
891
  const support = await getProviderSupport();
807
892
  const piProviders = {};
808
893
  for (const [providerId, userConfig] of Object.entries(configured)) {
809
- if (!userConfig?.apiKey?.trim())
894
+ // ollama / local 不需要 API Key,其他 provider 必须有 apiKey
895
+ const isNoKeyProvider = providerId === "ollama" || providerId === "local";
896
+ if (!isNoKeyProvider && !userConfig?.apiKey?.trim())
810
897
  continue;
811
898
  const defaults = SYNC_DEFAULTS[providerId] ?? { baseUrl: "", apiKey: "OPENAI_API_KEY", api: "openai-completions" };
812
- const baseUrl = userConfig.baseUrl?.trim() || (support[providerId]?.baseUrl ?? "").trim() || defaults.baseUrl;
899
+ const baseUrl = userConfig?.baseUrl?.trim() || (support[providerId]?.baseUrl ?? "").trim() || defaults.baseUrl;
813
900
  if (!baseUrl)
814
901
  continue;
815
902
  const def = support[providerId];
@@ -839,7 +926,7 @@ export async function syncDesktopConfigToModelsJson() {
839
926
  continue;
840
927
  }
841
928
  piProviders[providerId] = {
842
- name: (userConfig.alias?.trim() || def?.name) || providerId,
929
+ name: (userConfig?.alias?.trim() || def?.name) || providerId,
843
930
  apiKey: defaults.apiKey,
844
931
  api: defaults.api,
845
932
  baseUrl: baseUrl.replace(/\/$/, ""),
@@ -54,4 +54,30 @@ export const DEFAULT_PROVIDER_SUPPORT = {
54
54
  { id: "moonshot-v1-128k", name: "Moonshot 128K", types: ["llm"] },
55
55
  ],
56
56
  },
57
+ /** 本地 Ollama 服务,兼容 OpenAI API;baseUrl 指向本机 Ollama 默认端口 */
58
+ ollama: {
59
+ name: "Ollama (本地)",
60
+ baseUrl: "http://localhost:11434/v1",
61
+ models: [
62
+ { id: "qwen3:4b", name: "Qwen3 4B", types: ["llm"] },
63
+ { id: "qwen3:8b", name: "Qwen3 8B", types: ["llm"] },
64
+ { id: "qwen3:14b", name: "Qwen3 14B", types: ["llm"] },
65
+ { id: "llama3.2:3b", name: "Llama 3.2 3B", types: ["llm"] },
66
+ { id: "llama3.2:1b", name: "Llama 3.2 1B", types: ["llm"] },
67
+ { id: "nomic-embed-text", name: "Nomic Embed Text", types: ["embedding"] },
68
+ ],
69
+ },
70
+ /**
71
+ * 内置本地推理(node-llama-cpp),无需安装 Ollama。
72
+ * baseUrl 指向本地 LLM 子进程服务;模型列表为推荐的 GGUF 模型,可在本地模型管理页面增删。
73
+ * 无需 API Key。
74
+ */
75
+ local: {
76
+ name: "本地推理 (node-llama-cpp)",
77
+ baseUrl: "http://127.0.0.1:11435/v1",
78
+ models: [
79
+ { id: "local-llm", name: "本地 LLM(当前加载)", types: ["llm"] },
80
+ { id: "local-embedding", name: "本地 Embedding(当前加载)", types: ["embedding"] },
81
+ ],
82
+ },
57
83
  };
@@ -0,0 +1,16 @@
1
+ export declare const DEFAULT_LLM_MODEL_URI = "hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf";
2
+ export interface DownloadModelOptions {
3
+ useMirror?: boolean;
4
+ signal?: AbortSignal;
5
+ onProgress?: (p: {
6
+ downloadedSize: number;
7
+ totalSize: number;
8
+ percent: number;
9
+ }) => void;
10
+ }
11
+ /**
12
+ * 下载模型到本地缓存目录。
13
+ * @returns 解析后的本地文件路径
14
+ */
15
+ export declare function downloadModel(modelUri: string, options?: DownloadModelOptions): Promise<string>;
16
+ export declare function getResolvedBasename(modelUri: string): string;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * 本地模型下载(供 CLI 与 Nest LocalModelsService 复用)。
3
+ * 使用 node-llama-cpp resolveModelFile,缓存目录 ~/.openbot/.cached_models/。
4
+ */
5
+ import { basename } from "node:path";
6
+ import { LOCAL_LLM_CACHE_DIR } from "./model-resolve.js";
7
+ export const DEFAULT_LLM_MODEL_URI = "hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf";
8
+ /**
9
+ * 下载模型到本地缓存目录。
10
+ * @returns 解析后的本地文件路径
11
+ */
12
+ export async function downloadModel(modelUri, options = {}) {
13
+ const { resolveModelFile } = await import("node-llama-cpp");
14
+ const { useMirror = false, signal, onProgress } = options;
15
+ const hfToken = process.env.HF_TOKEN || process.env.HUGGING_FACE_TOKEN;
16
+ const opts = {
17
+ directory: LOCAL_LLM_CACHE_DIR,
18
+ endpoints: {
19
+ huggingFace: useMirror ? "https://hf-mirror.com/" : "https://huggingface.co/",
20
+ },
21
+ };
22
+ if (signal)
23
+ opts.signal = signal;
24
+ if (hfToken)
25
+ opts.headers = { Authorization: `Bearer ${hfToken}` };
26
+ if (onProgress) {
27
+ opts.onProgress = ({ downloadedSize, totalSize }) => {
28
+ const percent = totalSize ? Math.round((downloadedSize / totalSize) * 100) : 0;
29
+ onProgress({ downloadedSize, totalSize, percent });
30
+ };
31
+ }
32
+ const resolved = await resolveModelFile(modelUri, opts);
33
+ return resolved;
34
+ }
35
+ export function getResolvedBasename(modelUri) {
36
+ return basename(modelUri.replace(/^hf:[^/]+\//, "").replace(/\//g, "_"));
37
+ }
@@ -0,0 +1,32 @@
1
+ /**
2
+ * local-llm-server 入口。
3
+ *
4
+ * 两种运行模式:
5
+ * 1. 子进程模式(--child):直接加载模型并启动 HTTP 服务,由主进程 fork 调用。
6
+ * 2. 主进程模式(默认导出):fork 子进程,管理其生命周期,提供 baseUrl 给调用方。
7
+ *
8
+ * 主进程通过 startLocalLlmServer() 启动,返回 { baseUrl, stop }。
9
+ * 子进程就绪后通过 IPC 发送 { type: "ready" } 通知主进程。
10
+ */
11
+ export interface LocalLlmServerOptions {
12
+ port?: number;
13
+ llmModelPath?: string;
14
+ embeddingModelPath?: string;
15
+ /** 上下文窗口 token 数,默认 32768(32K),需能容纳 system + tools + 对话 */
16
+ contextSize?: number;
17
+ /** 等待子进程就绪的超时毫秒数,默认 300000(5 分钟,冷启/大模型加载可能较慢) */
18
+ readyTimeoutMs?: number;
19
+ }
20
+ export interface LocalLlmServerHandle {
21
+ baseUrl: string;
22
+ stop: () => void;
23
+ }
24
+ /**
25
+ * 停止本地 LLM 子进程服务(若正在运行)。用于切换模型前先停止再启动。
26
+ */
27
+ export declare function stopLocalLlmServer(): void;
28
+ /**
29
+ * 启动本地 LLM 子进程服务。
30
+ * 已启动时直接返回已有 handle(单例)。需先 stop 再传新参数重启。
31
+ */
32
+ export declare function startLocalLlmServer(opts?: LocalLlmServerOptions): Promise<LocalLlmServerHandle>;
@@ -0,0 +1,147 @@
1
+ /**
2
+ * local-llm-server 入口。
3
+ *
4
+ * 两种运行模式:
5
+ * 1. 子进程模式(--child):直接加载模型并启动 HTTP 服务,由主进程 fork 调用。
6
+ * 2. 主进程模式(默认导出):fork 子进程,管理其生命周期,提供 baseUrl 给调用方。
7
+ *
8
+ * 主进程通过 startLocalLlmServer() 启动,返回 { baseUrl, stop }。
9
+ * 子进程就绪后通过 IPC 发送 { type: "ready" } 通知主进程。
10
+ */
11
+ import { fileURLToPath } from "node:url";
12
+ // ─── 子进程模式 ───────────────────────────────────────────────────────────────
13
+ async function runChildProcess() {
14
+ const port = parseInt(process.env.LOCAL_LLM_PORT ?? "11435", 10);
15
+ const llmModelPath = process.env.LOCAL_LLM_MODEL?.trim() || undefined;
16
+ const embModelPath = process.env.LOCAL_EMB_MODEL?.trim() || undefined;
17
+ let contextSize = process.env.LOCAL_LLM_CONTEXT_SIZE != null ? parseInt(process.env.LOCAL_LLM_CONTEXT_SIZE, 10) : undefined;
18
+ if (contextSize == null && process.env.LOCAL_LLM_CONTEXT_MAX != null && String(process.env.LOCAL_LLM_CONTEXT_MAX).trim() !== '') {
19
+ contextSize = parseInt(process.env.LOCAL_LLM_CONTEXT_MAX, 10) || undefined;
20
+ }
21
+ if (!llmModelPath && !embModelPath) {
22
+ console.error("[local-llm] 未指定 LLM 或 Embedding 模型路径,至少需提供一个");
23
+ if (process.send)
24
+ process.send({ type: "error", message: "至少需指定 LOCAL_LLM_MODEL 或 LOCAL_EMB_MODEL" });
25
+ process.exit(1);
26
+ }
27
+ const { initModels } = await import("./llm-context.js");
28
+ const { createOpenAICompatServer } = await import("./server.js");
29
+ try {
30
+ await initModels({
31
+ ...(llmModelPath ? { llmModelPath } : {}),
32
+ ...(embModelPath ? { embeddingModelPath: embModelPath } : {}),
33
+ contextSize: contextSize ?? 32768,
34
+ });
35
+ await createOpenAICompatServer(port);
36
+ if (process.send) {
37
+ process.send({ type: "ready", port });
38
+ }
39
+ }
40
+ catch (e) {
41
+ console.error("[local-llm] 子进程启动失败:", e);
42
+ if (process.send) {
43
+ process.send({ type: "error", message: String(e) });
44
+ }
45
+ process.exit(1);
46
+ }
47
+ }
48
+ let serverHandle = null;
49
+ /**
50
+ * 停止本地 LLM 子进程服务(若正在运行)。用于切换模型前先停止再启动。
51
+ */
52
+ export function stopLocalLlmServer() {
53
+ if (serverHandle) {
54
+ serverHandle.stop();
55
+ serverHandle = null;
56
+ }
57
+ }
58
+ /**
59
+ * 启动本地 LLM 子进程服务。
60
+ * 已启动时直接返回已有 handle(单例)。需先 stop 再传新参数重启。
61
+ */
62
+ export async function startLocalLlmServer(opts = {}) {
63
+ if (serverHandle)
64
+ return serverHandle;
65
+ const { fork } = await import("node:child_process");
66
+ const port = opts.port ?? 11435;
67
+ const readyTimeoutMs = opts.readyTimeoutMs ?? 300_000;
68
+ const env = {
69
+ ...process.env,
70
+ LOCAL_LLM_PORT: String(port),
71
+ LOCAL_LLM_CHILD: "1",
72
+ };
73
+ if (opts.llmModelPath)
74
+ env.LOCAL_LLM_MODEL = opts.llmModelPath;
75
+ if (opts.embeddingModelPath)
76
+ env.LOCAL_EMB_MODEL = opts.embeddingModelPath;
77
+ if (opts.contextSize != null)
78
+ env.LOCAL_LLM_CONTEXT_SIZE = String(opts.contextSize);
79
+ const childPath = fileURLToPath(import.meta.url);
80
+ const child = fork(childPath, ["--child"], {
81
+ env,
82
+ stdio: ["ignore", "inherit", "inherit", "ipc"],
83
+ execArgv: [],
84
+ });
85
+ await new Promise((resolve, reject) => {
86
+ const timer = setTimeout(() => {
87
+ child.kill();
88
+ reject(new Error(`[local-llm] 子进程启动超时(${readyTimeoutMs}ms)`));
89
+ }, readyTimeoutMs);
90
+ child.on("message", (msg) => {
91
+ if (msg?.type === "ready") {
92
+ clearTimeout(timer);
93
+ resolve();
94
+ }
95
+ else if (msg?.type === "error") {
96
+ clearTimeout(timer);
97
+ reject(new Error(`[local-llm] 子进程错误: ${msg.message}`));
98
+ }
99
+ });
100
+ child.on("exit", (code) => {
101
+ clearTimeout(timer);
102
+ if (code !== 0)
103
+ reject(new Error(`[local-llm] 子进程意外退出,code=${code}`));
104
+ });
105
+ child.on("error", (e) => {
106
+ clearTimeout(timer);
107
+ reject(e);
108
+ });
109
+ });
110
+ // 主进程退出时清理子进程
111
+ const cleanup = () => { try {
112
+ child.kill();
113
+ }
114
+ catch { /* ignore */ } };
115
+ process.on("exit", cleanup);
116
+ process.on("SIGINT", cleanup);
117
+ process.on("SIGTERM", cleanup);
118
+ serverHandle = {
119
+ baseUrl: `http://127.0.0.1:${port}/v1`,
120
+ stop: () => {
121
+ serverHandle = null;
122
+ try {
123
+ child.kill();
124
+ }
125
+ catch { /* ignore */ }
126
+ },
127
+ };
128
+ // 子进程意外退出(崩溃、OOM 等)时清理 handle 与 env,避免后续请求继续连已死服务导致 "Connection error"
129
+ const onChildExit = (code, signal) => {
130
+ if (serverHandle)
131
+ serverHandle = null;
132
+ process.env.LOCAL_LLM_START_FAILED = "本地模型服务已退出,请重新点击「启动本地模型服务」";
133
+ delete process.env.LOCAL_LLM_BASE_URL;
134
+ console.warn("[local-llm] 子进程已退出 code=%s signal=%s,请重新启动本地模型服务", code, signal);
135
+ };
136
+ child.on("exit", onChildExit);
137
+ console.log(`[local-llm] 本地服务就绪: ${serverHandle.baseUrl}`);
138
+ return serverHandle;
139
+ }
140
+ // ─── 入口判断 ─────────────────────────────────────────────────────────────────
141
+ // 子进程模式:被 fork 时带 --child 参数或设置了 LOCAL_LLM_CHILD 环境变量
142
+ if (process.argv.includes("--child") || process.env.LOCAL_LLM_CHILD === "1") {
143
+ runChildProcess().catch((e) => {
144
+ console.error("[local-llm] 致命错误:", e);
145
+ process.exit(1);
146
+ });
147
+ }
@@ -0,0 +1,65 @@
1
+ export interface LlmContextOptions {
2
+ /** LLM 推理模型路径或 hf: URI,可选;不传则仅提供 embedding */
3
+ llmModelPath?: string;
4
+ /** Embedding 模型路径或 hf: URI,可选;不传则仅提供 chat */
5
+ embeddingModelPath?: string;
6
+ /** GPU layers,-1 表示全部卸载到 GPU(Metal),0 表示纯 CPU */
7
+ gpuLayers?: number;
8
+ /** 上下文窗口大小,默认 32768(32K) */
9
+ contextSize?: number;
10
+ }
11
+ export interface ChatMessage {
12
+ role: "system" | "user" | "assistant" | "tool";
13
+ content: string | null;
14
+ /** tool_calls(assistant 发起工具调用时) */
15
+ tool_calls?: ToolCall[];
16
+ /** tool_call_id(role=tool 时,对应哪个 tool_call) */
17
+ tool_call_id?: string;
18
+ /** tool 消息的函数名 */
19
+ name?: string;
20
+ }
21
+ export interface ToolDefinition {
22
+ type: "function";
23
+ function: {
24
+ name: string;
25
+ description?: string;
26
+ parameters?: Record<string, unknown>;
27
+ };
28
+ }
29
+ export interface ToolCall {
30
+ id: string;
31
+ type: "function";
32
+ function: {
33
+ name: string;
34
+ arguments: string;
35
+ };
36
+ }
37
+ export interface ChatCompletionChunk {
38
+ content?: string;
39
+ tool_calls?: ToolCall[];
40
+ finish_reason?: "stop" | "tool_calls" | "length";
41
+ }
42
+ export declare function initModels(opts: LlmContextOptions): Promise<void>;
43
+ /**
44
+ * 流式 chat completion。
45
+ * onChunk 每次收到新 token 时调用;结束后返回完整 finish_reason。
46
+ */
47
+ export declare function chatCompletionStream(messages: ChatMessage[], tools: ToolDefinition[], onChunk: (chunk: ChatCompletionChunk) => void, signal?: AbortSignal): Promise<void>;
48
+ /**
49
+ * 非流式 chat completion(内部复用流式实现)。
50
+ */
51
+ export declare function chatCompletion(messages: ChatMessage[], tools: ToolDefinition[], signal?: AbortSignal): Promise<{
52
+ content: string;
53
+ tool_calls?: ToolCall[];
54
+ finish_reason: string;
55
+ }>;
56
+ /**
57
+ * 文本 embedding,返回 L2 归一化向量。
58
+ */
59
+ export declare function getEmbedding(text: string): Promise<number[]>;
60
+ /** 是否至少加载了一个模型(LLM 或 Embedding) */
61
+ export declare function isReady(): boolean;
62
+ /** 是否有 LLM,可提供 chat/completions */
63
+ export declare function isLlmReady(): boolean;
64
+ /** 是否有 Embedding,可提供 embeddings */
65
+ export declare function isEmbeddingReady(): boolean;