@next-open-ai/openclawx 0.8.40 → 0.8.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/apps/desktop/renderer/dist/assets/index-M5VGUUpo.js +93 -0
- package/apps/desktop/renderer/dist/assets/index-y8oE2q_u.css +10 -0
- package/apps/desktop/renderer/dist/index.html +2 -2
- package/dist/cli/cli.js +107 -0
- package/dist/core/agent/agent-manager.js +13 -2
- package/dist/core/agent/proxy/adapters/local-adapter.js +1 -1
- package/dist/core/config/desktop-config.d.ts +4 -1
- package/dist/core/config/desktop-config.js +108 -21
- package/dist/core/config/provider-support-default.js +26 -0
- package/dist/core/local-llm-server/download-model.d.ts +16 -0
- package/dist/core/local-llm-server/download-model.js +37 -0
- package/dist/core/local-llm-server/index.d.ts +32 -0
- package/dist/core/local-llm-server/index.js +147 -0
- package/dist/core/local-llm-server/llm-context.d.ts +65 -0
- package/dist/core/local-llm-server/llm-context.js +242 -0
- package/dist/core/local-llm-server/model-resolve.d.ts +27 -0
- package/dist/core/local-llm-server/model-resolve.js +90 -0
- package/dist/core/local-llm-server/server.d.ts +1 -0
- package/dist/core/local-llm-server/server.js +234 -0
- package/dist/core/local-llm-server/start-from-config.d.ts +5 -0
- package/dist/core/local-llm-server/start-from-config.js +50 -0
- package/dist/core/mcp/transport/stdio.d.ts +6 -0
- package/dist/core/mcp/transport/stdio.js +107 -27
- package/dist/core/memory/local-embedding-llama.js +2 -4
- package/dist/core/memory/local-embedding.d.ts +4 -3
- package/dist/core/memory/local-embedding.js +43 -3
- package/dist/gateway/methods/agent-chat.js +80 -41
- package/dist/gateway/server.js +10 -0
- package/dist/server/agent-config/agent-config.controller.d.ts +1 -1
- package/dist/server/agent-config/agent-config.service.d.ts +2 -0
- package/dist/server/agent-config/agent-config.service.js +5 -0
- package/dist/server/bootstrap.d.ts +1 -0
- package/dist/server/bootstrap.js +3 -0
- package/dist/server/config/config.controller.d.ts +81 -4
- package/dist/server/config/config.controller.js +185 -3
- package/dist/server/config/config.module.js +3 -2
- package/dist/server/config/config.service.d.ts +4 -1
- package/dist/server/config/config.service.js +62 -9
- package/dist/server/config/local-models.service.d.ts +67 -0
- package/dist/server/config/local-models.service.js +243 -0
- package/package.json +1 -1
- package/presets/preset-agents.json +6 -2
- package/presets/preset-config.json +24 -6
- package/presets/recommended-local-models.json +42 -0
- package/apps/desktop/renderer/dist/assets/index-BSfTiTKo.css +0 -10
- package/apps/desktop/renderer/dist/assets/index-DgLpQsA-.js +0 -89
- package/presets/workspaces/finance-expert/skills/akshare-helper/SKILL.md +0 -9
|
@@ -209,8 +209,8 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
209
209
|
}
|
|
210
210
|
}
|
|
211
211
|
const resolvedAgentId = agentId === "default" ? "default" : agentId;
|
|
212
|
-
let provider = config.defaultProvider ?? "
|
|
213
|
-
let model = config.defaultModel ?? "
|
|
212
|
+
let provider = config.defaultProvider ?? "ollama";
|
|
213
|
+
let model = config.defaultModel ?? "qwen3:4b";
|
|
214
214
|
if (config.defaultModelItemCode && Array.isArray(config.configuredModels)) {
|
|
215
215
|
const configured = config.configuredModels.find((m) => m.modelItemCode === config.defaultModelItemCode);
|
|
216
216
|
if (configured) {
|
|
@@ -218,11 +218,14 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
218
218
|
model = configured.modelId;
|
|
219
219
|
}
|
|
220
220
|
}
|
|
221
|
+
/** 是否从当前智能体自己的配置得到了模型(有 modelItemCode 或 provider/model);若否,则使用的是全局默认 */
|
|
222
|
+
let agentHadOwnModel = false;
|
|
221
223
|
let workspaceName = resolvedAgentId;
|
|
222
224
|
let mcpServers;
|
|
223
225
|
let mcpMaxResultTokens;
|
|
224
226
|
let systemPrompt;
|
|
225
227
|
let useLongMemory = true;
|
|
228
|
+
let contextSize;
|
|
226
229
|
if (existsSync(agentsPath)) {
|
|
227
230
|
try {
|
|
228
231
|
const raw = await readFile(agentsPath, "utf-8");
|
|
@@ -237,6 +240,9 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
237
240
|
if (agent.mcpMaxResultTokens != null && typeof agent.mcpMaxResultTokens === "number" && agent.mcpMaxResultTokens > 0) {
|
|
238
241
|
mcpMaxResultTokens = agent.mcpMaxResultTokens;
|
|
239
242
|
}
|
|
243
|
+
if (agent.contextSize != null && typeof agent.contextSize === "number" && agent.contextSize > 0) {
|
|
244
|
+
contextSize = agent.contextSize;
|
|
245
|
+
}
|
|
240
246
|
if (agent.mcpServers != null) {
|
|
241
247
|
if (Array.isArray(agent.mcpServers) || (typeof agent.mcpServers === "object" && !Array.isArray(agent.mcpServers))) {
|
|
242
248
|
mcpServers = agent.mcpServers;
|
|
@@ -252,19 +258,28 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
252
258
|
if (configured) {
|
|
253
259
|
provider = configured.provider;
|
|
254
260
|
model = configured.modelId;
|
|
261
|
+
agentHadOwnModel = true;
|
|
255
262
|
}
|
|
256
263
|
else {
|
|
257
|
-
if (agent.provider)
|
|
264
|
+
if (agent.provider) {
|
|
258
265
|
provider = agent.provider;
|
|
259
|
-
|
|
266
|
+
agentHadOwnModel = true;
|
|
267
|
+
}
|
|
268
|
+
if (agent.model) {
|
|
260
269
|
model = agent.model;
|
|
270
|
+
agentHadOwnModel = true;
|
|
271
|
+
}
|
|
261
272
|
}
|
|
262
273
|
}
|
|
263
274
|
else {
|
|
264
|
-
if (agent.provider)
|
|
275
|
+
if (agent.provider) {
|
|
265
276
|
provider = agent.provider;
|
|
266
|
-
|
|
277
|
+
agentHadOwnModel = true;
|
|
278
|
+
}
|
|
279
|
+
if (agent.model) {
|
|
267
280
|
model = agent.model;
|
|
281
|
+
agentHadOwnModel = true;
|
|
282
|
+
}
|
|
268
283
|
}
|
|
269
284
|
}
|
|
270
285
|
}
|
|
@@ -272,6 +287,11 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
272
287
|
// ignore
|
|
273
288
|
}
|
|
274
289
|
}
|
|
290
|
+
// 本地 LLM 可用且当前智能体未配置自己的模型时,使用本地推理作为缺省,使所有智能体“拥有”该配置
|
|
291
|
+
if (!agentHadOwnModel && process.env.LOCAL_LLM_BASE_URL?.trim()) {
|
|
292
|
+
provider = "local";
|
|
293
|
+
model = "local-llm";
|
|
294
|
+
}
|
|
275
295
|
const provConfig = config.providers?.[provider];
|
|
276
296
|
const apiKey = provConfig?.apiKey && typeof provConfig.apiKey === "string" && provConfig.apiKey.trim()
|
|
277
297
|
? provConfig.apiKey.trim()
|
|
@@ -427,6 +447,7 @@ export async function loadDesktopAgentConfig(agentId) {
|
|
|
427
447
|
claudeCode,
|
|
428
448
|
useLongMemory,
|
|
429
449
|
webSearch,
|
|
450
|
+
contextSize,
|
|
430
451
|
};
|
|
431
452
|
}
|
|
432
453
|
function ensureDesktopDir() {
|
|
@@ -624,22 +645,54 @@ export async function ensureProviderSupportFile() {
|
|
|
624
645
|
await writeFile(path, JSON.stringify(presetProviders, null, 2), "utf-8");
|
|
625
646
|
}
|
|
626
647
|
}
|
|
627
|
-
/**
|
|
648
|
+
/** 预装本地推理缺省:推荐列表第一个 LLM(Qwen3-4B)对应的本地文件名,与 modelUriToFilename 一致 */
|
|
649
|
+
const DEFAULT_LOCAL_LLM_MODEL_ID = "hf_Qwen_Qwen3-4B-GGUF_Qwen3-4B-Q4_K_M.gguf";
|
|
650
|
+
const DEFAULT_LOCAL_MODEL_ITEM_CODE = "local-qwen3-4b";
|
|
651
|
+
/** 代码内建默认:local provider + 本地 Qwen3-4B,首次与合并时优先保证存在 */
|
|
652
|
+
const BUILTIN_DEFAULT_CONFIG = {
|
|
653
|
+
defaultProvider: "local",
|
|
654
|
+
defaultModel: DEFAULT_LOCAL_LLM_MODEL_ID,
|
|
655
|
+
defaultModelItemCode: DEFAULT_LOCAL_MODEL_ITEM_CODE,
|
|
656
|
+
defaultAgentId: DEFAULT_AGENT_ID,
|
|
657
|
+
maxAgentSessions: DEFAULT_MAX_AGENT_SESSIONS,
|
|
658
|
+
providers: {
|
|
659
|
+
local: { baseUrl: "http://127.0.0.1:11435/v1" },
|
|
660
|
+
},
|
|
661
|
+
configuredModels: [
|
|
662
|
+
{
|
|
663
|
+
provider: "local",
|
|
664
|
+
modelId: DEFAULT_LOCAL_LLM_MODEL_ID,
|
|
665
|
+
type: "llm",
|
|
666
|
+
alias: "Qwen3 4B Q4_K_M",
|
|
667
|
+
modelItemCode: DEFAULT_LOCAL_MODEL_ITEM_CODE,
|
|
668
|
+
},
|
|
669
|
+
{
|
|
670
|
+
provider: "local",
|
|
671
|
+
modelId: "hf_ggml-org_embeddinggemma-300M-GGUF_embeddinggemma-300M-Q8_0.gguf",
|
|
672
|
+
type: "embedding",
|
|
673
|
+
alias: "EmbeddingGemma 300M Q8 (768维)",
|
|
674
|
+
modelItemCode: "local-embeddinggemma-300m",
|
|
675
|
+
},
|
|
676
|
+
],
|
|
677
|
+
};
|
|
678
|
+
/** 若 config.json 不存在则用 preset-config.json 初始化,若存在则浅合并补充新基础键值。预装 local provider + 本地 Qwen3-4B 模型并设为缺省;preset 与代码默认合并,保证 local 一定存在。 */
|
|
628
679
|
async function ensureConfigJsonInitialized() {
|
|
629
680
|
const presetPath = join(getPresetsDir(), "preset-config.json");
|
|
630
|
-
let presetConfig = {
|
|
631
|
-
defaultProvider: "deepseek",
|
|
632
|
-
defaultModel: "deepseek-chat",
|
|
633
|
-
defaultAgentId: DEFAULT_AGENT_ID,
|
|
634
|
-
maxAgentSessions: DEFAULT_MAX_AGENT_SESSIONS,
|
|
635
|
-
providers: {},
|
|
636
|
-
configuredModels: [],
|
|
637
|
-
};
|
|
681
|
+
let presetConfig = { ...BUILTIN_DEFAULT_CONFIG };
|
|
638
682
|
if (existsSync(presetPath)) {
|
|
639
683
|
try {
|
|
640
684
|
const data = JSON.parse(await readFile(presetPath, "utf-8"));
|
|
641
|
-
if (data.config)
|
|
642
|
-
presetConfig = data.config;
|
|
685
|
+
if (data.config && typeof data.config === "object") {
|
|
686
|
+
presetConfig = { ...BUILTIN_DEFAULT_CONFIG, ...data.config };
|
|
687
|
+
presetConfig.providers = { ...BUILTIN_DEFAULT_CONFIG.providers, ...(presetConfig.providers || {}) };
|
|
688
|
+
const hasLocalModel = (presetConfig.configuredModels || []).some((m) => m?.provider === "local" && (m?.modelId === DEFAULT_LOCAL_LLM_MODEL_ID || m?.modelItemCode === DEFAULT_LOCAL_MODEL_ITEM_CODE));
|
|
689
|
+
if (!hasLocalModel) {
|
|
690
|
+
presetConfig.configuredModels = [
|
|
691
|
+
...(BUILTIN_DEFAULT_CONFIG.configuredModels || []),
|
|
692
|
+
...(presetConfig.configuredModels || []),
|
|
693
|
+
];
|
|
694
|
+
}
|
|
695
|
+
}
|
|
643
696
|
}
|
|
644
697
|
catch { }
|
|
645
698
|
}
|
|
@@ -708,18 +761,46 @@ async function ensureAgentsJsonInitialized() {
|
|
|
708
761
|
}
|
|
709
762
|
}
|
|
710
763
|
}
|
|
764
|
+
// 所有未单独配置模型的智能体使用 config 的缺省模型(预装为 local + Qwen3-4B)
|
|
765
|
+
const configPath = join(getDesktopDir(), "config.json");
|
|
766
|
+
if (existsSync(configPath)) {
|
|
767
|
+
try {
|
|
768
|
+
const configRaw = await readFile(configPath, "utf-8");
|
|
769
|
+
const configData = JSON.parse(configRaw);
|
|
770
|
+
const defProvider = configData.defaultProvider?.trim();
|
|
771
|
+
const defModel = configData.defaultModel?.trim();
|
|
772
|
+
const defCode = configData.defaultModelItemCode?.trim();
|
|
773
|
+
if (defProvider && defModel) {
|
|
774
|
+
for (const agent of currentData.agents) {
|
|
775
|
+
const hasOwn = (agent.provider && String(agent.provider).trim()) || (agent.model && String(agent.model).trim()) || (agent.modelItemCode && String(agent.modelItemCode).trim());
|
|
776
|
+
if (!hasOwn) {
|
|
777
|
+
agent.provider = defProvider;
|
|
778
|
+
agent.model = defModel;
|
|
779
|
+
if (defCode)
|
|
780
|
+
agent.modelItemCode = defCode;
|
|
781
|
+
changed = true;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
catch { /* ignore */ }
|
|
787
|
+
}
|
|
711
788
|
if (changed || !existsSync(agentsPath)) {
|
|
712
789
|
await writeFile(agentsPath, JSON.stringify(currentData, null, 2), "utf-8");
|
|
713
790
|
}
|
|
714
791
|
}
|
|
715
792
|
/**
|
|
716
|
-
* CLI / Gateway 运行时调用,确保 config.json、provider-support.json、agents.json
|
|
793
|
+
* CLI / Gateway 运行时调用,确保 config.json、provider-support.json、agents.json 均完成初始化,
|
|
794
|
+
* 并同步到 agent 目录 models.json,供 pi ModelRegistry 解析 local 等模型与凭证。
|
|
717
795
|
*/
|
|
718
796
|
export async function ensureDesktopConfigInitialized() {
|
|
719
797
|
ensureDesktopDir();
|
|
720
798
|
await ensureProviderSupportFile();
|
|
721
799
|
await ensureConfigJsonInitialized();
|
|
722
800
|
await ensureAgentsJsonInitialized();
|
|
801
|
+
await syncDesktopConfigToModelsJson().catch((err) => {
|
|
802
|
+
console.warn("[ensureDesktopConfigInitialized] syncDesktopConfigToModelsJson failed:", err);
|
|
803
|
+
});
|
|
723
804
|
}
|
|
724
805
|
/**
|
|
725
806
|
* 取某 provider 在 provider-support 中的第一个 llm 模型 id;若无则返回第一个模型 id。
|
|
@@ -770,6 +851,10 @@ const SYNC_DEFAULTS = {
|
|
|
770
851
|
"openai-custom": { baseUrl: "", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
|
|
771
852
|
nvidia: { baseUrl: "https://integrate.api.nvidia.com/v1", apiKey: "NVIDIA_API_KEY", api: "openai-completions" },
|
|
772
853
|
kimi: { baseUrl: "https://api.moonshot.cn/v1", apiKey: "MOONSHOT_API_KEY", api: "openai-completions" },
|
|
854
|
+
/** 本地 Ollama,无需真实 API Key */
|
|
855
|
+
ollama: { baseUrl: "http://localhost:11434/v1", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
|
|
856
|
+
/** 内置本地推理(node-llama-cpp),无需 API Key,baseUrl 指向本地子进程服务 */
|
|
857
|
+
local: { baseUrl: "http://127.0.0.1:11435/v1", apiKey: "OPENAI_API_KEY", api: "openai-completions" },
|
|
773
858
|
};
|
|
774
859
|
const DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
|
|
775
860
|
const DEFAULT_CONTEXT_WINDOW = 64000;
|
|
@@ -806,10 +891,12 @@ export async function syncDesktopConfigToModelsJson() {
|
|
|
806
891
|
const support = await getProviderSupport();
|
|
807
892
|
const piProviders = {};
|
|
808
893
|
for (const [providerId, userConfig] of Object.entries(configured)) {
|
|
809
|
-
|
|
894
|
+
// ollama / local 不需要 API Key,其他 provider 必须有 apiKey
|
|
895
|
+
const isNoKeyProvider = providerId === "ollama" || providerId === "local";
|
|
896
|
+
if (!isNoKeyProvider && !userConfig?.apiKey?.trim())
|
|
810
897
|
continue;
|
|
811
898
|
const defaults = SYNC_DEFAULTS[providerId] ?? { baseUrl: "", apiKey: "OPENAI_API_KEY", api: "openai-completions" };
|
|
812
|
-
const baseUrl = userConfig
|
|
899
|
+
const baseUrl = userConfig?.baseUrl?.trim() || (support[providerId]?.baseUrl ?? "").trim() || defaults.baseUrl;
|
|
813
900
|
if (!baseUrl)
|
|
814
901
|
continue;
|
|
815
902
|
const def = support[providerId];
|
|
@@ -839,7 +926,7 @@ export async function syncDesktopConfigToModelsJson() {
|
|
|
839
926
|
continue;
|
|
840
927
|
}
|
|
841
928
|
piProviders[providerId] = {
|
|
842
|
-
name: (userConfig
|
|
929
|
+
name: (userConfig?.alias?.trim() || def?.name) || providerId,
|
|
843
930
|
apiKey: defaults.apiKey,
|
|
844
931
|
api: defaults.api,
|
|
845
932
|
baseUrl: baseUrl.replace(/\/$/, ""),
|
|
@@ -54,4 +54,30 @@ export const DEFAULT_PROVIDER_SUPPORT = {
|
|
|
54
54
|
{ id: "moonshot-v1-128k", name: "Moonshot 128K", types: ["llm"] },
|
|
55
55
|
],
|
|
56
56
|
},
|
|
57
|
+
/** 本地 Ollama 服务,兼容 OpenAI API;baseUrl 指向本机 Ollama 默认端口 */
|
|
58
|
+
ollama: {
|
|
59
|
+
name: "Ollama (本地)",
|
|
60
|
+
baseUrl: "http://localhost:11434/v1",
|
|
61
|
+
models: [
|
|
62
|
+
{ id: "qwen3:4b", name: "Qwen3 4B", types: ["llm"] },
|
|
63
|
+
{ id: "qwen3:8b", name: "Qwen3 8B", types: ["llm"] },
|
|
64
|
+
{ id: "qwen3:14b", name: "Qwen3 14B", types: ["llm"] },
|
|
65
|
+
{ id: "llama3.2:3b", name: "Llama 3.2 3B", types: ["llm"] },
|
|
66
|
+
{ id: "llama3.2:1b", name: "Llama 3.2 1B", types: ["llm"] },
|
|
67
|
+
{ id: "nomic-embed-text", name: "Nomic Embed Text", types: ["embedding"] },
|
|
68
|
+
],
|
|
69
|
+
},
|
|
70
|
+
/**
|
|
71
|
+
* 内置本地推理(node-llama-cpp),无需安装 Ollama。
|
|
72
|
+
* baseUrl 指向本地 LLM 子进程服务;模型列表为推荐的 GGUF 模型,可在本地模型管理页面增删。
|
|
73
|
+
* 无需 API Key。
|
|
74
|
+
*/
|
|
75
|
+
local: {
|
|
76
|
+
name: "本地推理 (node-llama-cpp)",
|
|
77
|
+
baseUrl: "http://127.0.0.1:11435/v1",
|
|
78
|
+
models: [
|
|
79
|
+
{ id: "local-llm", name: "本地 LLM(当前加载)", types: ["llm"] },
|
|
80
|
+
{ id: "local-embedding", name: "本地 Embedding(当前加载)", types: ["embedding"] },
|
|
81
|
+
],
|
|
82
|
+
},
|
|
57
83
|
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export declare const DEFAULT_LLM_MODEL_URI = "hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf";
|
|
2
|
+
export interface DownloadModelOptions {
|
|
3
|
+
useMirror?: boolean;
|
|
4
|
+
signal?: AbortSignal;
|
|
5
|
+
onProgress?: (p: {
|
|
6
|
+
downloadedSize: number;
|
|
7
|
+
totalSize: number;
|
|
8
|
+
percent: number;
|
|
9
|
+
}) => void;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* 下载模型到本地缓存目录。
|
|
13
|
+
* @returns 解析后的本地文件路径
|
|
14
|
+
*/
|
|
15
|
+
export declare function downloadModel(modelUri: string, options?: DownloadModelOptions): Promise<string>;
|
|
16
|
+
export declare function getResolvedBasename(modelUri: string): string;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 本地模型下载(供 CLI 与 Nest LocalModelsService 复用)。
|
|
3
|
+
* 使用 node-llama-cpp resolveModelFile,缓存目录 ~/.openbot/.cached_models/。
|
|
4
|
+
*/
|
|
5
|
+
import { basename } from "node:path";
|
|
6
|
+
import { LOCAL_LLM_CACHE_DIR } from "./model-resolve.js";
|
|
7
|
+
export const DEFAULT_LLM_MODEL_URI = "hf:Qwen/Qwen3-4B-GGUF/Qwen3-4B-Q4_K_M.gguf";
|
|
8
|
+
/**
|
|
9
|
+
* 下载模型到本地缓存目录。
|
|
10
|
+
* @returns 解析后的本地文件路径
|
|
11
|
+
*/
|
|
12
|
+
export async function downloadModel(modelUri, options = {}) {
|
|
13
|
+
const { resolveModelFile } = await import("node-llama-cpp");
|
|
14
|
+
const { useMirror = false, signal, onProgress } = options;
|
|
15
|
+
const hfToken = process.env.HF_TOKEN || process.env.HUGGING_FACE_TOKEN;
|
|
16
|
+
const opts = {
|
|
17
|
+
directory: LOCAL_LLM_CACHE_DIR,
|
|
18
|
+
endpoints: {
|
|
19
|
+
huggingFace: useMirror ? "https://hf-mirror.com/" : "https://huggingface.co/",
|
|
20
|
+
},
|
|
21
|
+
};
|
|
22
|
+
if (signal)
|
|
23
|
+
opts.signal = signal;
|
|
24
|
+
if (hfToken)
|
|
25
|
+
opts.headers = { Authorization: `Bearer ${hfToken}` };
|
|
26
|
+
if (onProgress) {
|
|
27
|
+
opts.onProgress = ({ downloadedSize, totalSize }) => {
|
|
28
|
+
const percent = totalSize ? Math.round((downloadedSize / totalSize) * 100) : 0;
|
|
29
|
+
onProgress({ downloadedSize, totalSize, percent });
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
const resolved = await resolveModelFile(modelUri, opts);
|
|
33
|
+
return resolved;
|
|
34
|
+
}
|
|
35
|
+
export function getResolvedBasename(modelUri) {
|
|
36
|
+
return basename(modelUri.replace(/^hf:[^/]+\//, "").replace(/\//g, "_"));
|
|
37
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* local-llm-server 入口。
|
|
3
|
+
*
|
|
4
|
+
* 两种运行模式:
|
|
5
|
+
* 1. 子进程模式(--child):直接加载模型并启动 HTTP 服务,由主进程 fork 调用。
|
|
6
|
+
* 2. 主进程模式(默认导出):fork 子进程,管理其生命周期,提供 baseUrl 给调用方。
|
|
7
|
+
*
|
|
8
|
+
* 主进程通过 startLocalLlmServer() 启动,返回 { baseUrl, stop }。
|
|
9
|
+
* 子进程就绪后通过 IPC 发送 { type: "ready" } 通知主进程。
|
|
10
|
+
*/
|
|
11
|
+
export interface LocalLlmServerOptions {
|
|
12
|
+
port?: number;
|
|
13
|
+
llmModelPath?: string;
|
|
14
|
+
embeddingModelPath?: string;
|
|
15
|
+
/** 上下文窗口 token 数,默认 32768(32K),需能容纳 system + tools + 对话 */
|
|
16
|
+
contextSize?: number;
|
|
17
|
+
/** 等待子进程就绪的超时毫秒数,默认 300000(5 分钟,冷启/大模型加载可能较慢) */
|
|
18
|
+
readyTimeoutMs?: number;
|
|
19
|
+
}
|
|
20
|
+
export interface LocalLlmServerHandle {
|
|
21
|
+
baseUrl: string;
|
|
22
|
+
stop: () => void;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* 停止本地 LLM 子进程服务(若正在运行)。用于切换模型前先停止再启动。
|
|
26
|
+
*/
|
|
27
|
+
export declare function stopLocalLlmServer(): void;
|
|
28
|
+
/**
|
|
29
|
+
* 启动本地 LLM 子进程服务。
|
|
30
|
+
* 已启动时直接返回已有 handle(单例)。需先 stop 再传新参数重启。
|
|
31
|
+
*/
|
|
32
|
+
export declare function startLocalLlmServer(opts?: LocalLlmServerOptions): Promise<LocalLlmServerHandle>;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* local-llm-server 入口。
|
|
3
|
+
*
|
|
4
|
+
* 两种运行模式:
|
|
5
|
+
* 1. 子进程模式(--child):直接加载模型并启动 HTTP 服务,由主进程 fork 调用。
|
|
6
|
+
* 2. 主进程模式(默认导出):fork 子进程,管理其生命周期,提供 baseUrl 给调用方。
|
|
7
|
+
*
|
|
8
|
+
* 主进程通过 startLocalLlmServer() 启动,返回 { baseUrl, stop }。
|
|
9
|
+
* 子进程就绪后通过 IPC 发送 { type: "ready" } 通知主进程。
|
|
10
|
+
*/
|
|
11
|
+
import { fileURLToPath } from "node:url";
|
|
12
|
+
// ─── 子进程模式 ───────────────────────────────────────────────────────────────
|
|
13
|
+
async function runChildProcess() {
|
|
14
|
+
const port = parseInt(process.env.LOCAL_LLM_PORT ?? "11435", 10);
|
|
15
|
+
const llmModelPath = process.env.LOCAL_LLM_MODEL?.trim() || undefined;
|
|
16
|
+
const embModelPath = process.env.LOCAL_EMB_MODEL?.trim() || undefined;
|
|
17
|
+
let contextSize = process.env.LOCAL_LLM_CONTEXT_SIZE != null ? parseInt(process.env.LOCAL_LLM_CONTEXT_SIZE, 10) : undefined;
|
|
18
|
+
if (contextSize == null && process.env.LOCAL_LLM_CONTEXT_MAX != null && String(process.env.LOCAL_LLM_CONTEXT_MAX).trim() !== '') {
|
|
19
|
+
contextSize = parseInt(process.env.LOCAL_LLM_CONTEXT_MAX, 10) || undefined;
|
|
20
|
+
}
|
|
21
|
+
if (!llmModelPath && !embModelPath) {
|
|
22
|
+
console.error("[local-llm] 未指定 LLM 或 Embedding 模型路径,至少需提供一个");
|
|
23
|
+
if (process.send)
|
|
24
|
+
process.send({ type: "error", message: "至少需指定 LOCAL_LLM_MODEL 或 LOCAL_EMB_MODEL" });
|
|
25
|
+
process.exit(1);
|
|
26
|
+
}
|
|
27
|
+
const { initModels } = await import("./llm-context.js");
|
|
28
|
+
const { createOpenAICompatServer } = await import("./server.js");
|
|
29
|
+
try {
|
|
30
|
+
await initModels({
|
|
31
|
+
...(llmModelPath ? { llmModelPath } : {}),
|
|
32
|
+
...(embModelPath ? { embeddingModelPath: embModelPath } : {}),
|
|
33
|
+
contextSize: contextSize ?? 32768,
|
|
34
|
+
});
|
|
35
|
+
await createOpenAICompatServer(port);
|
|
36
|
+
if (process.send) {
|
|
37
|
+
process.send({ type: "ready", port });
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
catch (e) {
|
|
41
|
+
console.error("[local-llm] 子进程启动失败:", e);
|
|
42
|
+
if (process.send) {
|
|
43
|
+
process.send({ type: "error", message: String(e) });
|
|
44
|
+
}
|
|
45
|
+
process.exit(1);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
let serverHandle = null;
|
|
49
|
+
/**
|
|
50
|
+
* 停止本地 LLM 子进程服务(若正在运行)。用于切换模型前先停止再启动。
|
|
51
|
+
*/
|
|
52
|
+
export function stopLocalLlmServer() {
|
|
53
|
+
if (serverHandle) {
|
|
54
|
+
serverHandle.stop();
|
|
55
|
+
serverHandle = null;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* 启动本地 LLM 子进程服务。
|
|
60
|
+
* 已启动时直接返回已有 handle(单例)。需先 stop 再传新参数重启。
|
|
61
|
+
*/
|
|
62
|
+
export async function startLocalLlmServer(opts = {}) {
|
|
63
|
+
if (serverHandle)
|
|
64
|
+
return serverHandle;
|
|
65
|
+
const { fork } = await import("node:child_process");
|
|
66
|
+
const port = opts.port ?? 11435;
|
|
67
|
+
const readyTimeoutMs = opts.readyTimeoutMs ?? 300_000;
|
|
68
|
+
const env = {
|
|
69
|
+
...process.env,
|
|
70
|
+
LOCAL_LLM_PORT: String(port),
|
|
71
|
+
LOCAL_LLM_CHILD: "1",
|
|
72
|
+
};
|
|
73
|
+
if (opts.llmModelPath)
|
|
74
|
+
env.LOCAL_LLM_MODEL = opts.llmModelPath;
|
|
75
|
+
if (opts.embeddingModelPath)
|
|
76
|
+
env.LOCAL_EMB_MODEL = opts.embeddingModelPath;
|
|
77
|
+
if (opts.contextSize != null)
|
|
78
|
+
env.LOCAL_LLM_CONTEXT_SIZE = String(opts.contextSize);
|
|
79
|
+
const childPath = fileURLToPath(import.meta.url);
|
|
80
|
+
const child = fork(childPath, ["--child"], {
|
|
81
|
+
env,
|
|
82
|
+
stdio: ["ignore", "inherit", "inherit", "ipc"],
|
|
83
|
+
execArgv: [],
|
|
84
|
+
});
|
|
85
|
+
await new Promise((resolve, reject) => {
|
|
86
|
+
const timer = setTimeout(() => {
|
|
87
|
+
child.kill();
|
|
88
|
+
reject(new Error(`[local-llm] 子进程启动超时(${readyTimeoutMs}ms)`));
|
|
89
|
+
}, readyTimeoutMs);
|
|
90
|
+
child.on("message", (msg) => {
|
|
91
|
+
if (msg?.type === "ready") {
|
|
92
|
+
clearTimeout(timer);
|
|
93
|
+
resolve();
|
|
94
|
+
}
|
|
95
|
+
else if (msg?.type === "error") {
|
|
96
|
+
clearTimeout(timer);
|
|
97
|
+
reject(new Error(`[local-llm] 子进程错误: ${msg.message}`));
|
|
98
|
+
}
|
|
99
|
+
});
|
|
100
|
+
child.on("exit", (code) => {
|
|
101
|
+
clearTimeout(timer);
|
|
102
|
+
if (code !== 0)
|
|
103
|
+
reject(new Error(`[local-llm] 子进程意外退出,code=${code}`));
|
|
104
|
+
});
|
|
105
|
+
child.on("error", (e) => {
|
|
106
|
+
clearTimeout(timer);
|
|
107
|
+
reject(e);
|
|
108
|
+
});
|
|
109
|
+
});
|
|
110
|
+
// 主进程退出时清理子进程
|
|
111
|
+
const cleanup = () => { try {
|
|
112
|
+
child.kill();
|
|
113
|
+
}
|
|
114
|
+
catch { /* ignore */ } };
|
|
115
|
+
process.on("exit", cleanup);
|
|
116
|
+
process.on("SIGINT", cleanup);
|
|
117
|
+
process.on("SIGTERM", cleanup);
|
|
118
|
+
serverHandle = {
|
|
119
|
+
baseUrl: `http://127.0.0.1:${port}/v1`,
|
|
120
|
+
stop: () => {
|
|
121
|
+
serverHandle = null;
|
|
122
|
+
try {
|
|
123
|
+
child.kill();
|
|
124
|
+
}
|
|
125
|
+
catch { /* ignore */ }
|
|
126
|
+
},
|
|
127
|
+
};
|
|
128
|
+
// 子进程意外退出(崩溃、OOM 等)时清理 handle 与 env,避免后续请求继续连已死服务导致 "Connection error"
|
|
129
|
+
const onChildExit = (code, signal) => {
|
|
130
|
+
if (serverHandle)
|
|
131
|
+
serverHandle = null;
|
|
132
|
+
process.env.LOCAL_LLM_START_FAILED = "本地模型服务已退出,请重新点击「启动本地模型服务」";
|
|
133
|
+
delete process.env.LOCAL_LLM_BASE_URL;
|
|
134
|
+
console.warn("[local-llm] 子进程已退出 code=%s signal=%s,请重新启动本地模型服务", code, signal);
|
|
135
|
+
};
|
|
136
|
+
child.on("exit", onChildExit);
|
|
137
|
+
console.log(`[local-llm] 本地服务就绪: ${serverHandle.baseUrl}`);
|
|
138
|
+
return serverHandle;
|
|
139
|
+
}
|
|
140
|
+
// ─── 入口判断 ─────────────────────────────────────────────────────────────────
|
|
141
|
+
// 子进程模式:被 fork 时带 --child 参数或设置了 LOCAL_LLM_CHILD 环境变量
|
|
142
|
+
if (process.argv.includes("--child") || process.env.LOCAL_LLM_CHILD === "1") {
|
|
143
|
+
runChildProcess().catch((e) => {
|
|
144
|
+
console.error("[local-llm] 致命错误:", e);
|
|
145
|
+
process.exit(1);
|
|
146
|
+
});
|
|
147
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
export interface LlmContextOptions {
|
|
2
|
+
/** LLM 推理模型路径或 hf: URI,可选;不传则仅提供 embedding */
|
|
3
|
+
llmModelPath?: string;
|
|
4
|
+
/** Embedding 模型路径或 hf: URI,可选;不传则仅提供 chat */
|
|
5
|
+
embeddingModelPath?: string;
|
|
6
|
+
/** GPU layers,-1 表示全部卸载到 GPU(Metal),0 表示纯 CPU */
|
|
7
|
+
gpuLayers?: number;
|
|
8
|
+
/** 上下文窗口大小,默认 32768(32K) */
|
|
9
|
+
contextSize?: number;
|
|
10
|
+
}
|
|
11
|
+
export interface ChatMessage {
|
|
12
|
+
role: "system" | "user" | "assistant" | "tool";
|
|
13
|
+
content: string | null;
|
|
14
|
+
/** tool_calls(assistant 发起工具调用时) */
|
|
15
|
+
tool_calls?: ToolCall[];
|
|
16
|
+
/** tool_call_id(role=tool 时,对应哪个 tool_call) */
|
|
17
|
+
tool_call_id?: string;
|
|
18
|
+
/** tool 消息的函数名 */
|
|
19
|
+
name?: string;
|
|
20
|
+
}
|
|
21
|
+
export interface ToolDefinition {
|
|
22
|
+
type: "function";
|
|
23
|
+
function: {
|
|
24
|
+
name: string;
|
|
25
|
+
description?: string;
|
|
26
|
+
parameters?: Record<string, unknown>;
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
export interface ToolCall {
|
|
30
|
+
id: string;
|
|
31
|
+
type: "function";
|
|
32
|
+
function: {
|
|
33
|
+
name: string;
|
|
34
|
+
arguments: string;
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
export interface ChatCompletionChunk {
|
|
38
|
+
content?: string;
|
|
39
|
+
tool_calls?: ToolCall[];
|
|
40
|
+
finish_reason?: "stop" | "tool_calls" | "length";
|
|
41
|
+
}
|
|
42
|
+
export declare function initModels(opts: LlmContextOptions): Promise<void>;
|
|
43
|
+
/**
|
|
44
|
+
* 流式 chat completion。
|
|
45
|
+
* onChunk 每次收到新 token 时调用;结束后返回完整 finish_reason。
|
|
46
|
+
*/
|
|
47
|
+
export declare function chatCompletionStream(messages: ChatMessage[], tools: ToolDefinition[], onChunk: (chunk: ChatCompletionChunk) => void, signal?: AbortSignal): Promise<void>;
|
|
48
|
+
/**
|
|
49
|
+
* 非流式 chat completion(内部复用流式实现)。
|
|
50
|
+
*/
|
|
51
|
+
export declare function chatCompletion(messages: ChatMessage[], tools: ToolDefinition[], signal?: AbortSignal): Promise<{
|
|
52
|
+
content: string;
|
|
53
|
+
tool_calls?: ToolCall[];
|
|
54
|
+
finish_reason: string;
|
|
55
|
+
}>;
|
|
56
|
+
/**
|
|
57
|
+
* 文本 embedding,返回 L2 归一化向量。
|
|
58
|
+
*/
|
|
59
|
+
export declare function getEmbedding(text: string): Promise<number[]>;
|
|
60
|
+
/** 是否至少加载了一个模型(LLM 或 Embedding) */
|
|
61
|
+
export declare function isReady(): boolean;
|
|
62
|
+
/** 是否有 LLM,可提供 chat/completions */
|
|
63
|
+
export declare function isLlmReady(): boolean;
|
|
64
|
+
/** 是否有 Embedding,可提供 embeddings */
|
|
65
|
+
export declare function isEmbeddingReady(): boolean;
|