peerllm-host-cli 1.9.3 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
/**
 * Point-in-time capacity report for one host, carried by every `HeartbeatV2`
 * message to the orchestrator.
 *
 * Property names are camelCase to mirror the payload produced by the Electron
 * host, so the server's `HostCapacity` model receives the same contract from
 * both host kinds. The orchestrator uses a fresh snapshot to route around
 * hosts that are out of VRAM or overloaded, rather than sending them work
 * until they run out of memory and crash.
 */
export interface HostCapacityPayload {
  /** Identifier of the reporting host. */
  hostId: string;
  /** Utilization class of the host, e.g. "CPU", "GPU" or "Hybrid". */
  utilizationType: string;
  /** CPU load; presumably a 0–100 percentage — confirm against the server model. */
  cpuUsage: number;
  /** GPU load; presumably a 0–100 percentage — confirm against the server model. */
  gpuUsage: number;
  /** System RAM in use, in MB. */
  ramUsedMB: number;
  /** System RAM still available, in MB. */
  ramFreeMB: number;
  /** Total system RAM, in MB. */
  ramTotalMB: number;
  /** Free video memory, in MB. */
  vramFreeMB: number;
  /** Total video memory; presumably MB like the other memory fields — confirm. */
  totalVRAM: number;
  /** Memory the scheduler may count on, in MB (see `calculateEffectiveMemory`). */
  effectiveFreeMB: number;
  /** Number of sessions currently active on the host. */
  activeSessions: number;
  /** Coarse status label (see `classifyStatus`). */
  status: string;
  /** Whether the host is willing to take a new request (see `computeCanAcceptRequests`). */
  canAcceptRequests: boolean;
  /** Timestamp of the snapshot, as a string. */
  lastUpdated: string;
}
25
/**
 * Effective free memory used for scheduling decisions. The formula is a
 * verbatim port of the Electron host's, so both host kinds report comparable
 * numbers.
 *
 * @param args - Memory readings plus the host's utilization class.
 * @returns The amount of memory, in MB, the scheduler may count on.
 */
export declare function calculateEffectiveMemory(args: {
  vramFreeMB: number;
  totalVRAM: number;
  ramFreeMB: number;
  utilizationType: string;
}): number;
32
/**
 * Reduces a host's load and memory readings to one coarse status label. The
 * rules are a verbatim port of the Electron host's.
 *
 * @param args - Utilization class, CPU/GPU load and memory readings.
 * @returns The status label that is reported in the capacity snapshot.
 */
export declare function classifyStatus(args: {
  utilizationType: string;
  cpuUsage: number;
  gpuUsage: number;
  effectiveFreeMB: number;
  vramFreeMB: number;
  totalVRAM: number;
}): string;
41
/**
 * Decides whether this host may be handed a new request.
 *
 * A 2 GB free-VRAM floor applies only to hosts whose utilization class is
 * "GPU". Every other class (CPU, Hybrid, unified memory) is judged on `status`
 * alone, which already accounts for CPU load and effective memory. CPU-only
 * machines therefore stay eligible — "a place for every piece of hardware" —
 * while discrete-GPU hosts keep their VRAM headroom. The Electron host's
 * HeartbeatV2 uses the same formula, so both host kinds agree.
 *
 * @param args - Utilization class, status label and free VRAM in MB.
 * @returns `true` when the host can take another request.
 */
export declare function computeCanAcceptRequests(args: {
  utilizationType: string;
  status: string;
  vramFreeMB: number;
}): boolean;
56
/**
 * Assembles the complete HostCapacity snapshot that goes into a HeartbeatV2
 * message.
 *
 * Intended to be best-effort: a failing probe should degrade the affected
 * field instead of rejecting, so the caller still receives a usable,
 * conservative snapshot.
 *
 * @param hostId - Identifier copied into the snapshot.
 * @param gpuIndex - Index of the GPU the user selected on multi-GPU systems.
 * @param activeSessions - Session count copied into the snapshot.
 * @returns A promise for the populated snapshot.
 */
export declare function buildHostCapacity(
  hostId: string,
  gpuIndex: number,
  activeSessions: number,
): Promise<HostCapacityPayload>;
62
+ //# sourceMappingURL=capacity.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"capacity.d.ts","sourceRoot":"","sources":["../../src/core/capacity.ts"],"names":[],"mappings":"AAcA;;;;;;;GAOG;AACH,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AAqKD,oFAAoF;AACpF,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;CACzB,GAAG,MAAM,CAST;AAED,kEAAkE;AAClE,wBAAgB,cAAc,CAAC,IAAI,EAAE;IACnC,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,CAOT;AAED;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC7C,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAKV;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,mBAAmB,CAAC,CA8C9B"}
@@ -0,0 +1,228 @@
1
+ import { execFile } from "node:child_process";
2
+ import { arch as osArch } from "node:os";
3
+ import { promisify } from "node:util";
4
+ import si from "systeminformation";
5
+ import { sampleGpuUtilization } from "./gpu-stats.js";
6
/** Promise-returning wrapper around `execFile`. */
const execFileP = promisify(execFile);
/** Timeout, in milliseconds, passed to each nvidia-smi invocation. */
const NVIDIA_SMI_TIMEOUT_MS = 1500;
/** Lifetime of a cached `si.graphics()` result: five minutes, in milliseconds. */
const GRAPHICS_CACHE_TTL_MS = 1000 * 60 * 5;
/** True only for an arm64 process on macOS, i.e. Apple Silicon. */
const isAppleSilicon = osArch() === "arm64" && process.platform === "darwin";
10
// ---------------------------------------------------------------------------
// Graphics cache — si.graphics() is the expensive probe, and what we take from
// it (vendor, static VRAM totals) rarely changes, so one result is reused for
// GRAPHICS_CACHE_TTL_MS. Live numbers (utilization, free VRAM) come from
// nvidia-smi / the GPU sampler on each tick, never from this cache.
// ---------------------------------------------------------------------------
/** Last successful `si.graphics()` result and the time it was stored. */
let cachedGraphics;
/** Probe currently running, shared by concurrent callers; undefined when idle. */
let graphicsInFlight;
/**
 * Return graphics-controller info, served from the cache while it is fresh.
 *
 * Concurrent callers that arrive during a refresh share one `si.graphics()`
 * call. The in-flight handle is cleared when the probe settles, on failure as
 * well as success, so one rejected probe cannot poison every later call. If a
 * refresh fails while an older result exists, that stale result is returned
 * instead of an error.
 *
 * @returns The `si.graphics()` data (cached, fresh, or stale after a failed refresh).
 * @throws Whatever `si.graphics()` rejected with, when nothing has ever been cached.
 */
async function getCachedGraphics() {
    const now = Date.now();
    if (cachedGraphics && now - cachedGraphics.ts < GRAPHICS_CACHE_TTL_MS) {
        return cachedGraphics.data;
    }
    if (!graphicsInFlight) {
        graphicsInFlight = si
            .graphics()
            .then((data) => {
                cachedGraphics = { data, ts: Date.now() };
                return data;
            })
            .catch((err) => {
                // Expired data still beats no data for fields that rarely change.
                if (cachedGraphics) {
                    return cachedGraphics.data;
                }
                throw err;
            })
            .finally(() => {
                // Always release the slot so the next call can start a new probe.
                graphicsInFlight = undefined;
            });
    }
    return graphicsInFlight;
}
31
/**
 * Ask nvidia-smi for total VRAM, free VRAM and current utilization.
 *
 * `systeminformation`'s `memoryFree` is unreliable for NVIDIA on many systems,
 * so, like the Electron host, the numbers behind the VRAM-floor decision are
 * read straight from nvidia-smi. One CSV row per GPU is expected; the row at
 * `gpuIndex` is used, falling back to the first row.
 *
 * @param gpuIndex Index of the GPU row to read.
 * @returns `{ total, free, util }` parsed as integers, or `null` when the
 *   command fails, prints nothing, or prints a non-numeric value.
 */
async function queryNvidiaSmi(gpuIndex) {
    let binary = "nvidia-smi";
    if (process.platform === "win32") {
        const systemRoot = process.env.SYSTEMROOT ?? "C:\\Windows";
        binary = `${systemRoot}\\System32\\nvidia-smi.exe`;
    }
    const queryArgs = [
        "--query-gpu=memory.total,memory.free,utilization.gpu",
        "--format=csv,noheader,nounits",
    ];
    try {
        const result = await execFileP(binary, queryArgs, { timeout: NVIDIA_SMI_TIMEOUT_MS });
        // Keep only rows that contain something besides whitespace.
        const rows = [];
        for (const row of result.stdout.trim().split(/\r?\n/)) {
            if (row.trim().length > 0) {
                rows.push(row);
            }
        }
        const selected = rows[gpuIndex] ?? rows[0];
        if (!selected) {
            return null;
        }
        const fields = selected.split(/[\s,]+/);
        const total = parseInt(fields[0], 10);
        const free = parseInt(fields[1], 10);
        const util = parseInt(fields[2], 10);
        // A missing or non-numeric column parses to NaN and rejects the row.
        if ([total, free, util].some((value) => !Number.isFinite(value))) {
            return null;
        }
        return { total, free, util };
    }
    catch {
        return null;
    }
}
59
/**
 * Sort a GPU vendor string into the vendor families `getGpuStats` branches on.
 *
 * "ati" is matched as a whole word. A plain substring test also hits
 * "corporATIon", which classified "Intel Corporation" and "NVIDIA Corporation"
 * as AMD.
 *
 * @param vendor Vendor string from systeminformation; may be undefined.
 * @returns Flags `{ isNvidia, isAMD, isIntel }`.
 */
function detectGpuVendor(vendor) {
    const name = (vendor ?? "").toLowerCase();
    return {
        isNvidia: name.includes("nvidia"),
        isAMD: name.includes("amd") || name.includes("advanced micro") || /\bati\b/.test(name),
        isIntel: name.includes("intel"),
    };
}
/**
 * Cross-platform GPU stats, ported from the Electron host's getGPUStats so both
 * hosts classify utilization identically. Respects the user's selected GPU on
 * multi-GPU systems, and treats unified-memory machines (Apple Silicon, AMD
 * APUs) as RAM-backed rather than VRAM-backed.
 *
 * @param mem Memory reading; `total` and `available` are read (bytes, converted to MB here).
 * @param gpuIndex Index of the selected controller; the first controller is used if absent.
 * @returns `{ gpuUsage, totalVRAM, vramFreeMB, type }`, where `type` is one of
 *   "CPU", "GPU", "Hybrid" or "CPU+GPU".
 */
async function getGpuStats(mem, gpuIndex) {
    const ginfo = await getCachedGraphics();
    const controllers = ginfo.controllers ?? [];
    // Selected controller, else the first one, else an empty record.
    const g = controllers[gpuIndex] ?? controllers[0] ?? {};
    let gpuUsage = 0;
    let totalVRAM = 0;
    let vramFreeMB = 0;
    let type = "CPU";
    const ramTotalMB = mem.total / 1024 / 1024;
    const ramFreeMB = mem.available / 1024 / 1024;
    // Apple Silicon — unified memory, no discrete VRAM.
    if (isAppleSilicon) {
        gpuUsage = g.utilizationGpu ?? 0;
        if (!gpuUsage) {
            // No reading from systeminformation: use the sampler, whose result is
            // scaled by 100 here.
            const sampled = await sampleGpuUtilization();
            if (sampled !== null)
                gpuUsage = sampled * 100;
        }
        totalVRAM = ramTotalMB;
        vramFreeMB = ramFreeMB;
        // Under GPU pressure, discount free unified memory (floor at 50%).
        vramFreeMB = Math.max(vramFreeMB * (1 - gpuUsage / 100), vramFreeMB * 0.5);
        type = gpuUsage > 15 ? "Hybrid" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    const { isAMD, isIntel, isNvidia } = detectGpuVendor(g.vendor);
    // NVIDIA (Windows & Linux) — exact numbers from nvidia-smi.
    if (isNvidia) {
        const nv = await queryNvidiaSmi(gpuIndex);
        if (nv) {
            totalVRAM = nv.total;
            vramFreeMB = nv.free;
            gpuUsage = nv.util;
            type = gpuUsage > 10 ? "GPU" : "CPU";
            return { gpuUsage, totalVRAM, vramFreeMB, type };
        }
        // nvidia-smi unavailable → fall through to systeminformation values.
    }
    // AMD (Windows & Linux).
    if (isAMD) {
        gpuUsage = g.utilizationGpu ?? 0;
        totalVRAM = g.vram ?? g.memoryTotal ?? 0;
        vramFreeMB = g.memoryFree ?? 0;
        // Linux AMD APU (shared system RAM, e.g. Radeon 880M/890M) — treat as unified.
        if (process.platform === "linux" && totalVRAM < 2048) {
            totalVRAM = ramTotalMB;
            vramFreeMB = ramFreeMB;
            type = "Hybrid";
            return { gpuUsage, totalVRAM, vramFreeMB, type };
        }
        // AMD driver quirk: free sometimes mirrors total — estimate from utilization.
        if (vramFreeMB >= totalVRAM * 0.95) {
            vramFreeMB = totalVRAM * (1 - gpuUsage / 100);
        }
        type = gpuUsage > 25 ? "GPU" : gpuUsage > 10 ? "Hybrid" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    // Intel iGPU.
    if (isIntel) {
        gpuUsage = g.utilizationGpu ?? 0;
        totalVRAM = g.vram ?? 0;
        vramFreeMB = g.memoryFree ?? 0;
        type = gpuUsage > 20 ? "Hybrid" : gpuUsage > 10 ? "CPU+GPU" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    // Unknown / CPU-only.
    gpuUsage = g.utilizationGpu ?? 0;
    totalVRAM = g.vram ?? 0;
    vramFreeMB = g.memoryFree ?? 0;
    type = "CPU";
    return { gpuUsage, totalVRAM, vramFreeMB, type };
}
142
/**
 * Effective free memory (MB) for scheduling, ported verbatim from the
 * Electron host.
 *
 * - "Hybrid": free VRAM (or 80% of total VRAM when free VRAM is not positive)
 *   plus a quarter of free RAM.
 * - "GPU" with a known VRAM total: free VRAM, or 80% of total VRAM when the
 *   free reading is falsy (0 / NaN).
 * - Anything else, including "GPU" with no VRAM total: free RAM.
 *
 * @param args `{ vramFreeMB, totalVRAM, ramFreeMB, utilizationType }`.
 * @returns Effective free memory in MB.
 */
export function calculateEffectiveMemory(args) {
    // Stand-in used when the free-VRAM reading is unusable.
    const vramFallbackMB = args.totalVRAM * 0.8;
    switch (args.utilizationType) {
        case "Hybrid": {
            const vramPartMB = args.vramFreeMB > 0 ? args.vramFreeMB : vramFallbackMB;
            return vramPartMB + args.ramFreeMB * 0.25;
        }
        case "GPU":
            if (args.totalVRAM > 0) {
                return args.vramFreeMB || vramFallbackMB;
            }
            return args.ramFreeMB;
        default:
            return args.ramFreeMB;
    }
}
153
/**
 * Coarse host status, ported verbatim from the Electron host. Rules are
 * checked in this order and the first match wins:
 *
 * 1. "GPU" host with a known VRAM total and under 2000 MB free VRAM → "Overloaded"
 * 2. CPU usage above 85 or GPU usage above 90 → "Busy"
 * 3. Effective free memory under 1500 MB → "Overloaded"
 * 4. Otherwise → "Available"
 *
 * @param args `{ utilizationType, cpuUsage, gpuUsage, effectiveFreeMB, vramFreeMB, totalVRAM }`.
 * @returns "Overloaded", "Busy" or "Available".
 */
export function classifyStatus(args) {
    const vramStarved = args.utilizationType === "GPU" && args.totalVRAM > 0 && args.vramFreeMB < 2000;
    if (vramStarved) {
        return "Overloaded";
    }
    const processorSaturated = args.cpuUsage > 85 || args.gpuUsage > 90;
    if (processorSaturated) {
        return "Busy";
    }
    return args.effectiveFreeMB < 1500 ? "Overloaded" : "Available";
}
166
/**
 * Whether this host can take a new request.
 *
 * A host is refused when its status is "Overloaded" or "Busy". The 2 GB
 * (2000 MB) free-VRAM floor applies only to hosts whose utilization type is
 * "GPU". Every other type is judged on `status` alone, which already reflects
 * CPU load and effective memory. This keeps CPU-only hosts eligible — "a place
 * for every piece of hardware" — while preserving VRAM headroom on real GPU
 * hosts. The Electron host's HeartbeatV2 uses this same formula, so both host
 * kinds stay in sync.
 *
 * @param args `{ utilizationType, status, vramFreeMB }`.
 * @returns `true` when the host may accept another request.
 */
export function computeCanAcceptRequests(args) {
    const blockedByStatus = ["Overloaded", "Busy"].includes(args.status);
    const belowVramFloor = args.utilizationType === "GPU" && args.vramFreeMB < 2000;
    return !(blockedByStatus || belowVramFloor);
}
184
/**
 * Build the full HostCapacity snapshot for a HeartbeatV2 message.
 *
 * Best-effort: every probe (CPU load, memory, GPU stats) is wrapped, so a
 * failure degrades the affected fields instead of rejecting, and non-finite
 * readings are reported as 0. The fallbacks lean conservative:
 * - a failed memory probe reports 0 MB free, which `classifyStatus` marks
 *   "Overloaded";
 * - a failed GPU probe reports a CPU-type host with no VRAM.
 *
 * @param hostId Identifier copied into the snapshot.
 * @param gpuIndex Index of the GPU selected by the user.
 * @param activeSessions Session count copied into the snapshot.
 * @returns The populated snapshot; `lastUpdated` is the current time in ISO-8601 form.
 */
export async function buildHostCapacity(hostId, gpuIndex, activeSessions) {
    // Run one probe; a rejection, synchronous throw or nullish result yields `fallback`.
    const probe = async (run, fallback) => {
        try {
            return (await run()) ?? fallback;
        }
        catch {
            return fallback;
        }
    };
    // undefined / NaN / Infinity would serialize as null in the heartbeat.
    const finiteOrZero = (value) => (Number.isFinite(value) ? value : 0);
    const [cpu, mem] = await Promise.all([
        probe(() => si.currentLoad(), {}),
        probe(() => si.mem(), { total: 0, available: 0 }),
    ]);
    const cpuUsage = finiteOrZero(cpu.currentLoad);
    const ramFreeMB = finiteOrZero(mem.available / 1024 / 1024);
    const ramTotalMB = finiteOrZero(mem.total / 1024 / 1024);
    const ramUsedMB = ramTotalMB - ramFreeMB;
    const gpu = await probe(() => getGpuStats(mem, gpuIndex), {
        gpuUsage: 0,
        totalVRAM: 0,
        vramFreeMB: 0,
        type: "CPU",
    });
    const gpuUsage = finiteOrZero(gpu.gpuUsage);
    const totalVRAM = finiteOrZero(gpu.totalVRAM);
    const vramFreeMB = finiteOrZero(gpu.vramFreeMB);
    const utilizationType = gpu.type;
    const effectiveFreeMB = calculateEffectiveMemory({
        vramFreeMB,
        totalVRAM,
        ramFreeMB,
        utilizationType,
    });
    const status = classifyStatus({
        utilizationType,
        cpuUsage,
        gpuUsage,
        effectiveFreeMB,
        vramFreeMB,
        totalVRAM,
    });
    const canAcceptRequests = computeCanAcceptRequests({ utilizationType, status, vramFreeMB });
    return {
        hostId,
        utilizationType,
        cpuUsage,
        gpuUsage,
        ramUsedMB,
        ramFreeMB,
        ramTotalMB,
        vramFreeMB,
        totalVRAM,
        effectiveFreeMB,
        activeSessions,
        status,
        canAcceptRequests,
        lastUpdated: new Date().toISOString(),
    };
}
228
+ //# sourceMappingURL=capacity.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"capacity.js","sourceRoot":"","sources":["../../src/core/capacity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,IAAI,IAAI,MAAM,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAEtD,MAAM,SAAS,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AACtC,MAAM,qBAAqB,GAAG,IAAI,CAAC;AACnC,MAAM,qBAAqB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;AAE5C,MAAM,cAAc,GAAG,OAAO,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,EAAE,KAAK,OAAO,CAAC;AA2B7E,8EAA8E;AAC9E,+EAA+E;AAC/E,+EAA+E;AAC/E,gFAAgF;AAChF,8EAA8E;AAE9E,IAAI,cAAmF,CAAC;AACxF,IAAI,gBAAwE,CAAC;AAE7E,KAAK,UAAU,iBAAiB;IAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,IAAI,cAAc,IAAI,GAAG,GAAG,cAAc,CAAC,EAAE,GAAG,qBAAqB,EAAE,CAAC;QACtE,OAAO,cAAc,CAAC,IAAI,CAAC;IAC7B,CAAC;IACD,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,gBAAgB,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YAC7C,cAAc,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;YAC1C,gBAAgB,GAAG,SAAS,CAAC;YAC7B,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,KAAK,UAAU,cAAc,CAC3B,QAAgB;IAEhB,IAAI,CAAC;QACH,MAAM,GAAG,GACP,OAAO,CAAC,QAAQ,KAAK,OAAO;YAC1B,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,aAAa,4BAA4B;YACxE,CAAC,CAAC,YAAY,CAAC;QACnB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAChC,GAAG,EACH;YACE,sDAAsD;YACtD,+BAA+B;SAChC,EACD,EAAE,OAAO,EAAE,qBAAqB,EAAE,CACnC,CAAC;QACF,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9E,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QACvB,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7E,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,
OAAO,IAAI,CAAC;QACvE,OAAO,EAAE,KAAK,EAAE,KAAM,EAAE,IAAI,EAAE,IAAK,EAAE,IAAI,EAAE,IAAK,EAAE,CAAC;IACrD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AASD;;;;;GAKG;AACH,KAAK,UAAU,WAAW,CACxB,GAAiC,EACjC,QAAgB;IAEhB,MAAM,KAAK,GAAG,MAAM,iBAAiB,EAAE,CAAC;IACxC,MAAM,WAAW,GAAG,KAAK,CAAC,WAAW,IAAI,EAAE,CAAC;IAC5C,MAAM,CAAC,GACL,WAAW,CAAC,QAAQ,CAAC;QACrB,WAAW,CAAC,CAAC,CAAC;QACb,EAAkD,CAAC;IAEtD,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,IAAI,GAAG,KAAK,CAAC;IAEjB,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3C,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IAE9C,uDAAuD;IACvD,IAAI,cAAc,EAAE,CAAC;QACnB,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,MAAM,oBAAoB,EAAE,CAAC;YAC7C,IAAI,OAAO,KAAK,IAAI;gBAAE,QAAQ,GAAG,OAAO,GAAG,GAAG,CAAC;QACjD,CAAC;QACD,SAAS,GAAG,UAAU,CAAC;QACvB,UAAU,GAAG,SAAS,CAAC;QACvB,mEAAmE;QACnE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,QAAQ,GAAG,GAAG,CAAC,EAAE,UAAU,GAAG,GAAG,CAAC,CAAC;QAC3E,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;QACxC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9C,MAAM,KAAK,GACT,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IACxF,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,+DAA+D;IAC/D,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,EAAE,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,SAAS,GAAG,EAAE,CAAC,KAAK,CAAC;YACrB,UAAU,GAAG,EAAE,CAAC,IAAI,CAAC;YACrB,QAAQ,GAAG,EAAE,CAAC,IAAI,CAAC;YACnB,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;YACrC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QACnD,CAAC;QACD,qEAAqE;IACvE,CAAC;IAED,4BAA4B;IAC5B,IAAI,KAAK,EAAE,CAAC;QACV,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,SAAS,G
AAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC;QACzC,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAE/B,+EAA+E;QAC/E,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,IAAI,SAAS,GAAG,IAAI,EAAE,CAAC;YACrD,SAAS,GAAG,UAAU,CAAC;YACvB,UAAU,GAAG,SAAS,CAAC;YACvB,IAAI,GAAG,QAAQ,CAAC;YAChB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QACnD,CAAC;QAED,8EAA8E;QAC9E,IAAI,UAAU,IAAI,SAAS,GAAG,IAAI,EAAE,CAAC;YACnC,UAAU,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,QAAQ,GAAG,GAAG,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;QAChE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;QACxB,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAC/B,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACpE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,wBAAwB;IACxB,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;IACjC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IACxB,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;IAC/B,IAAI,GAAG,KAAK,CAAC;IACb,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;AACnD,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,wBAAwB,CAAC,IAKxC;IACC,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC;IACnE,IAAI,eAAe,KAAK,QAAQ,EAAE,CAAC;QACjC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,IAAI,CAAC;IAC5E,CAAC;IACD,IAAI,eAAe,KAAK,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAC/C,OAAO,UAAU,IAAI,SAAS,GAAG,GAAG,CAAC;IACvC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,cAAc,CAAC,IAO9B;IACC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,eAAe,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;IAC7F,IAAI,eAAe,KAAK,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,YAAY,CAAC;IACzF,IAAI,QAAQ,GAAG,EAAE;QAAE,OAAO,MAAM,CAAC;IACjC,IAAI,QAAQ,GAAG,EAAE;QAAE,OAAO,MAAM,
CAAC;IACjC,IAAI,eAAe,GAAG,IAAI;QAAE,OAAO,YAAY,CAAC;IAChD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CAAC,IAIxC;IACC,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IACrD,IAAI,MAAM,KAAK,YAAY,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,KAAK,CAAC;IAC/D,IAAI,eAAe,KAAK,KAAK,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IACjE,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,MAAc,EACd,QAAgB,EAChB,cAAsB;IAEtB,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,IAAI,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IAC9C,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3C,MAAM,SAAS,GAAG,UAAU,GAAG,SAAS,CAAC;IAEzC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,eAAe,EAAE,GAAG,MAAM,WAAW,CAClF,GAAG,EACH,QAAQ,CACT,CAAC;IAEF,MAAM,eAAe,GAAG,wBAAwB,CAAC;QAC/C,UAAU;QACV,SAAS;QACT,SAAS;QACT,eAAe;KAChB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,eAAe;QACf,QAAQ;QACR,QAAQ;QACR,eAAe;QACf,UAAU;QACV,SAAS;KACV,CAAC,CAAC;IAEH,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;IAE5F,OAAO;QACL,MAAM;QACN,eAAe;QACf,QAAQ;QACR,QAAQ;QACR,SAAS;QACT,SAAS;QACT,UAAU;QACV,UAAU;QACV,SAAS;QACT,eAAe;QACf,cAAc;QACd,MAAM;QACN,iBAAiB;QACjB,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACtC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=llmWorker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llmWorker.d.ts","sourceRoot":"","sources":["../../src/core/llmWorker.ts"],"names":[],"mappings":""}
@@ -0,0 +1,239 @@
1
import { format } from "node:util";
import { getLlama, LlamaChatSession } from "node-llama-cpp";
2
/** Handle returned by `getLlama()`; assigned by "load-model", reset to null by "dispose". */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let llama = null;
/** Model returned by `llama.loadModel()`; null while no model is loaded. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let model = null;
/** Live conversations: conversationId → `{ context, session, abortController }`. */
const contexts = new Map();
7
/**
 * Post a message to the parent process over the IPC channel.
 * Does nothing when the process has no IPC channel (`process.send` is undefined).
 */
function send(msg) {
    process.send?.(msg);
}
/**
 * Forward one log line to the parent as `{ type: "log", level, message }`.
 *
 * @param level "info", "warn" or "error".
 * @param message Text of the log line.
 */
function fwdLog(level, message) {
    send({ type: "log", level, message });
}
// Route console output to the parent instead of this process's stdio. `format`
// renders arguments the way console.log itself would (objects inspected,
// Errors with their stack, printf-style placeholders honoured). Plain
// `String()` turns every object into "[object Object]".
console.log = (...args) => fwdLog("info", format(...args));
console.warn = (...args) => fwdLog("warn", format(...args));
console.error = (...args) => fwdLog("error", format(...args));
16
// An uncaught exception is reported to the parent, then the worker exits
// with code 1.
process.on("uncaughtException", function onUncaughtException(err) {
    const report = `llmWorker uncaughtException: ${err.message}`;
    fwdLog("error", report);
    process.exit(1);
});
// An unhandled rejection is reported only; the worker keeps running.
process.on("unhandledRejection", function onUnhandledRejection(reason) {
    const report = `llmWorker unhandledRejection: ${String(reason)}`;
    fwdLog("error", report);
});
23
/**
 * Abort every conversation that currently has an abort controller attached.
 * Errors thrown by `abort()` are deliberately ignored so one bad entry cannot
 * stop the rest from being cancelled.
 */
function cancelAll() {
    contexts.forEach((entry) => {
        if (!entry.abortController) {
            return;
        }
        try {
            entry.abortController.abort();
        }
        catch {
            // ignored
        }
    });
}
35
/**
 * Best-effort text for anything that was thrown: the `message` of an Error (or
 * error-like object), otherwise the value converted to a string.
 */
function describeError(err) {
    return typeof err?.message === "string" ? err.message : String(err);
}
/**
 * IPC command dispatcher. Each message from the parent carries a `type`:
 *
 * - "load-model"     → replies "loaded" (with `trainContextSize`) or "error"
 * - "create-context" → replies "context-created" or "error"
 * - "clear-context"  → aborts and disposes one conversation (no reply)
 * - "prompt"         → streams "token" messages, then "done", "cancelled" or "error"
 * - "cancel"         → aborts the running prompt of one conversation
 * - "cancel-all"     → aborts every running prompt
 * - "dispose"        → frees everything, replies "disposed", exits with code 0
 *
 * Each message is handled in its own async task. An error that escapes a
 * handler is logged to the parent rather than crashing the worker.
 */
process.on("message", (msg) => {
    const m = msg;
    const type = m?.["type"];
    void (async () => {
        try {
            switch (type) {
                case "load-model": {
                    const modelPath = m["modelPath"];
                    const gpuLayers = m["gpuLayers"];
                    try {
                        llama = await getLlama();
                        model = await llama.loadModel({ modelPath, gpuLayers });
                    }
                    catch (err) {
                        if (err?.constructor?.name !== "InsufficientMemoryError") {
                            send({ type: "error", message: describeError(err) });
                            break;
                        }
                        // Memory check refused the load: retry once with the check disabled.
                        try {
                            model = await llama.loadModel({
                                modelPath,
                                gpuLayers,
                                ignoreMemorySafetyChecks: true,
                            });
                        }
                        catch (err2) {
                            send({ type: "error", message: describeError(err2) });
                            break;
                        }
                    }
                    const trainContextSize = model?.trainContextSize ?? 4096;
                    send({ type: "loaded", trainContextSize });
                    break;
                }
                case "create-context": {
                    const conversationId = m["conversationId"];
                    try {
                        // Context size is the model's training size, capped at 4096.
                        const trainSize = model?.trainContextSize ?? 4096;
                        const contextSize = Math.min(trainSize, 4096);
                        let context;
                        try {
                            context = await model.createContext({ contextSize });
                        }
                        catch (err) {
                            if (err?.constructor?.name === "InsufficientMemoryError") {
                                context = await model.createContext({ contextSize, ignoreMemorySafetyChecks: true });
                            }
                            else {
                                throw err;
                            }
                        }
                        const session = new LlamaChatSession({ contextSequence: context.getSequence() });
                        contexts.set(conversationId, { context, session, abortController: null });
                        send({ type: "context-created", conversationId });
                    }
                    catch (err) {
                        send({ type: "error", conversationId, message: describeError(err) });
                    }
                    break;
                }
                case "clear-context": {
                    const conversationId = m["conversationId"];
                    const entry = contexts.get(conversationId);
                    if (entry) {
                        if (entry.abortController) {
                            try {
                                entry.abortController.abort();
                            }
                            catch {
                                // ignored
                            }
                        }
                        contexts.delete(conversationId);
                        try {
                            entry.context?.dispose?.();
                        }
                        catch {
                            // ignored
                        }
                    }
                    break;
                }
                case "prompt": {
                    const requestId = m["requestId"];
                    const conversationId = m["conversationId"];
                    const input = m["input"];
                    const options = m["options"];
                    const entry = contexts.get(conversationId);
                    if (!entry) {
                        send({ type: "error", requestId, message: `No context for "${conversationId}"` });
                        break;
                    }
                    const abortController = new AbortController();
                    entry.abortController = abortController;
                    let promptTokens = 0;
                    let fullText = "";
                    try {
                        // Tokenize inside the try: if it throws (e.g. no model loaded),
                        // the requester still gets an "error" reply for this requestId
                        // and the controller is released in `finally`.
                        promptTokens = model.tokenize(input).length;
                        const promptOpts = {
                            onTextChunk: (chunk) => {
                                fullText += chunk;
                                send({ type: "token", requestId, chunk });
                            },
                            signal: abortController.signal,
                        };
                        if (options?.maxTokens !== undefined)
                            promptOpts["maxTokens"] = options.maxTokens;
                        if (options?.temperature !== undefined)
                            promptOpts["temperature"] = options.temperature;
                        if (options?.stop?.length)
                            promptOpts["customStopTriggers"] = options.stop;
                        await entry.session.prompt(input, promptOpts);
                        fullText = fullText.trim();
                        const completionTokens = model.tokenize(fullText).length;
                        send({ type: "done", requestId, text: fullText, promptTokens, completionTokens });
                    }
                    catch (err) {
                        if (err?.name === "AbortError") {
                            // The model may already be gone if "dispose" raced with this prompt.
                            const completionTokens = model ? model.tokenize(fullText).length : 0;
                            send({ type: "cancelled", requestId, text: fullText, promptTokens, completionTokens });
                        }
                        else {
                            send({ type: "error", requestId, message: describeError(err) });
                        }
                    }
                    finally {
                        // Release only our own controller; a newer prompt on the same
                        // conversation may have installed its own in the meantime.
                        if (entry.abortController === abortController) {
                            entry.abortController = null;
                        }
                    }
                    break;
                }
                case "cancel": {
                    const entry = contexts.get(m["conversationId"]);
                    if (entry?.abortController) {
                        try {
                            entry.abortController.abort();
                        }
                        catch {
                            // ignored
                        }
                    }
                    break;
                }
                case "cancel-all": {
                    cancelAll();
                    break;
                }
                case "dispose": {
                    cancelAll();
                    // Short pause before freeing resources — presumably so aborted
                    // prompts can settle first.
                    await new Promise((r) => setTimeout(r, 300));
                    for (const entry of contexts.values()) {
                        try {
                            entry.context?.dispose?.();
                        }
                        catch {
                            // ignored
                        }
                    }
                    contexts.clear();
                    try {
                        model?.dispose?.();
                    }
                    catch {
                        // ignored
                    }
                    try {
                        llama?.dispose?.();
                    }
                    catch {
                        // ignored
                    }
                    model = null;
                    llama = null;
                    send({ type: "disposed" });
                    process.exit(0);
                    break;
                }
                default:
                    fwdLog("warn", `llmWorker: unknown message type "${type ?? "(none)"}"`);
            }
        }
        catch (err) {
            fwdLog("error", `llmWorker error in "${type}": ${describeError(err)}`);
        }
    })();
});
// Announce readiness only after the message handler is installed.
send({ type: "ready" });
239
+ //# sourceMappingURL=llmWorker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llmWorker.js","sourceRoot":"","sources":["../../src/core/llmWorker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAW5D,8DAA8D;AAC9D,IAAI,KAAK,GAAQ,IAAI,CAAC;AACtB,8DAA8D;AAC9D,IAAI,KAAK,GAAQ,IAAI,CAAC;AACtB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAwB,CAAC;AAEjD,SAAS,IAAI,CAAC,GAAY;IACxB,OAAO,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;AACtB,CAAC;AAED,SAAS,MAAM,CAAC,KAAe,EAAE,OAAe;IAC9C,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;AACxC,CAAC;AAED,OAAO,CAAC,GAAG,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjF,OAAO,CAAC,IAAI,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAClF,OAAO,CAAC,KAAK,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAEpF,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,GAAU,EAAE,EAAE;IAC7C,MAAM,CAAC,OAAO,EAAE,gCAAgC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAe,EAAE,EAAE;IACnD,MAAM,CAAC,OAAO,EAAE,iCAAiC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AACrE,CAAC,CAAC,CAAC;AAEH,SAAS,SAAS;IAChB,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,KAAK,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,UAAU;YACZ,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAED,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAY,EAAE,EAAE;IACrC,MAAM,CAAC,GAAG,GAA8B,CAAC;IACzC,MAAM,IAAI,GAAG,CAAC,EAAE,CAAC,MAAM,CAAuB,CAAC;IAE/C,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,IAAI,CAAC;YACH,QAAQ,IAAI,EAAE,CAAC;gBACb,KAAK,YAAY,CAAC,CAAC,CAAC;oBAClB,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,IAAI,CAAC;wBACH,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;wBACzB,yGAAyG;wBACzG,KAAK,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;oBAC1D,CAAC;oBAAC,OAAO,GAAG
,EAAE,CAAC;wBACb,MAAM,MAAM,GAAG,GAA0C,CAAC;wBAC1D,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,KAAK,yBAAyB,EAAE,CAAC;4BAC5D,IAAI,CAAC;gCACH,yGAAyG;gCACzG,KAAK,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC;oCAC5B,SAAS;oCACT,SAAS;oCACT,wBAAwB,EAAE,IAAI;iCAC/B,CAAC,CAAC;4BACL,CAAC;4BAAC,OAAO,IAAI,EAAE,CAAC;gCACd,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAG,IAAc,CAAC,OAAO,EAAE,CAAC,CAAC;gCAC1D,MAAM;4BACR,CAAC;wBACH,CAAC;6BAAM,CAAC;4BACN,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;4BACzD,MAAM;wBACR,CAAC;oBACH,CAAC;oBACD,sEAAsE;oBACtE,MAAM,gBAAgB,GAAI,KAAK,EAAE,gBAAuC,IAAI,IAAI,CAAC;oBACjF,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC,CAAC;oBAC3C,MAAM;gBACR,CAAC;gBAED,KAAK,gBAAgB,CAAC,CAAC,CAAC;oBACtB,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,IAAI,CAAC;wBACH,sEAAsE;wBACtE,MAAM,SAAS,GAAI,KAAK,EAAE,gBAAuC,IAAI,IAAI,CAAC;wBAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;wBAC9C,8DAA8D;wBAC9D,IAAI,OAAY,CAAC;wBACjB,IAAI,CAAC;4BACH,yGAAyG;4BACzG,OAAO,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC;wBACvD,CAAC;wBAAC,OAAO,GAAG,EAAE,CAAC;4BACb,MAAM,MAAM,GAAG,GAA0C,CAAC;4BAC1D,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,KAAK,yBAAyB,EAAE,CAAC;gCAC5D,yGAAyG;gCACzG,OAAO,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,wBAAwB,EAAE,IAAI,EAAE,CAAC,CAAC;4BACvF,CAAC;iCAAM,CAAC;gCACN,MAAM,GAAG,CAAC;4BACZ,CAAC;wBACH,CAAC;wBACD,yGAAyG;wBACzG,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;wBACjF,QAAQ,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC;wBAC1E,IAAI,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,cAAc,EAAE,CAAC,CAAC;oBACpD,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,cAAc,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;oBAC3E,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,eAAe,CAAC,CAAC,CAAC;oBACrB,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;oBAC3C,IAAI,KAAK,EAAE,CAAC;wBACV,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;4BAC1B,IAAI,CAAC;gCACH,KAAK,CAAC,eAAe,CAAC,K
AAK,EAAE,CAAC;4BAChC,CAAC;4BAAC,MAAM,CAAC;gCACP,UAAU;4BACZ,CAAC;wBACH,CAAC;wBACD,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;wBAChC,IAAI,CAAC;4BACH,yGAAyG;4BACzG,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;wBAC7B,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ,CAAC,CAAC,CAAC;oBACd,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAW,CAAC;oBACnC,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAEd,CAAC;oBAEd,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;oBAC3C,IAAI,CAAC,KAAK,EAAE,CAAC;wBACX,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,mBAAmB,cAAc,GAAG,EAAE,CAAC,CAAC;wBAClF,MAAM;oBACR,CAAC;oBAED,KAAK,CAAC,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;oBAC9C,yGAAyG;oBACzG,MAAM,YAAY,GAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAe,CAAC,MAAM,CAAC;oBACjE,IAAI,QAAQ,GAAG,EAAE,CAAC;oBAElB,IAAI,CAAC;wBACH,MAAM,UAAU,GAA4B;4BAC1C,WAAW,EAAE,CAAC,KAAa,EAAE,EAAE;gCAC7B,QAAQ,IAAI,KAAK,CAAC;gCAClB,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;4BAC5C,CAAC;4BACD,MAAM,EAAE,KAAK,CAAC,eAAe,CAAC,MAAM;yBACrC,CAAC;wBACF,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS;4BAAE,UAAU,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,SAAS,CAAC;wBAClF,IAAI,OAAO,EAAE,WAAW,KAAK,SAAS;4BAAE,UAAU,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC;wBACxF,IAAI,OAAO,EAAE,IAAI,EAAE,MAAM;4BAAE,UAAU,CAAC,oBAAoB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;wBAE3E,MAAM,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;wBAE9C,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;wBAC3B,yGAAyG;wBACzG,MAAM,gBAAgB,GAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAe,CAAC,MAAM,CAAC;wBACxE,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC,CAAC;oBACpF,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,CAAC,GAAG,GAAwB,CAAC;wBACnC,IAAI,CAAC,EAAE,IAAI,KAAK,YAAY,EAAE,CAAC;4BAC7B,yGAAyG;4BACzG,MAAM,gBAAgB,GAAG,KAAK,CAAC,CAAC,CAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;4BACpF,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC,C
AAC;wBACzF,CAAC;6BAAM,CAAC;4BACN,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;wBACtE,CAAC;oBACH,CAAC;4BAAS,CAAC;wBACT,KAAK,CAAC,eAAe,GAAG,IAAI,CAAC;oBAC/B,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ,CAAC,CAAC,CAAC;oBACd,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAW,CAAC,CAAC;oBAC1D,IAAI,KAAK,EAAE,eAAe,EAAE,CAAC;wBAC3B,IAAI,CAAC;4BACH,KAAK,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;wBAChC,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,YAAY,CAAC,CAAC,CAAC;oBAClB,SAAS,EAAE,CAAC;oBACZ,MAAM;gBACR,CAAC;gBAED,KAAK,SAAS,CAAC,CAAC,CAAC;oBACf,SAAS,EAAE,CAAC;oBACZ,MAAM,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;oBACnD,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;wBACtC,IAAI,CAAC;4BACH,yGAAyG;4BACzG,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;wBAC7B,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,QAAQ,CAAC,KAAK,EAAE,CAAC;oBACjB,IAAI,CAAC;wBACH,yGAAyG;wBACzG,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC;oBACrB,CAAC;oBAAC,MAAM,CAAC;wBACP,UAAU;oBACZ,CAAC;oBACD,IAAI,CAAC;wBACH,yGAAyG;wBACzG,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC;oBACrB,CAAC;oBAAC,MAAM,CAAC;wBACP,UAAU;oBACZ,CAAC;oBACD,KAAK,GAAG,IAAI,CAAC;oBACb,KAAK,GAAG,IAAI,CAAC;oBACb,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;oBAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAChB,MAAM;gBACR,CAAC;gBAED;oBACE,MAAM,CAAC,MAAM,EAAE,oCAAoC,IAAI,IAAI,QAAQ,GAAG,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,OAAO,EAAE,uBAAuB,IAAI,MAAO,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG7C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,OAAO,EAAoB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AA2BxE,MAAM,MAAM,kBAAkB,GAAG,cAAc,GAAG,YAAY,GAAG,WAAW,GAAG,OAAO,GAAG,WAAW,CAAC;AAErG,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,aAAa,CAAC;IACrB,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,cAAc,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,mBAAmB;IAkBlB,OAAO,CAAC,QAAQ,CAAC,IAAI;IAjBjC,OAAO,CAAC,UAAU,CAA8B;IAChD,OAAO,CAAC,iBAAiB,CAA+B;IACxD,OAAO,CAAC,eAAe,CAA+B;IACtD,OAAO,CAAC,cAAc,CAA+B;IACrD,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,MAAM,CAAsC;IAEpD,OAAO,CAAC,iBAAiB,CAAS;IAElC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAmB;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAgE;IACzF,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0B;IACpD,OAAO,CAAC,cAAc,CAAgB;gBAET,IAAI,EAAE,mBAAmB;IAQtD,SAAS,IAAI,kBAAkB;IAI/B,cAAc,IAAI;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAIvG,aAAa,IAAI;QAAE,EAAE,EAAE,cAAc,CAAC;QAAC,GAAG,EAAE,cAAc,CAAA;KAAE;IAI5D,iBAAiB,IAAI,MAAM,EAAE;IAIvB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgB/C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAsHxB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAmBnB,gBAAgB;IA4B9B,OAAO,CAAC,iBAAiB;IAanB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC;YA2B5B,yBAAyB;YAkCzB,YAAY;IA0E1B,OAAO,CAAC,mBAAmB;IAM3B,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,UAAU;YAeJ,IAAI;IASlB,OAAO,CAAC,SAAS;CAKlB"}
1
+ {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG7C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,OAAO,EAAoB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AA2BxE,MAAM,MAAM,kBAAkB,GAAG,cAAc,GAAG,YAAY,GAAG,WAAW,GAAG,OAAO,GAAG,WAAW,CAAC;AAErG,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,aAAa,CAAC;IACrB,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,cAAc,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,mBAAmB;IAkBlB,OAAO,CAAC,QAAQ,CAAC,IAAI;IAjBjC,OAAO,CAAC,UAAU,CAA8B;IAChD,OAAO,CAAC,iBAAiB,CAA+B;IACxD,OAAO,CAAC,eAAe,CAA+B;IACtD,OAAO,CAAC,cAAc,CAA+B;IACrD,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,MAAM,CAAsC;IAEpD,OAAO,CAAC,iBAAiB,CAAS;IAElC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAmB;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAgE;IACzF,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0B;IACpD,OAAO,CAAC,cAAc,CAAgB;gBAET,IAAI,EAAE,mBAAmB;IAgBtD,SAAS,IAAI,kBAAkB;IAI/B,cAAc,IAAI;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAIvG,aAAa,IAAI;QAAE,EAAE,EAAE,cAAc,CAAC;QAAC,GAAG,EAAE,cAAc,CAAA;KAAE;IAI5D,iBAAiB,IAAI,MAAM,EAAE;IAIvB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgB/C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAsHxB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAmBnB,gBAAgB;IA4B9B,OAAO,CAAC,iBAAiB;IAanB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC;YA2B5B,yBAAyB;YAgDzB,YAAY;IA0E1B,OAAO,CAAC,mBAAmB;IAM3B,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,UAAU;YAeJ,IAAI;IASlB,OAAO,CAAC,SAAS;CAKlB"}
@@ -1,5 +1,6 @@
1
1
  import { arch as osArch, platform as osPlatform } from "node:os";
2
2
  import { HttpTransportType, HubConnectionBuilder, LogLevel, } from "@microsoft/signalr";
3
+ import { buildHostCapacity } from "./capacity.js";
3
4
  import { getLogger } from "./logger.js";
4
5
  import { listModelsSync } from "./models-fs.js";
5
6
  import { SharedGGUFRunner } from "./shared-runner.js";
@@ -32,6 +33,12 @@ export class OrchestratorService {
32
33
  paths: opts.paths,
33
34
  config: { schemaVersion: 1, lastWriter: "cli", schemaUpdatedAt: "" },
34
35
  });
36
+ this.sharedRunner.setWorkerCrashCallback((crash) => {
37
+ getLogger().error(`💥 llmWorker crash: model="${crash.model}" — clearing all active sessions`);
38
+ // All conversations are lost when the worker process dies — clear the session map
39
+ // so subsequent prompts trigger a fresh model load rather than using stale state.
40
+ this.sessions.clear();
41
+ });
35
42
  }
36
43
  getStatus() {
37
44
  return this.status;
@@ -257,6 +264,8 @@ export class OrchestratorService {
257
264
  const cfg = await this.opts.config.load();
258
265
  if (!cfg.hostId || !cfg.hostName)
259
266
  return;
267
+ const hostId = cfg.hostId;
268
+ const gpuIndex = cfg.selectedGpuIndex ?? 0;
260
269
  const capabilities = listModelsSync(this.opts.paths, cfg);
261
270
  try {
262
271
  await this.connection.invoke("RegisterHost", {
@@ -278,12 +287,25 @@ export class OrchestratorService {
278
287
  this.heartbeatInterval = setInterval(async () => {
279
288
  if (this.connection?.state !== "Connected")
280
289
  return;
290
+ // v1 heartbeat — liveness only. Still required: the server uses it for
291
+ // uptime tracking and DB heartbeat persistence (HeartbeatV2 does neither).
281
292
  try {
282
- await this.connection.send("Heartbeat", cfg.hostId);
293
+ await this.connection.send("Heartbeat", hostId);
283
294
  }
284
295
  catch (err) {
285
296
  getLogger().warn("orchestrator: heartbeat error:", err.message);
286
297
  }
298
+ // v2 heartbeat — live capacity (GPU/CPU/VRAM/RAM/sessions). Lets the
299
+ // orchestrator route around hosts that are out of VRAM or overloaded
300
+ // instead of assigning prompts until the host OOMs and crashes. Kept
301
+ // separate from v1 so a metrics-sampling hiccup never drops liveness.
302
+ try {
303
+ const capacity = await buildHostCapacity(hostId, gpuIndex, this.sessions.size);
304
+ await this.connection.send("HeartbeatV2", capacity);
305
+ }
306
+ catch (err) {
307
+ getLogger().warn("orchestrator: heartbeatV2 error:", err.message);
308
+ }
287
309
  }, HEARTBEAT_INTERVAL_MS);
288
310
  }
289
311
  async handlePrompt(req) {