peerllm-host-cli 1.9.3 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/capacity.d.ts +62 -0
- package/dist/core/capacity.d.ts.map +1 -0
- package/dist/core/capacity.js +228 -0
- package/dist/core/capacity.js.map +1 -0
- package/dist/core/llmWorker.d.ts +2 -0
- package/dist/core/llmWorker.d.ts.map +1 -0
- package/dist/core/llmWorker.js +239 -0
- package/dist/core/llmWorker.js.map +1 -0
- package/dist/core/orchestrator.d.ts.map +1 -1
- package/dist/core/orchestrator.js +23 -1
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/shared-runner.d.ts +20 -6
- package/dist/core/shared-runner.d.ts.map +1 -1
- package/dist/core/shared-runner.js +339 -154
- package/dist/core/shared-runner.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
 * Live host capacity snapshot sent to the orchestrator via `HeartbeatV2`.
 *
 * Field names mirror the Electron host's payload (camelCase) so both host kinds
 * speak the identical contract to the server's `HostCapacity` model. The
 * orchestrator routes around hosts whose fresh snapshot says they're out of
 * VRAM / overloaded instead of piling work on until they OOM and crash.
 */
export interface HostCapacityPayload {
    /** Host identifier, passed through unchanged from the `buildHostCapacity` caller. */
    hostId: string;
    /** Utilization class chosen by the GPU probe: "CPU", "GPU", "Hybrid" or "CPU+GPU". */
    utilizationType: string;
    /** CPU load from systeminformation's `currentLoad` (0 when the value is missing). */
    cpuUsage: number;
    /** GPU utilization; compared against a threshold of 90 when classifying status. */
    gpuUsage: number;
    /** Computed as `ramTotalMB - ramFreeMB`. */
    ramUsedMB: number;
    /** Available system memory: `mem.available` bytes divided by 1024 twice. */
    ramFreeMB: number;
    /** Total system memory: `mem.total` bytes divided by 1024 twice. */
    ramTotalMB: number;
    /** Free VRAM; on unified-memory hosts this is derived from free system RAM. */
    vramFreeMB: number;
    /** Total VRAM; on unified-memory hosts this is total system RAM. */
    totalVRAM: number;
    /** Result of `calculateEffectiveMemory` for this snapshot. */
    effectiveFreeMB: number;
    /** Active session count, passed through unchanged from the caller. */
    activeSessions: number;
    /** Result of `classifyStatus`: "Available", "Busy" or "Overloaded". */
    status: string;
    /** Result of `computeCanAcceptRequests` for this snapshot. */
    canAcceptRequests: boolean;
    /** Snapshot creation time as an ISO-8601 string (`Date.prototype.toISOString`). */
    lastUpdated: string;
}
|
|
25
|
+
/**
 * Effective free memory for scheduling, ported verbatim from the Electron host.
 *
 * As implemented in capacity.js:
 * - `utilizationType === "Hybrid"`: `vramFreeMB` when it is greater than 0,
 *   otherwise 80% of `totalVRAM`; plus 25% of `ramFreeMB`.
 * - `utilizationType === "GPU"` with `totalVRAM > 0`: `vramFreeMB`, or 80% of
 *   `totalVRAM` when `vramFreeMB` is 0 / falsy.
 * - anything else: `ramFreeMB`.
 */
export declare function calculateEffectiveMemory(args: {
    vramFreeMB: number;
    totalVRAM: number;
    ramFreeMB: number;
    utilizationType: string;
}): number;
|
|
32
|
+
/**
 * Coarse host status, ported verbatim from the Electron host.
 *
 * As implemented in capacity.js, the checks run in this order:
 * 1. "Overloaded" when `utilizationType` is "GPU", `totalVRAM > 0` and
 *    `vramFreeMB < 2000`.
 * 2. "Busy" when `cpuUsage > 85`.
 * 3. "Busy" when `gpuUsage > 90`.
 * 4. "Overloaded" when `effectiveFreeMB < 1500`.
 * 5. "Available" otherwise.
 */
export declare function classifyStatus(args: {
    utilizationType: string;
    cpuUsage: number;
    gpuUsage: number;
    effectiveFreeMB: number;
    vramFreeMB: number;
    totalVRAM: number;
}): string;
|
|
41
|
+
/**
 * Whether this host can take a new request.
 *
 * As implemented in capacity.js: returns `false` when `status` is "Overloaded"
 * or "Busy", and also when `utilizationType` is "GPU" and `vramFreeMB < 2000`;
 * returns `true` otherwise.
 *
 * The 2 GB VRAM floor is applied ONLY when the host is classified as "GPU";
 * every other utilization type is judged purely on `status` (which already
 * folds in CPU load and effective memory). This keeps CPU-only hosts eligible —
 * "a place for every piece of hardware" — while preserving the 2 GB VRAM
 * headroom for real GPU hosts. The Electron host's HeartbeatV2 is stated to
 * compute canAcceptRequests with this same formula, so both host kinds stay
 * in sync.
 */
export declare function computeCanAcceptRequests(args: {
    utilizationType: string;
    status: string;
    vramFreeMB: number;
}): boolean;
|
|
56
|
+
/**
 * Build the full HostCapacity snapshot for a HeartbeatV2 message.
 *
 * Intended to be best-effort: a probe failure should degrade a field rather
 * than throw, so the caller still gets a usable, conservative snapshot.
 * Callers that must never fail should still guard the returned promise.
 *
 * @param hostId Identifier copied into the snapshot's `hostId`.
 * @param gpuIndex Index of the selected GPU; the first GPU is used when the
 *   index does not exist.
 * @param activeSessions Session count copied into the snapshot's `activeSessions`.
 * @returns The assembled capacity snapshot, stamped with the current time.
 */
export declare function buildHostCapacity(hostId: string, gpuIndex: number, activeSessions: number): Promise<HostCapacityPayload>;
|
|
62
|
+
//# sourceMappingURL=capacity.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capacity.d.ts","sourceRoot":"","sources":["../../src/core/capacity.ts"],"names":[],"mappings":"AAcA;;;;;;;GAOG;AACH,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AAqKD,oFAAoF;AACpF,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;CACzB,GAAG,MAAM,CAST;AAED,kEAAkE;AAClE,wBAAgB,cAAc,CAAC,IAAI,EAAE;IACnC,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,CAOT;AAED;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CAAC,IAAI,EAAE;IAC7C,eAAe,EAAE,MAAM,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAKV;AAED;;;;GAIG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,mBAAmB,CAAC,CA8C9B"}
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { arch as osArch } from "node:os";
|
|
3
|
+
import { promisify } from "node:util";
|
|
4
|
+
import si from "systeminformation";
|
|
5
|
+
import { sampleGpuUtilization } from "./gpu-stats.js";
|
|
6
|
+
// Promise-returning wrapper around child_process.execFile (used to run nvidia-smi).
const execFileP = promisify(execFile);
// Timeout, in milliseconds, passed to the nvidia-smi invocation.
const NVIDIA_SMI_TIMEOUT_MS = 1500;
// How long a cached si.graphics() result is reused: 5 minutes, in milliseconds.
const GRAPHICS_CACHE_TTL_MS = 5 * 60 * 1000;
// True only on macOS ("darwin") running on an arm64 CPU.
const isAppleSilicon = process.platform === "darwin" && osArch() === "arm64";
|
|
10
|
+
// ---------------------------------------------------------------------------
// Graphics cache — vendor/static VRAM rarely changes, and si.graphics() is the
// expensive call. Live numbers (utilization, free VRAM) come from nvidia-smi /
// the GPU sampler on each tick; the cache only supplies vendor + static totals.
// ---------------------------------------------------------------------------
// Last successful si.graphics() result, stored as { data, ts } (ts = Date.now()).
let cachedGraphics;
// Promise of the probe currently running, shared by concurrent callers.
let graphicsInFlight;
/**
 * Return the si.graphics() result, reusing a cached copy younger than
 * GRAPHICS_CACHE_TTL_MS and sharing one in-flight probe between concurrent
 * callers.
 *
 * The in-flight slot is released whether the probe succeeds or fails. If it
 * were only cleared on success, a single rejected probe would be handed back
 * to every later caller forever and the probe would never be retried.
 *
 * @returns Promise of the systeminformation graphics data; rejects when the
 *   probe fails and no fresh cache entry is available.
 */
async function getCachedGraphics() {
    const now = Date.now();
    if (cachedGraphics && now - cachedGraphics.ts < GRAPHICS_CACHE_TTL_MS) {
        return cachedGraphics.data;
    }
    if (!graphicsInFlight) {
        graphicsInFlight = si
            .graphics()
            .then((data) => {
                cachedGraphics = { data, ts: Date.now() };
                return data;
            })
            .finally(() => {
                // Always release the slot so a failed probe is retried next tick.
                graphicsInFlight = undefined;
            });
    }
    return graphicsInFlight;
}
|
|
31
|
+
/**
 * Ask nvidia-smi for the selected GPU's total VRAM, free VRAM and utilization.
 *
 * The command prints one CSV row per GPU (no header, no units). The row at
 * `gpuIndex` is used, falling back to the first row when that index is absent.
 * `systeminformation`'s `memoryFree` is unreliable for NVIDIA on many systems,
 * so these numbers drive the VRAM floor decision instead.
 *
 * @param gpuIndex Row index of the GPU to report.
 * @returns `{ total, free, util }` as integers, or `null` when nvidia-smi
 *   cannot be run, prints nothing, or prints a row that does not parse.
 */
async function queryNvidiaSmi(gpuIndex) {
    const executable = process.platform === "win32"
        ? `${process.env.SYSTEMROOT ?? "C:\\Windows"}\\System32\\nvidia-smi.exe`
        : "nvidia-smi";
    const queryArgs = [
        "--query-gpu=memory.total,memory.free,utilization.gpu",
        "--format=csv,noheader,nounits",
    ];
    try {
        const { stdout } = await execFileP(executable, queryArgs, { timeout: NVIDIA_SMI_TIMEOUT_MS });
        // Keep only rows that contain something other than whitespace.
        const rows = [];
        for (const row of stdout.trim().split(/\r?\n/)) {
            if (row.trim().length > 0) {
                rows.push(row);
            }
        }
        const selected = rows[gpuIndex] ?? rows[0];
        if (!selected) {
            return null;
        }
        const fields = selected.split(/[\s,]+/);
        const total = parseInt(fields[0], 10);
        const free = parseInt(fields[1], 10);
        const util = parseInt(fields[2], 10);
        if (!Number.isFinite(total) || !Number.isFinite(free) || !Number.isFinite(util)) {
            return null;
        }
        return { total, free, util };
    }
    catch {
        // nvidia-smi missing, timed out, or exited with an error.
        return null;
    }
}
|
|
59
|
+
/**
 * Cross-platform GPU stats, ported from the Electron host's getGPUStats so both
 * hosts classify utilization identically. Respects the user's selected GPU on
 * multi-GPU systems (falling back to the first controller), and treats
 * unified-memory machines (Apple Silicon, Linux AMD APUs) as RAM-backed rather
 * than VRAM-backed.
 *
 * @param mem Memory sample exposing `total` and `available` in bytes.
 * @param gpuIndex Index of the selected controller in the graphics listing.
 * @returns `{ gpuUsage, totalVRAM, vramFreeMB, type }` where `type` is one of
 *   "CPU", "GPU", "Hybrid" or "CPU+GPU".
 */
async function getGpuStats(mem, gpuIndex) {
    const ginfo = await getCachedGraphics();
    const controllers = ginfo.controllers ?? [];
    const g = controllers[gpuIndex] ??
        controllers[0] ??
        {};
    let gpuUsage = 0;
    let totalVRAM = 0;
    let vramFreeMB = 0;
    let type = "CPU";
    const ramTotalMB = mem.total / 1024 / 1024;
    const ramFreeMB = mem.available / 1024 / 1024;
    // 🍎 Apple Silicon — unified memory, no discrete VRAM.
    if (isAppleSilicon) {
        gpuUsage = g.utilizationGpu ?? 0;
        if (!gpuUsage) {
            // systeminformation gave no utilization; fall back to the GPU sampler,
            // whose result is scaled by 100 here.
            const sampled = await sampleGpuUtilization();
            if (sampled !== null)
                gpuUsage = sampled * 100;
        }
        totalVRAM = ramTotalMB;
        vramFreeMB = ramFreeMB;
        // Under GPU pressure, discount free unified memory (floor at 50%).
        vramFreeMB = Math.max(vramFreeMB * (1 - gpuUsage / 100), vramFreeMB * 0.5);
        type = gpuUsage > 15 ? "Hybrid" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    const vendor = (g.vendor ?? "").toLowerCase();
    // "ati" must match as a whole word: a plain substring test also matches the
    // "ati" inside "corporation", which misclassified "Intel Corporation" and
    // "NVIDIA Corporation" as AMD.
    const isAMD = vendor.includes("amd") || /\bati\b/.test(vendor) || vendor.includes("advanced micro");
    const isIntel = vendor.includes("intel");
    const isNvidia = vendor.includes("nvidia");
    // 🟦 NVIDIA (Windows & Linux) — exact numbers from nvidia-smi.
    if (isNvidia) {
        const nv = await queryNvidiaSmi(gpuIndex);
        if (nv) {
            totalVRAM = nv.total;
            vramFreeMB = nv.free;
            gpuUsage = nv.util;
            type = gpuUsage > 10 ? "GPU" : "CPU";
            return { gpuUsage, totalVRAM, vramFreeMB, type };
        }
        // nvidia-smi unavailable → fall through to systeminformation values.
    }
    // 🟥 AMD (Windows & Linux).
    if (isAMD) {
        gpuUsage = g.utilizationGpu ?? 0;
        totalVRAM = g.vram ?? g.memoryTotal ?? 0;
        vramFreeMB = g.memoryFree ?? 0;
        // Linux AMD APU (shared system RAM, e.g. Radeon 880M/890M) — treat as unified.
        if (process.platform === "linux" && totalVRAM < 2048) {
            totalVRAM = ramTotalMB;
            vramFreeMB = ramFreeMB;
            type = "Hybrid";
            return { gpuUsage, totalVRAM, vramFreeMB, type };
        }
        // AMD driver quirk: free sometimes mirrors total — estimate from utilization.
        if (vramFreeMB >= totalVRAM * 0.95) {
            vramFreeMB = totalVRAM * (1 - gpuUsage / 100);
        }
        type = gpuUsage > 25 ? "GPU" : gpuUsage > 10 ? "Hybrid" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    // 🟫 Intel iGPU.
    if (isIntel) {
        gpuUsage = g.utilizationGpu ?? 0;
        totalVRAM = g.vram ?? 0;
        vramFreeMB = g.memoryFree ?? 0;
        type = gpuUsage > 20 ? "Hybrid" : gpuUsage > 10 ? "CPU+GPU" : "CPU";
        return { gpuUsage, totalVRAM, vramFreeMB, type };
    }
    // ⚪ Unknown / CPU-only (also reached for NVIDIA when nvidia-smi is unavailable).
    gpuUsage = g.utilizationGpu ?? 0;
    totalVRAM = g.vram ?? 0;
    vramFreeMB = g.memoryFree ?? 0;
    type = "CPU";
    return { gpuUsage, totalVRAM, vramFreeMB, type };
}
|
|
142
|
+
/**
 * Effective free memory for scheduling, ported verbatim from the Electron host.
 *
 * - "Hybrid": free VRAM (or 80% of total VRAM when free VRAM is not positive)
 *   plus a quarter of free RAM.
 * - "GPU" with a known total: free VRAM, or 80% of total when free is 0/falsy.
 * - Anything else: free RAM.
 *
 * @param args `{ vramFreeMB, totalVRAM, ramFreeMB, utilizationType }`.
 * @returns The memory figure the scheduler should treat as free.
 */
export function calculateEffectiveMemory(args) {
    // Stand-in used when the driver reports no free-VRAM figure.
    const assumedFreeVram = args.totalVRAM * 0.8;
    switch (args.utilizationType) {
        case "Hybrid": {
            const vramShare = args.vramFreeMB > 0 ? args.vramFreeMB : assumedFreeVram;
            return vramShare + args.ramFreeMB * 0.25;
        }
        case "GPU":
            if (args.totalVRAM > 0) {
                return args.vramFreeMB ? args.vramFreeMB : assumedFreeVram;
            }
            return args.ramFreeMB;
        default:
            return args.ramFreeMB;
    }
}
|
|
153
|
+
/**
 * Coarse host status, ported verbatim from the Electron host.
 *
 * Checks run in priority order: a GPU host below the 2000 MB VRAM floor is
 * "Overloaded"; CPU above 85 or GPU above 90 is "Busy"; effective free memory
 * below 1500 is "Overloaded"; everything else is "Available".
 *
 * @param args `{ utilizationType, cpuUsage, gpuUsage, effectiveFreeMB, vramFreeMB, totalVRAM }`.
 * @returns "Overloaded", "Busy" or "Available".
 */
export function classifyStatus(args) {
    const { utilizationType, cpuUsage, gpuUsage, effectiveFreeMB, vramFreeMB, totalVRAM } = args;
    const gpuBelowVramFloor = utilizationType === "GPU" && totalVRAM > 0 && vramFreeMB < 2000;
    if (gpuBelowVramFloor) {
        return "Overloaded";
    }
    if (cpuUsage > 85 || gpuUsage > 90) {
        return "Busy";
    }
    return effectiveFreeMB < 1500 ? "Overloaded" : "Available";
}
|
|
166
|
+
/**
 * Whether this host can take a new request.
 *
 * A host is refused when its status is "Overloaded" or "Busy", or when it is
 * classified as "GPU" and has less than 2000 MB of free VRAM. The VRAM floor
 * is applied ONLY to the "GPU" utilization type; every other type is judged
 * purely on `status`, which keeps CPU-only hosts eligible while preserving
 * VRAM headroom on real GPU hosts.
 *
 * @param args `{ utilizationType, status, vramFreeMB }`.
 * @returns `true` when the host may be given new work.
 */
export function computeCanAcceptRequests(args) {
    const saturated = args.status === "Overloaded" || args.status === "Busy";
    const belowVramFloor = args.utilizationType === "GPU" && args.vramFreeMB < 2000;
    return !(saturated || belowVramFloor);
}
|
|
184
|
+
/**
 * Build the full HostCapacity snapshot for a HeartbeatV2 message.
 *
 * Best-effort: every probe is wrapped so a failure degrades a field rather
 * than rejecting the whole snapshot.
 * - CPU probe failure: `cpuUsage` is reported as 0.
 * - Memory probe failure: RAM figures are reported as 0, which drives
 *   `effectiveFreeMB` to 0 on a CPU-classified host and so marks it as unable
 *   to accept work.
 * - GPU probe failure: the host is reported as "CPU" with no VRAM, the same
 *   shape the GPU probe uses for unknown / CPU-only hardware.
 *
 * @param hostId Identifier copied into the snapshot.
 * @param gpuIndex Index of the selected GPU, forwarded to the GPU probe.
 * @param activeSessions Session count copied into the snapshot.
 * @returns The assembled snapshot, stamped with the current time (ISO-8601).
 */
export async function buildHostCapacity(hostId, gpuIndex, activeSessions) {
    // Run one probe; substitute `fallback` when it throws, rejects or yields nothing.
    const probe = async (run, fallback) => {
        try {
            return (await run()) ?? fallback;
        }
        catch {
            return fallback;
        }
    };
    const [cpu, mem] = await Promise.all([
        probe(() => si.currentLoad(), {}),
        probe(() => si.mem(), { total: 0, available: 0 }),
    ]);
    const cpuUsage = cpu.currentLoad ?? 0;
    // systeminformation reports bytes; convert to MB.
    const ramFreeMB = mem.available / 1024 / 1024;
    const ramTotalMB = mem.total / 1024 / 1024;
    const ramUsedMB = ramTotalMB - ramFreeMB;
    const { gpuUsage, totalVRAM, vramFreeMB, type: utilizationType } = await probe(
        () => getGpuStats(mem, gpuIndex),
        { gpuUsage: 0, totalVRAM: 0, vramFreeMB: 0, type: "CPU" });
    const effectiveFreeMB = calculateEffectiveMemory({
        vramFreeMB,
        totalVRAM,
        ramFreeMB,
        utilizationType,
    });
    const status = classifyStatus({
        utilizationType,
        cpuUsage,
        gpuUsage,
        effectiveFreeMB,
        vramFreeMB,
        totalVRAM,
    });
    const canAcceptRequests = computeCanAcceptRequests({ utilizationType, status, vramFreeMB });
    return {
        hostId,
        utilizationType,
        cpuUsage,
        gpuUsage,
        ramUsedMB,
        ramFreeMB,
        ramTotalMB,
        vramFreeMB,
        totalVRAM,
        effectiveFreeMB,
        activeSessions,
        status,
        canAcceptRequests,
        lastUpdated: new Date().toISOString(),
    };
}
|
|
228
|
+
//# sourceMappingURL=capacity.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capacity.js","sourceRoot":"","sources":["../../src/core/capacity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,IAAI,IAAI,MAAM,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAEtD,MAAM,SAAS,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AACtC,MAAM,qBAAqB,GAAG,IAAI,CAAC;AACnC,MAAM,qBAAqB,GAAG,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;AAE5C,MAAM,cAAc,GAAG,OAAO,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,EAAE,KAAK,OAAO,CAAC;AA2B7E,8EAA8E;AAC9E,+EAA+E;AAC/E,+EAA+E;AAC/E,gFAAgF;AAChF,8EAA8E;AAE9E,IAAI,cAAmF,CAAC;AACxF,IAAI,gBAAwE,CAAC;AAE7E,KAAK,UAAU,iBAAiB;IAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,IAAI,cAAc,IAAI,GAAG,GAAG,cAAc,CAAC,EAAE,GAAG,qBAAqB,EAAE,CAAC;QACtE,OAAO,cAAc,CAAC,IAAI,CAAC;IAC7B,CAAC;IACD,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,gBAAgB,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YAC7C,cAAc,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;YAC1C,gBAAgB,GAAG,SAAS,CAAC;YAC7B,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,KAAK,UAAU,cAAc,CAC3B,QAAgB;IAEhB,IAAI,CAAC;QACH,MAAM,GAAG,GACP,OAAO,CAAC,QAAQ,KAAK,OAAO;YAC1B,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,aAAa,4BAA4B;YACxE,CAAC,CAAC,YAAY,CAAC;QACnB,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAChC,GAAG,EACH;YACE,sDAAsD;YACtD,+BAA+B;SAChC,EACD,EAAE,OAAO,EAAE,qBAAqB,EAAE,CACnC,CAAC;QACF,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9E,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;QACzC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QACvB,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7E,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,OA
AO,IAAI,CAAC;QACvE,OAAO,EAAE,KAAK,EAAE,KAAM,EAAE,IAAI,EAAE,IAAK,EAAE,IAAI,EAAE,IAAK,EAAE,CAAC;IACrD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AASD;;;;;GAKG;AACH,KAAK,UAAU,WAAW,CACxB,GAAiC,EACjC,QAAgB;IAEhB,MAAM,KAAK,GAAG,MAAM,iBAAiB,EAAE,CAAC;IACxC,MAAM,WAAW,GAAG,KAAK,CAAC,WAAW,IAAI,EAAE,CAAC;IAC5C,MAAM,CAAC,GACL,WAAW,CAAC,QAAQ,CAAC;QACrB,WAAW,CAAC,CAAC,CAAC;QACb,EAAkD,CAAC;IAEtD,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,IAAI,GAAG,KAAK,CAAC;IAEjB,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3C,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IAE9C,uDAAuD;IACvD,IAAI,cAAc,EAAE,CAAC;QACnB,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,MAAM,oBAAoB,EAAE,CAAC;YAC7C,IAAI,OAAO,KAAK,IAAI;gBAAE,QAAQ,GAAG,OAAO,GAAG,GAAG,CAAC;QACjD,CAAC;QACD,SAAS,GAAG,UAAU,CAAC;QACvB,UAAU,GAAG,SAAS,CAAC;QACvB,mEAAmE;QACnE,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,QAAQ,GAAG,GAAG,CAAC,EAAE,UAAU,GAAG,GAAG,CAAC,CAAC;QAC3E,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;QACxC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC9C,MAAM,KAAK,GACT,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IACxF,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,+DAA+D;IAC/D,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,EAAE,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,SAAS,GAAG,EAAE,CAAC,KAAK,CAAC;YACrB,UAAU,GAAG,EAAE,CAAC,IAAI,CAAC;YACrB,QAAQ,GAAG,EAAE,CAAC,IAAI,CAAC;YACnB,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;YACrC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QACnD,CAAC;QACD,qEAAqE;IACvE,CAAC;IAED,4BAA4B;IAC5B,IAAI,KAAK,EAAE,CAAC;QACV,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,SAAS,GAA
G,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC;QACzC,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAE/B,+EAA+E;QAC/E,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,IAAI,SAAS,GAAG,IAAI,EAAE,CAAC;YACrD,SAAS,GAAG,UAAU,CAAC;YACvB,UAAU,GAAG,SAAS,CAAC;YACvB,IAAI,GAAG,QAAQ,CAAC;YAChB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QACnD,CAAC;QAED,8EAA8E;QAC9E,IAAI,UAAU,IAAI,SAAS,GAAG,IAAI,EAAE,CAAC;YACnC,UAAU,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,QAAQ,GAAG,GAAG,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;QAChE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;QACjC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;QACxB,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAC/B,IAAI,GAAG,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACpE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,wBAAwB;IACxB,QAAQ,GAAG,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC;IACjC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IACxB,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;IAC/B,IAAI,GAAG,KAAK,CAAC;IACb,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;AACnD,CAAC;AAED,oFAAoF;AACpF,MAAM,UAAU,wBAAwB,CAAC,IAKxC;IACC,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC;IACnE,IAAI,eAAe,KAAK,QAAQ,EAAE,CAAC;QACjC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,IAAI,CAAC;IAC5E,CAAC;IACD,IAAI,eAAe,KAAK,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAC/C,OAAO,UAAU,IAAI,SAAS,GAAG,GAAG,CAAC;IACvC,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,cAAc,CAAC,IAO9B;IACC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,eAAe,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;IAC7F,IAAI,eAAe,KAAK,KAAK,IAAI,SAAS,GAAG,CAAC,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,YAAY,CAAC;IACzF,IAAI,QAAQ,GAAG,EAAE;QAAE,OAAO,MAAM,CAAC;IACjC,IAAI,QAAQ,GAAG,EAAE;QAAE,OAAO,MAAM,CA
AC;IACjC,IAAI,eAAe,GAAG,IAAI;QAAE,OAAO,YAAY,CAAC;IAChD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CAAC,IAIxC;IACC,MAAM,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IACrD,IAAI,MAAM,KAAK,YAAY,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,KAAK,CAAC;IAC/D,IAAI,eAAe,KAAK,KAAK,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,KAAK,CAAC;IACjE,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,MAAc,EACd,QAAgB,EAChB,cAAsB;IAEtB,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,IAAI,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,GAAG,CAAC,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC;IAC9C,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC;IAC3C,MAAM,SAAS,GAAG,UAAU,GAAG,SAAS,CAAC;IAEzC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,IAAI,EAAE,eAAe,EAAE,GAAG,MAAM,WAAW,CAClF,GAAG,EACH,QAAQ,CACT,CAAC;IAEF,MAAM,eAAe,GAAG,wBAAwB,CAAC;QAC/C,UAAU;QACV,SAAS;QACT,SAAS;QACT,eAAe;KAChB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,cAAc,CAAC;QAC5B,eAAe;QACf,QAAQ;QACR,QAAQ;QACR,eAAe;QACf,UAAU;QACV,SAAS;KACV,CAAC,CAAC;IAEH,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;IAE5F,OAAO;QACL,MAAM;QACN,eAAe;QACf,QAAQ;QACR,QAAQ;QACR,SAAS;QACT,SAAS;QACT,UAAU;QACV,UAAU;QACV,SAAS;QACT,eAAe;QACf,cAAc;QACd,MAAM;QACN,iBAAiB;QACjB,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACtC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmWorker.d.ts","sourceRoot":"","sources":["../../src/core/llmWorker.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import { getLlama, LlamaChatSession } from "node-llama-cpp";
|
|
2
|
+
// Handle returned by getLlama(); set by "load-model" and cleared by "dispose".
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let llama = null;
// Currently loaded model; set by "load-model" and cleared by "dispose".
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let model = null;
// conversationId → { context, session, abortController } for every open conversation.
const contexts = new Map();
|
|
7
|
+
/**
 * Post a message to the parent process over the IPC channel.
 * Does nothing when the process has no `process.send`.
 */
function send(msg) {
    if (process.send === undefined || process.send === null) {
        return;
    }
    process.send(msg);
}
|
|
10
|
+
/**
 * Forward one log line to the parent process as a `{ type: "log" }` message.
 *
 * @param level Severity label carried in the message ("info", "warn", "error").
 * @param message Text of the log line.
 */
function fwdLog(level, message) {
    const logEntry = { type: "log", level, message };
    send(logEntry);
}
|
|
13
|
+
// Route console output through the parent process: each console method is
// replaced by a forwarder that stringifies its arguments, joins them with a
// single space, and logs them at the paired level.
for (const [method, level] of [["log", "info"], ["warn", "warn"], ["error", "error"]]) {
    console[method] = (...args) => fwdLog(level, args.map(String).join(" "));
}
|
|
16
|
+
// An uncaught exception is reported to the parent and then ends the worker
// with exit code 1.
process.on("uncaughtException", (err) => {
    const detail = `llmWorker uncaughtException: ${err.message}`;
    fwdLog("error", detail);
    process.exit(1);
});
// An unhandled promise rejection is only reported; the worker keeps running.
process.on("unhandledRejection", (reason) => {
    const detail = `llmWorker unhandledRejection: ${String(reason)}`;
    fwdLog("error", detail);
});
|
|
23
|
+
/**
 * Abort the in-flight prompt of every conversation that has one.
 * Errors raised while aborting are deliberately ignored.
 */
function cancelAll() {
    contexts.forEach((entry) => {
        if (!entry.abortController) {
            return;
        }
        try {
            entry.abortController.abort();
        }
        catch {
            // ignored
        }
    });
}
|
|
35
|
+
/** Best printable description of any thrown value (Error or not). */
function describeError(err) {
    return typeof err?.message === "string" ? err.message : String(err);
}
/** Abort an entry's in-flight prompt, if any; abort errors are ignored. */
function abortQuietly(entry) {
    if (!entry?.abortController)
        return;
    try {
        entry.abortController.abort();
    }
    catch {
        // ignored
    }
}
/**
 * Call `resource.dispose()` if it exists and swallow both synchronous throws
 * and asynchronous rejections. The call itself is started synchronously.
 */
async function disposeQuietly(resource) {
    try {
        await resource?.dispose?.();
    }
    catch {
        // ignored
    }
}
/**
 * Send a final message and exit only once the send has completed (or failed),
 * so the message is not dropped by an immediate process.exit().
 */
function sendThenExit(msg, code) {
    const exit = () => process.exit(code);
    if (typeof process.send !== "function") {
        exit();
        return;
    }
    try {
        process.send(msg, exit);
    }
    catch {
        exit();
    }
}
/**
 * IPC message dispatcher. Each message is an object with a `type`:
 *
 * - "load-model"     { modelPath, gpuLayers }  → "loaded" { trainContextSize } | "error"
 * - "create-context" { conversationId }        → "context-created" | "error"
 * - "clear-context"  { conversationId }        → (no reply)
 * - "prompt"         { requestId, conversationId, input, options? }
 *                    → "token"* then "done" | "cancelled" | "error"
 * - "cancel"         { conversationId }        → (no reply; the prompt answers "cancelled")
 * - "cancel-all"                               → (no reply)
 * - "dispose"                                  → "disposed", then the process exits with code 0
 *
 * Model and context creation are retried once with
 * `ignoreMemorySafetyChecks: true` when the first attempt fails with an
 * InsufficientMemoryError. Anything that escapes a case is logged to the parent.
 */
process.on("message", (msg) => {
    const m = msg;
    const type = m?.["type"];
    void (async () => {
        try {
            switch (type) {
                case "load-model": {
                    const modelPath = m["modelPath"];
                    const gpuLayers = m["gpuLayers"];
                    try {
                        llama = await getLlama();
                        model = await llama.loadModel({ modelPath, gpuLayers });
                    }
                    catch (err) {
                        if (err?.constructor?.name !== "InsufficientMemoryError") {
                            send({ type: "error", message: describeError(err) });
                            break;
                        }
                        try {
                            model = await llama.loadModel({
                                modelPath,
                                gpuLayers,
                                ignoreMemorySafetyChecks: true,
                            });
                        }
                        catch (err2) {
                            send({ type: "error", message: describeError(err2) });
                            break;
                        }
                    }
                    const trainContextSize = model?.trainContextSize ?? 4096;
                    send({ type: "loaded", trainContextSize });
                    break;
                }
                case "create-context": {
                    const conversationId = m["conversationId"];
                    try {
                        const trainSize = model?.trainContextSize ?? 4096;
                        // Context size is capped at 4096 tokens.
                        const contextSize = Math.min(trainSize, 4096);
                        let context;
                        try {
                            context = await model.createContext({ contextSize });
                        }
                        catch (err) {
                            if (err?.constructor?.name !== "InsufficientMemoryError") {
                                throw err;
                            }
                            context = await model.createContext({ contextSize, ignoreMemorySafetyChecks: true });
                        }
                        const session = new LlamaChatSession({ contextSequence: context.getSequence() });
                        const previous = contexts.get(conversationId);
                        contexts.set(conversationId, { context, session, abortController: null });
                        if (previous) {
                            // Re-creating a conversation must not leak its old context.
                            abortQuietly(previous);
                            void disposeQuietly(previous.context);
                        }
                        send({ type: "context-created", conversationId });
                    }
                    catch (err) {
                        send({ type: "error", conversationId, message: describeError(err) });
                    }
                    break;
                }
                case "clear-context": {
                    const conversationId = m["conversationId"];
                    const entry = contexts.get(conversationId);
                    if (entry) {
                        abortQuietly(entry);
                        contexts.delete(conversationId);
                        await disposeQuietly(entry.context);
                    }
                    break;
                }
                case "prompt": {
                    const requestId = m["requestId"];
                    const conversationId = m["conversationId"];
                    const input = m["input"];
                    const options = m["options"];
                    const entry = contexts.get(conversationId);
                    if (!entry) {
                        send({ type: "error", requestId, message: `No context for "${conversationId}"` });
                        break;
                    }
                    const controller = new AbortController();
                    entry.abortController = controller;
                    let promptTokens = 0;
                    let fullText = "";
                    try {
                        // Tokenizing inside the try means a failure here (e.g. no
                        // model loaded) is answered with an "error" for this request.
                        promptTokens = model.tokenize(input).length;
                        const promptOpts = {
                            onTextChunk: (chunk) => {
                                fullText += chunk;
                                send({ type: "token", requestId, chunk });
                            },
                            signal: controller.signal,
                        };
                        if (options?.maxTokens !== undefined)
                            promptOpts["maxTokens"] = options.maxTokens;
                        if (options?.temperature !== undefined)
                            promptOpts["temperature"] = options.temperature;
                        if (options?.stop?.length)
                            promptOpts["customStopTriggers"] = options.stop;
                        await entry.session.prompt(input, promptOpts);
                        fullText = fullText.trim();
                        const completionTokens = model.tokenize(fullText).length;
                        send({ type: "done", requestId, text: fullText, promptTokens, completionTokens });
                    }
                    catch (err) {
                        if (err?.name === "AbortError") {
                            let completionTokens = 0;
                            try {
                                completionTokens = model ? model.tokenize(fullText).length : 0;
                            }
                            catch {
                                // Token count is informational; report 0 rather than lose the reply.
                            }
                            send({ type: "cancelled", requestId, text: fullText, promptTokens, completionTokens });
                        }
                        else {
                            send({ type: "error", requestId, message: describeError(err) });
                        }
                    }
                    finally {
                        // Only clear our own controller; a newer prompt may have replaced it.
                        if (entry.abortController === controller) {
                            entry.abortController = null;
                        }
                    }
                    break;
                }
                case "cancel": {
                    abortQuietly(contexts.get(m["conversationId"]));
                    break;
                }
                case "cancel-all": {
                    cancelAll();
                    break;
                }
                case "dispose": {
                    cancelAll();
                    // Give aborted prompts a moment to unwind before tearing down.
                    await new Promise((r) => setTimeout(r, 300));
                    for (const entry of contexts.values()) {
                        void disposeQuietly(entry.context);
                    }
                    contexts.clear();
                    void disposeQuietly(model);
                    void disposeQuietly(llama);
                    model = null;
                    llama = null;
                    sendThenExit({ type: "disposed" }, 0);
                    break;
                }
                default:
                    fwdLog("warn", `llmWorker: unknown message type "${type ?? "(none)"}"`);
            }
        }
        catch (err) {
            fwdLog("error", `llmWorker error in "${type}": ${describeError(err)}`);
        }
    })();
});
|
|
238
|
+
send({ type: "ready" });
|
|
239
|
+
//# sourceMappingURL=llmWorker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmWorker.js","sourceRoot":"","sources":["../../src/core/llmWorker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAW5D,8DAA8D;AAC9D,IAAI,KAAK,GAAQ,IAAI,CAAC;AACtB,8DAA8D;AAC9D,IAAI,KAAK,GAAQ,IAAI,CAAC;AACtB,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAwB,CAAC;AAEjD,SAAS,IAAI,CAAC,GAAY;IACxB,OAAO,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;AACtB,CAAC;AAED,SAAS,MAAM,CAAC,KAAe,EAAE,OAAe;IAC9C,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;AACxC,CAAC;AAED,OAAO,CAAC,GAAG,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjF,OAAO,CAAC,IAAI,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAClF,OAAO,CAAC,KAAK,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAEpF,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,GAAU,EAAE,EAAE;IAC7C,MAAM,CAAC,OAAO,EAAE,gCAAgC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAe,EAAE,EAAE;IACnD,MAAM,CAAC,OAAO,EAAE,iCAAiC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AACrE,CAAC,CAAC,CAAC;AAEH,SAAS,SAAS;IAChB,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,KAAK,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;YAChC,CAAC;YAAC,MAAM,CAAC;gBACP,UAAU;YACZ,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAED,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAY,EAAE,EAAE;IACrC,MAAM,CAAC,GAAG,GAA8B,CAAC;IACzC,MAAM,IAAI,GAAG,CAAC,EAAE,CAAC,MAAM,CAAuB,CAAC;IAE/C,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,IAAI,CAAC;YACH,QAAQ,IAAI,EAAE,CAAC;gBACb,KAAK,YAAY,CAAC,CAAC,CAAC;oBAClB,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,IAAI,CAAC;wBACH,KAAK,GAAG,MAAM,QAAQ,EAAE,CAAC;wBACzB,yGAAyG;wBACzG,KAAK,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;oBAC1D,CAAC;oBAAC,OAAO,GAAG,E
AAE,CAAC;wBACb,MAAM,MAAM,GAAG,GAA0C,CAAC;wBAC1D,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,KAAK,yBAAyB,EAAE,CAAC;4BAC5D,IAAI,CAAC;gCACH,yGAAyG;gCACzG,KAAK,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC;oCAC5B,SAAS;oCACT,SAAS;oCACT,wBAAwB,EAAE,IAAI;iCAC/B,CAAC,CAAC;4BACL,CAAC;4BAAC,OAAO,IAAI,EAAE,CAAC;gCACd,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAG,IAAc,CAAC,OAAO,EAAE,CAAC,CAAC;gCAC1D,MAAM;4BACR,CAAC;wBACH,CAAC;6BAAM,CAAC;4BACN,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;4BACzD,MAAM;wBACR,CAAC;oBACH,CAAC;oBACD,sEAAsE;oBACtE,MAAM,gBAAgB,GAAI,KAAK,EAAE,gBAAuC,IAAI,IAAI,CAAC;oBACjF,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC,CAAC;oBAC3C,MAAM;gBACR,CAAC;gBAED,KAAK,gBAAgB,CAAC,CAAC,CAAC;oBACtB,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,IAAI,CAAC;wBACH,sEAAsE;wBACtE,MAAM,SAAS,GAAI,KAAK,EAAE,gBAAuC,IAAI,IAAI,CAAC;wBAC1E,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;wBAC9C,8DAA8D;wBAC9D,IAAI,OAAY,CAAC;wBACjB,IAAI,CAAC;4BACH,yGAAyG;4BACzG,OAAO,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC;wBACvD,CAAC;wBAAC,OAAO,GAAG,EAAE,CAAC;4BACb,MAAM,MAAM,GAAG,GAA0C,CAAC;4BAC1D,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,KAAK,yBAAyB,EAAE,CAAC;gCAC5D,yGAAyG;gCACzG,OAAO,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,wBAAwB,EAAE,IAAI,EAAE,CAAC,CAAC;4BACvF,CAAC;iCAAM,CAAC;gCACN,MAAM,GAAG,CAAC;4BACZ,CAAC;wBACH,CAAC;wBACD,yGAAyG;wBACzG,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC,EAAE,eAAe,EAAE,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;wBACjF,QAAQ,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC;wBAC1E,IAAI,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,cAAc,EAAE,CAAC,CAAC;oBACpD,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,cAAc,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;oBAC3E,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,eAAe,CAAC,CAAC,CAAC;oBACrB,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;oBAC3C,IAAI,KAAK,EAAE,CAAC;wBACV,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;4BAC1B,IAAI,CAAC;gCACH,KAAK,CAAC,eAAe,CAAC,KAA
K,EAAE,CAAC;4BAChC,CAAC;4BAAC,MAAM,CAAC;gCACP,UAAU;4BACZ,CAAC;wBACH,CAAC;wBACD,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;wBAChC,IAAI,CAAC;4BACH,yGAAyG;4BACzG,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;wBAC7B,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ,CAAC,CAAC,CAAC;oBACd,MAAM,SAAS,GAAG,CAAC,CAAC,WAAW,CAAW,CAAC;oBAC3C,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAW,CAAC;oBACrD,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAW,CAAC;oBACnC,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAEd,CAAC;oBAEd,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;oBAC3C,IAAI,CAAC,KAAK,EAAE,CAAC;wBACX,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,mBAAmB,cAAc,GAAG,EAAE,CAAC,CAAC;wBAClF,MAAM;oBACR,CAAC;oBAED,KAAK,CAAC,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;oBAC9C,yGAAyG;oBACzG,MAAM,YAAY,GAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAe,CAAC,MAAM,CAAC;oBACjE,IAAI,QAAQ,GAAG,EAAE,CAAC;oBAElB,IAAI,CAAC;wBACH,MAAM,UAAU,GAA4B;4BAC1C,WAAW,EAAE,CAAC,KAAa,EAAE,EAAE;gCAC7B,QAAQ,IAAI,KAAK,CAAC;gCAClB,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;4BAC5C,CAAC;4BACD,MAAM,EAAE,KAAK,CAAC,eAAe,CAAC,MAAM;yBACrC,CAAC;wBACF,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS;4BAAE,UAAU,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,SAAS,CAAC;wBAClF,IAAI,OAAO,EAAE,WAAW,KAAK,SAAS;4BAAE,UAAU,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC;wBACxF,IAAI,OAAO,EAAE,IAAI,EAAE,MAAM;4BAAE,UAAU,CAAC,oBAAoB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;wBAE3E,MAAM,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;wBAE9C,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;wBAC3B,yGAAyG;wBACzG,MAAM,gBAAgB,GAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAe,CAAC,MAAM,CAAC;wBACxE,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC,CAAC;oBACpF,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,CAAC,GAAG,GAAwB,CAAC;wBACnC,IAAI,CAAC,EAAE,IAAI,KAAK,YAAY,EAAE,CAAC;4BAC7B,yGAAyG;4BACzG,MAAM,gBAAgB,GAAG,KAAK,CAAC,CAAC,CAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;4BACpF,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC,CAA
C;wBACzF,CAAC;6BAAM,CAAC;4BACN,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;wBACtE,CAAC;oBACH,CAAC;4BAAS,CAAC;wBACT,KAAK,CAAC,eAAe,GAAG,IAAI,CAAC;oBAC/B,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,QAAQ,CAAC,CAAC,CAAC;oBACd,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,gBAAgB,CAAW,CAAC,CAAC;oBAC1D,IAAI,KAAK,EAAE,eAAe,EAAE,CAAC;wBAC3B,IAAI,CAAC;4BACH,KAAK,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;wBAChC,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,MAAM;gBACR,CAAC;gBAED,KAAK,YAAY,CAAC,CAAC,CAAC;oBAClB,SAAS,EAAE,CAAC;oBACZ,MAAM;gBACR,CAAC;gBAED,KAAK,SAAS,CAAC,CAAC,CAAC;oBACf,SAAS,EAAE,CAAC;oBACZ,MAAM,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;oBACnD,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC;wBACtC,IAAI,CAAC;4BACH,yGAAyG;4BACzG,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;wBAC7B,CAAC;wBAAC,MAAM,CAAC;4BACP,UAAU;wBACZ,CAAC;oBACH,CAAC;oBACD,QAAQ,CAAC,KAAK,EAAE,CAAC;oBACjB,IAAI,CAAC;wBACH,yGAAyG;wBACzG,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC;oBACrB,CAAC;oBAAC,MAAM,CAAC;wBACP,UAAU;oBACZ,CAAC;oBACD,IAAI,CAAC;wBACH,yGAAyG;wBACzG,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC;oBACrB,CAAC;oBAAC,MAAM,CAAC;wBACP,UAAU;oBACZ,CAAC;oBACD,KAAK,GAAG,IAAI,CAAC;oBACb,KAAK,GAAG,IAAI,CAAC;oBACb,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;oBAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAChB,MAAM;gBACR,CAAC;gBAED;oBACE,MAAM,CAAC,MAAM,EAAE,oCAAoC,IAAI,IAAI,QAAQ,GAAG,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,OAAO,EAAE,uBAAuB,IAAI,MAAO,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG7C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,OAAO,EAAoB,KAAK,cAAc,EAAE,MAAM,iBAAiB,CAAC;AA2BxE,MAAM,MAAM,kBAAkB,GAAG,cAAc,GAAG,YAAY,GAAG,WAAW,GAAG,OAAO,GAAG,WAAW,CAAC;AAErG,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,aAAa,CAAC;IACrB,MAAM,EAAE,aAAa,CAAC;IACtB,MAAM,EAAE,cAAc,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,mBAAmB;IAkBlB,OAAO,CAAC,QAAQ,CAAC,IAAI;IAjBjC,OAAO,CAAC,UAAU,CAA8B;IAChD,OAAO,CAAC,iBAAiB,CAA+B;IACxD,OAAO,CAAC,eAAe,CAA+B;IACtD,OAAO,CAAC,cAAc,CAA+B;IACrD,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,MAAM,CAAsC;IAEpD,OAAO,CAAC,iBAAiB,CAAS;IAElC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAmB;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAgE;IACzF,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA0B;IACpD,OAAO,CAAC,cAAc,CAAgB;gBAET,IAAI,EAAE,mBAAmB;IAgBtD,SAAS,IAAI,kBAAkB;IAI/B,cAAc,IAAI;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAIvG,aAAa,IAAI;QAAE,EAAE,EAAE,cAAc,CAAC;QAAC,GAAG,EAAE,cAAc,CAAA;KAAE;IAI5D,iBAAiB,IAAI,MAAM,EAAE;IAIvB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAgB/C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAsHxB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAmBnB,gBAAgB;IA4B9B,OAAO,CAAC,iBAAiB;IAanB,mBAAmB,IAAI,OAAO,CAAC,IAAI,CAAC;YA2B5B,yBAAyB;YAgDzB,YAAY;IA0E1B,OAAO,CAAC,mBAAmB;IAM3B,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,UAAU;YAeJ,IAAI;IASlB,OAAO,CAAC,SAAS;CAKlB"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { arch as osArch, platform as osPlatform } from "node:os";
|
|
2
2
|
import { HttpTransportType, HubConnectionBuilder, LogLevel, } from "@microsoft/signalr";
|
|
3
|
+
import { buildHostCapacity } from "./capacity.js";
|
|
3
4
|
import { getLogger } from "./logger.js";
|
|
4
5
|
import { listModelsSync } from "./models-fs.js";
|
|
5
6
|
import { SharedGGUFRunner } from "./shared-runner.js";
|
|
@@ -32,6 +33,12 @@ export class OrchestratorService {
|
|
|
32
33
|
paths: opts.paths,
|
|
33
34
|
config: { schemaVersion: 1, lastWriter: "cli", schemaUpdatedAt: "" },
|
|
34
35
|
});
|
|
36
|
+
this.sharedRunner.setWorkerCrashCallback((crash) => {
|
|
37
|
+
getLogger().error(`💥 llmWorker crash: model="${crash.model}" — clearing all active sessions`);
|
|
38
|
+
// All conversations are lost when the worker process dies — clear the session map
|
|
39
|
+
// so subsequent prompts trigger a fresh model load rather than using stale state.
|
|
40
|
+
this.sessions.clear();
|
|
41
|
+
});
|
|
35
42
|
}
|
|
36
43
|
getStatus() {
|
|
37
44
|
return this.status;
|
|
@@ -257,6 +264,8 @@ export class OrchestratorService {
|
|
|
257
264
|
const cfg = await this.opts.config.load();
|
|
258
265
|
if (!cfg.hostId || !cfg.hostName)
|
|
259
266
|
return;
|
|
267
|
+
const hostId = cfg.hostId;
|
|
268
|
+
const gpuIndex = cfg.selectedGpuIndex ?? 0;
|
|
260
269
|
const capabilities = listModelsSync(this.opts.paths, cfg);
|
|
261
270
|
try {
|
|
262
271
|
await this.connection.invoke("RegisterHost", {
|
|
@@ -278,12 +287,25 @@ export class OrchestratorService {
|
|
|
278
287
|
this.heartbeatInterval = setInterval(async () => {
|
|
279
288
|
if (this.connection?.state !== "Connected")
|
|
280
289
|
return;
|
|
290
|
+
// v1 heartbeat — liveness only. Still required: the server uses it for
|
|
291
|
+
// uptime tracking and DB heartbeat persistence (HeartbeatV2 does neither).
|
|
281
292
|
try {
|
|
282
|
-
await this.connection.send("Heartbeat",
|
|
293
|
+
await this.connection.send("Heartbeat", hostId);
|
|
283
294
|
}
|
|
284
295
|
catch (err) {
|
|
285
296
|
getLogger().warn("orchestrator: heartbeat error:", err.message);
|
|
286
297
|
}
|
|
298
|
+
// v2 heartbeat — live capacity (GPU/CPU/VRAM/RAM/sessions). Lets the
|
|
299
|
+
// orchestrator route around hosts that are out of VRAM or overloaded
|
|
300
|
+
// instead of assigning prompts until the host OOMs and crashes. Kept
|
|
301
|
+
// separate from v1 so a metrics-sampling hiccup never drops liveness.
|
|
302
|
+
try {
|
|
303
|
+
const capacity = await buildHostCapacity(hostId, gpuIndex, this.sessions.size);
|
|
304
|
+
await this.connection.send("HeartbeatV2", capacity);
|
|
305
|
+
}
|
|
306
|
+
catch (err) {
|
|
307
|
+
getLogger().warn("orchestrator: heartbeatV2 error:", err.message);
|
|
308
|
+
}
|
|
287
309
|
}, HEARTBEAT_INTERVAL_MS);
|
|
288
310
|
}
|
|
289
311
|
async handlePrompt(req) {
|