peerllm-host-cli 1.9.1 → 1.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/dashboard.d.ts +2 -1
- package/dist/cli/commands/dashboard.d.ts.map +1 -1
- package/dist/cli/commands/dashboard.js +76 -3
- package/dist/cli/commands/dashboard.js.map +1 -1
- package/dist/cli/commands/models.d.ts.map +1 -1
- package/dist/cli/commands/models.js +81 -11
- package/dist/cli/commands/models.js.map +1 -1
- package/dist/cli/commands/start.d.ts.map +1 -1
- package/dist/cli/commands/start.js +2 -1
- package/dist/cli/commands/start.js.map +1 -1
- package/dist/core/llmWorker.d.ts +2 -0
- package/dist/core/llmWorker.d.ts.map +1 -0
- package/dist/core/llmWorker.js +239 -0
- package/dist/core/llmWorker.js.map +1 -0
- package/dist/core/orchestrator.d.ts.map +1 -1
- package/dist/core/orchestrator.js +6 -0
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/shared-runner.d.ts +20 -6
- package/dist/core/shared-runner.d.ts.map +1 -1
- package/dist/core/shared-runner.js +339 -154
- package/dist/core/shared-runner.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,14 +1,24 @@
|
|
|
1
|
+
import * as cp from "node:child_process";
|
|
1
2
|
import { existsSync, promises as fsp } from "node:fs";
|
|
2
3
|
import { arch as osArch, platform as osPlatform } from "node:os";
|
|
3
4
|
import { join } from "node:path";
|
|
4
|
-
import {
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
5
6
|
import si from "systeminformation";
|
|
6
7
|
import { getLogger } from "./logger.js";
|
|
7
8
|
import { calculateGpuLayersFromConfig, parseModelFilename, } from "./model-info.js";
|
|
8
9
|
import { getLLMDirectory } from "./models-fs.js";
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Module-level constants
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
9
13
|
const DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1000;
|
|
10
14
|
const WATCHDOG_INTERVAL_MS = 60 * 1000;
|
|
11
15
|
const STALE_CONTEXT_LIMIT_MS = 5 * 60 * 1000;
|
|
16
|
+
const WORKER_READY_TIMEOUT_MS = 10_000;
|
|
17
|
+
const DISPOSE_SAFETY_TIMEOUT_MS = 5_000;
|
|
18
|
+
const workerScript = fileURLToPath(new URL("./llmWorker.js", import.meta.url));
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Graphics cache (used for GPU layer calculation in parent process)
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
12
22
|
let _cachedGraphics;
|
|
13
23
|
let _graphicsInFlight;
|
|
14
24
|
const GRAPHICS_CACHE_TTL_MS = 5 * 60 * 1000;
|
|
@@ -26,15 +36,28 @@ async function getCachedGraphics() {
|
|
|
26
36
|
}
|
|
27
37
|
return _graphicsInFlight;
|
|
28
38
|
}
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// SharedGGUFRunner
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
29
42
|
export class SharedGGUFRunner {
|
|
30
43
|
env;
|
|
31
44
|
idleTimeoutMs;
|
|
32
|
-
|
|
33
|
-
|
|
45
|
+
// Worker process
|
|
46
|
+
worker = null;
|
|
47
|
+
_workerEra = 0;
|
|
48
|
+
_gracefulDispose = false;
|
|
49
|
+
_workerCrashCallback = null;
|
|
50
|
+
// Lightweight context trackers — actual objects live in the worker
|
|
51
|
+
contextTrackers = new Map();
|
|
52
|
+
maxConcurrentContexts = 10;
|
|
53
|
+
// IPC promise bridges
|
|
54
|
+
_loadCallback = null;
|
|
55
|
+
_contextCallbacks = new Map();
|
|
56
|
+
_promptCallbacks = new Map();
|
|
57
|
+
_disposeCallback = null;
|
|
58
|
+
// Runner state
|
|
34
59
|
modelName = "";
|
|
35
60
|
gpuLayers = 0;
|
|
36
|
-
contexts = new Map();
|
|
37
|
-
maxConcurrentContexts = 10;
|
|
38
61
|
runnerState = "cold";
|
|
39
62
|
modelState = null;
|
|
40
63
|
activeRequestCount = 0;
|
|
@@ -50,6 +73,9 @@ export class SharedGGUFRunner {
|
|
|
50
73
|
setIdleDisposeCallback(cb) {
|
|
51
74
|
this.onIdleDispose = cb;
|
|
52
75
|
}
|
|
76
|
+
setWorkerCrashCallback(cb) {
|
|
77
|
+
this._workerCrashCallback = cb;
|
|
78
|
+
}
|
|
53
79
|
getModelState() {
|
|
54
80
|
return this.modelState;
|
|
55
81
|
}
|
|
@@ -61,7 +87,7 @@ export class SharedGGUFRunner {
|
|
|
61
87
|
}
|
|
62
88
|
getStats() {
|
|
63
89
|
return {
|
|
64
|
-
activeContexts: this.
|
|
90
|
+
activeContexts: this.contextTrackers.size,
|
|
65
91
|
maxContexts: this.maxConcurrentContexts,
|
|
66
92
|
modelName: this.modelName,
|
|
67
93
|
gpuLayers: this.gpuLayers,
|
|
@@ -71,7 +97,7 @@ export class SharedGGUFRunner {
|
|
|
71
97
|
if (this.runnerState === "disposing") {
|
|
72
98
|
throw new Error(`Cannot load "${modelName}" while disposal is in progress`);
|
|
73
99
|
}
|
|
74
|
-
if (this.
|
|
100
|
+
if (this.runnerState === "loaded" && this.modelName === modelName) {
|
|
75
101
|
if (this.modelState && loadType === "manual" && this.modelState.loadType === "auto") {
|
|
76
102
|
this.modelState.loadType = "manual";
|
|
77
103
|
this.clearIdleTimer();
|
|
@@ -97,7 +123,6 @@ export class SharedGGUFRunner {
|
|
|
97
123
|
model: modelName,
|
|
98
124
|
});
|
|
99
125
|
try {
|
|
100
|
-
this.llama = await getLlama();
|
|
101
126
|
const llmDir = getLLMDirectory(this.env.paths, this.env.config);
|
|
102
127
|
const modelPath = join(llmDir, `${modelName}.gguf`);
|
|
103
128
|
const graphics = await getCachedGraphics();
|
|
@@ -130,7 +155,7 @@ export class SharedGGUFRunner {
|
|
|
130
155
|
this._cleanupPartialLoad();
|
|
131
156
|
return;
|
|
132
157
|
}
|
|
133
|
-
|
|
158
|
+
await this._spawnAndLoad(modelPath, this.gpuLayers);
|
|
134
159
|
if (this.generation !== gen) {
|
|
135
160
|
this._cleanupPartialLoad();
|
|
136
161
|
return;
|
|
@@ -157,79 +182,288 @@ export class SharedGGUFRunner {
|
|
|
157
182
|
throw new Error(`Failed to load '${modelName}': ${message}`);
|
|
158
183
|
}
|
|
159
184
|
}
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Worker lifecycle
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
async _spawnAndLoad(modelPath, gpuLayers) {
|
|
189
|
+
const era = ++this._workerEra;
|
|
190
|
+
const worker = cp.fork(workerScript, [], {
|
|
191
|
+
stdio: ["ignore", "ignore", "inherit"],
|
|
192
|
+
serialization: "json",
|
|
193
|
+
});
|
|
194
|
+
this.worker = worker;
|
|
195
|
+
// Self-invalidating exit handler: ignores stale exits after era increments
|
|
196
|
+
worker.on("exit", (code, signal) => {
|
|
197
|
+
if (this._workerEra !== era)
|
|
198
|
+
return;
|
|
199
|
+
this._handleWorkerExit(code, signal);
|
|
200
|
+
});
|
|
201
|
+
// Await the initial 'ready' handshake, then switch to permanent handler
|
|
202
|
+
await new Promise((resolve, reject) => {
|
|
203
|
+
const t = setTimeout(() => {
|
|
204
|
+
reject(new Error("llmWorker: ready timeout"));
|
|
205
|
+
}, WORKER_READY_TIMEOUT_MS);
|
|
206
|
+
const onMsg = (raw) => {
|
|
207
|
+
const msg = raw;
|
|
208
|
+
if (msg?.type === "ready") {
|
|
209
|
+
clearTimeout(t);
|
|
210
|
+
worker.off("message", onMsg);
|
|
211
|
+
worker.on("message", (m) => this._handleWorkerMessage(m));
|
|
212
|
+
resolve();
|
|
213
|
+
}
|
|
214
|
+
};
|
|
215
|
+
worker.on("message", onMsg);
|
|
216
|
+
worker.once("error", (err) => {
|
|
217
|
+
clearTimeout(t);
|
|
218
|
+
reject(err);
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
// Send load-model and await confirmation
|
|
222
|
+
await new Promise((resolve, reject) => {
|
|
223
|
+
this._loadCallback = { resolve, reject, gen: this.generation };
|
|
224
|
+
this._workerSend({ type: "load-model", modelPath, gpuLayers });
|
|
225
|
+
});
|
|
226
|
+
this._loadCallback = null;
|
|
227
|
+
}
|
|
228
|
+
_handleWorkerMessage(msg) {
|
|
229
|
+
switch (msg.type) {
|
|
230
|
+
case "loaded": {
|
|
231
|
+
const cb = this._loadCallback;
|
|
232
|
+
this._loadCallback = null;
|
|
233
|
+
cb?.resolve();
|
|
234
|
+
break;
|
|
235
|
+
}
|
|
236
|
+
case "context-created": {
|
|
237
|
+
this.contextTrackers.set(msg.conversationId, {
|
|
238
|
+
lastUsedAt: Date.now(),
|
|
239
|
+
inFlight: false,
|
|
240
|
+
});
|
|
241
|
+
const cb = this._contextCallbacks.get(msg.conversationId);
|
|
242
|
+
if (cb) {
|
|
243
|
+
this._contextCallbacks.delete(msg.conversationId);
|
|
244
|
+
cb.resolve();
|
|
245
|
+
}
|
|
246
|
+
break;
|
|
247
|
+
}
|
|
248
|
+
case "token": {
|
|
249
|
+
const cb = this._promptCallbacks.get(msg.requestId);
|
|
250
|
+
if (cb && this.generation === cb.gen) {
|
|
251
|
+
cb.onToken?.(msg.chunk);
|
|
252
|
+
}
|
|
253
|
+
break;
|
|
254
|
+
}
|
|
255
|
+
case "done": {
|
|
256
|
+
const cb = this._promptCallbacks.get(msg.requestId);
|
|
257
|
+
if (cb) {
|
|
258
|
+
this._promptCallbacks.delete(msg.requestId);
|
|
259
|
+
cb.resolve({ text: msg.text, promptTokens: msg.promptTokens, completionTokens: msg.completionTokens });
|
|
260
|
+
}
|
|
261
|
+
break;
|
|
262
|
+
}
|
|
263
|
+
case "cancelled": {
|
|
264
|
+
const cb = this._promptCallbacks.get(msg.requestId);
|
|
265
|
+
if (cb) {
|
|
266
|
+
this._promptCallbacks.delete(msg.requestId);
|
|
267
|
+
cb.resolve({ text: msg.text, promptTokens: msg.promptTokens, completionTokens: msg.completionTokens });
|
|
268
|
+
}
|
|
269
|
+
break;
|
|
270
|
+
}
|
|
271
|
+
case "error": {
|
|
272
|
+
if (msg.requestId) {
|
|
273
|
+
const cb = this._promptCallbacks.get(msg.requestId);
|
|
274
|
+
if (cb) {
|
|
275
|
+
this._promptCallbacks.delete(msg.requestId);
|
|
276
|
+
cb.reject(new Error(msg.message));
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
else if (msg.conversationId) {
|
|
280
|
+
const cb = this._contextCallbacks.get(msg.conversationId);
|
|
281
|
+
if (cb) {
|
|
282
|
+
this._contextCallbacks.delete(msg.conversationId);
|
|
283
|
+
cb.reject(new Error(msg.message));
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
else {
|
|
287
|
+
// Load error
|
|
288
|
+
const cb = this._loadCallback;
|
|
289
|
+
this._loadCallback = null;
|
|
290
|
+
cb?.reject(new Error(msg.message));
|
|
291
|
+
}
|
|
292
|
+
break;
|
|
293
|
+
}
|
|
294
|
+
case "disposed": {
|
|
295
|
+
const cb = this._disposeCallback;
|
|
296
|
+
this._disposeCallback = null;
|
|
297
|
+
cb?.resolve();
|
|
298
|
+
this._finalizeDispose();
|
|
299
|
+
break;
|
|
300
|
+
}
|
|
301
|
+
case "log": {
|
|
302
|
+
const level = msg.level;
|
|
303
|
+
getLogger()[level]?.(`[llmWorker] ${msg.message}`);
|
|
304
|
+
break;
|
|
305
|
+
}
|
|
306
|
+
default:
|
|
307
|
+
break;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
_handleWorkerExit(code, signal) {
|
|
311
|
+
if (this._gracefulDispose) {
|
|
312
|
+
// Expected shutdown — resolve dispose callback if not already done by 'disposed' message
|
|
313
|
+
const cb = this._disposeCallback;
|
|
314
|
+
this._disposeCallback = null;
|
|
315
|
+
cb?.resolve();
|
|
316
|
+
return;
|
|
317
|
+
}
|
|
318
|
+
// Unexpected crash
|
|
319
|
+
const crashModel = this.modelName;
|
|
320
|
+
const timestamp = Date.now();
|
|
321
|
+
getLogger().error(`llmWorker crashed: model="${crashModel}" code=${code ?? "null"} signal=${signal ?? "null"}`);
|
|
322
|
+
// Reject all pending load/context/prompt callbacks
|
|
323
|
+
const loadCb = this._loadCallback;
|
|
324
|
+
this._loadCallback = null;
|
|
325
|
+
loadCb?.reject(new Error(`llmWorker crashed during load (code=${String(code)})`));
|
|
326
|
+
for (const [, cb] of this._contextCallbacks.entries()) {
|
|
327
|
+
cb.reject(new Error(`llmWorker crashed (code=${String(code)})`));
|
|
328
|
+
}
|
|
329
|
+
this._contextCallbacks.clear();
|
|
330
|
+
for (const [, cb] of this._promptCallbacks.entries()) {
|
|
331
|
+
cb.reject(new Error(`llmWorker crashed (code=${String(code)})`));
|
|
332
|
+
}
|
|
333
|
+
this._promptCallbacks.clear();
|
|
334
|
+
const crashCb = this._workerCrashCallback;
|
|
335
|
+
this._finalizeDispose();
|
|
336
|
+
crashCb?.({ model: crashModel, timestamp });
|
|
337
|
+
this.onIdleDispose?.(crashModel);
|
|
338
|
+
}
|
|
339
|
+
_finalizeDispose() {
|
|
340
|
+
this.worker = null;
|
|
341
|
+
this.modelName = "";
|
|
342
|
+
this.modelState = null;
|
|
343
|
+
this.activeRequestCount = 0;
|
|
344
|
+
this.contextTrackers.clear();
|
|
345
|
+
if (this._loadCallback) {
|
|
346
|
+
this._loadCallback.reject(new Error("runner disposed"));
|
|
347
|
+
this._loadCallback = null;
|
|
348
|
+
}
|
|
349
|
+
for (const [, cb] of this._contextCallbacks.entries()) {
|
|
350
|
+
cb.reject(new Error("runner disposed"));
|
|
351
|
+
}
|
|
352
|
+
this._contextCallbacks.clear();
|
|
353
|
+
for (const [, cb] of this._promptCallbacks.entries()) {
|
|
354
|
+
cb.reject(new Error("runner disposed"));
|
|
355
|
+
}
|
|
356
|
+
this._promptCallbacks.clear();
|
|
357
|
+
if (this._disposeCallback) {
|
|
358
|
+
this._disposeCallback.resolve();
|
|
359
|
+
this._disposeCallback = null;
|
|
360
|
+
}
|
|
361
|
+
this.clearIdleTimer();
|
|
362
|
+
this.stopWatchdog();
|
|
363
|
+
this.runnerState = "cold";
|
|
364
|
+
}
|
|
365
|
+
_workerSend(msg) {
|
|
366
|
+
if (!this.worker)
|
|
367
|
+
return;
|
|
368
|
+
try {
|
|
369
|
+
this.worker.send(msg);
|
|
370
|
+
}
|
|
371
|
+
catch {
|
|
372
|
+
// Worker may have already exited
|
|
373
|
+
}
|
|
374
|
+
}
|
|
160
375
|
_cleanupPartialLoad() {
|
|
376
|
+
if (this.runnerState === "cold")
|
|
377
|
+
return; // already finalized by exit handler
|
|
161
378
|
this.clearIdleTimer();
|
|
162
379
|
this.stopWatchdog();
|
|
163
|
-
this.
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
380
|
+
++this._workerEra; // invalidate exit handler for this era
|
|
381
|
+
if (this.worker) {
|
|
382
|
+
try {
|
|
383
|
+
this.worker.kill();
|
|
384
|
+
}
|
|
385
|
+
catch {
|
|
386
|
+
// ignored
|
|
387
|
+
}
|
|
388
|
+
this.worker = null;
|
|
389
|
+
}
|
|
390
|
+
this.contextTrackers.clear();
|
|
171
391
|
this.modelName = "";
|
|
172
392
|
this.modelState = null;
|
|
173
393
|
this.activeRequestCount = 0;
|
|
174
394
|
this.runnerState = "cold";
|
|
175
395
|
}
|
|
396
|
+
// ---------------------------------------------------------------------------
|
|
397
|
+
// Context management
|
|
398
|
+
// ---------------------------------------------------------------------------
|
|
399
|
+
async _ensureContext(conversationId, gen) {
|
|
400
|
+
const existing = this.contextTrackers.get(conversationId);
|
|
401
|
+
if (existing) {
|
|
402
|
+
existing.lastUsedAt = Date.now();
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
// LRU eviction if at capacity
|
|
406
|
+
if (this.contextTrackers.size >= this.maxConcurrentContexts) {
|
|
407
|
+
const evictable = Array.from(this.contextTrackers.entries())
|
|
408
|
+
.filter(([, t]) => !t.inFlight)
|
|
409
|
+
.sort((a, b) => a[1].lastUsedAt - b[1].lastUsedAt);
|
|
410
|
+
if (evictable.length > 0 && evictable[0]) {
|
|
411
|
+
const [evictId] = evictable[0];
|
|
412
|
+
this.contextTrackers.delete(evictId);
|
|
413
|
+
this._workerSend({ type: "clear-context", conversationId: evictId });
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
await new Promise((resolve, reject) => {
|
|
417
|
+
this._contextCallbacks.set(conversationId, { resolve, reject, gen });
|
|
418
|
+
this._workerSend({ type: "create-context", conversationId });
|
|
419
|
+
});
|
|
420
|
+
// Tracker entry is set by _handleWorkerMessage when 'context-created' arrives
|
|
421
|
+
}
|
|
422
|
+
// ---------------------------------------------------------------------------
|
|
423
|
+
// Public inference API
|
|
424
|
+
// ---------------------------------------------------------------------------
|
|
176
425
|
async prompt(conversationId, input, onToken, options) {
|
|
177
|
-
if (
|
|
426
|
+
if (this.runnerState !== "loaded")
|
|
178
427
|
throw new Error("Model not loaded.");
|
|
179
428
|
const gen = this.generation;
|
|
180
429
|
this.activeRequestCount++;
|
|
181
430
|
this.clearIdleTimer();
|
|
182
431
|
if (this.modelState)
|
|
183
432
|
this.modelState.lastUsedAt = Date.now();
|
|
184
|
-
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
if (!entry) {
|
|
433
|
+
await this._ensureContext(conversationId, gen);
|
|
434
|
+
const tracker = this.contextTrackers.get(conversationId);
|
|
435
|
+
if (!tracker) {
|
|
188
436
|
this.activeRequestCount = Math.max(0, this.activeRequestCount - 1);
|
|
189
437
|
throw new Error(`Context for "${conversationId}" was evicted before prompt could start`);
|
|
190
438
|
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
let fullText = "";
|
|
439
|
+
tracker.inFlight = true;
|
|
440
|
+
const requestId = `${conversationId}:${Date.now()}:${Math.random().toString(36).slice(2)}`;
|
|
194
441
|
try {
|
|
195
|
-
const
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
};
|
|
205
|
-
|
|
206
|
-
promptOptions["maxTokens"] = options.maxTokens;
|
|
207
|
-
if (options?.temperature !== undefined)
|
|
208
|
-
promptOptions["temperature"] = options.temperature;
|
|
209
|
-
if (options?.stop?.length)
|
|
210
|
-
promptOptions["customStopTriggers"] = options.stop;
|
|
211
|
-
await session.prompt(input, promptOptions);
|
|
442
|
+
const result = await new Promise((resolve, reject) => {
|
|
443
|
+
this._promptCallbacks.set(requestId, {
|
|
444
|
+
resolve,
|
|
445
|
+
reject,
|
|
446
|
+
onToken,
|
|
447
|
+
gen,
|
|
448
|
+
conversationId,
|
|
449
|
+
});
|
|
450
|
+
this._workerSend({ type: "prompt", requestId, conversationId, input, options });
|
|
451
|
+
});
|
|
452
|
+
this._promptCallbacks.delete(requestId);
|
|
212
453
|
if (this.generation !== gen) {
|
|
213
|
-
return { text:
|
|
454
|
+
return { text: result.text.trim(), promptTokens: result.promptTokens, completionTokens: 0 };
|
|
214
455
|
}
|
|
215
|
-
|
|
216
|
-
const completionTokens = this.model.tokenize(fullText).length;
|
|
217
|
-
return { text: fullText, promptTokens, completionTokens };
|
|
456
|
+
return result;
|
|
218
457
|
}
|
|
219
458
|
catch (err) {
|
|
220
|
-
|
|
221
|
-
if (e?.name === "AbortError") {
|
|
222
|
-
const completionTokens = this.generation === gen && this.model ? this.model.tokenize(fullText).length : 0;
|
|
223
|
-
return { text: fullText, promptTokens, completionTokens };
|
|
224
|
-
}
|
|
459
|
+
this._promptCallbacks.delete(requestId);
|
|
225
460
|
throw err;
|
|
226
461
|
}
|
|
227
462
|
finally {
|
|
228
463
|
if (this.generation === gen) {
|
|
229
|
-
if (
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
entry.lastUsedAt = Date.now();
|
|
464
|
+
if (tracker) {
|
|
465
|
+
tracker.inFlight = false;
|
|
466
|
+
tracker.lastUsedAt = Date.now();
|
|
233
467
|
}
|
|
234
468
|
this.activeRequestCount = Math.max(0, this.activeRequestCount - 1);
|
|
235
469
|
this.scheduleIdleCheck();
|
|
@@ -237,41 +471,15 @@ export class SharedGGUFRunner {
|
|
|
237
471
|
}
|
|
238
472
|
}
|
|
239
473
|
cancel(conversationId) {
|
|
240
|
-
|
|
241
|
-
if (entry?.abortController) {
|
|
242
|
-
try {
|
|
243
|
-
entry.abortController.abort();
|
|
244
|
-
}
|
|
245
|
-
catch {
|
|
246
|
-
// ignored
|
|
247
|
-
}
|
|
248
|
-
}
|
|
474
|
+
this._workerSend({ type: "cancel", conversationId });
|
|
249
475
|
}
|
|
250
476
|
cancelAll() {
|
|
251
|
-
|
|
252
|
-
if (entry.abortController) {
|
|
253
|
-
try {
|
|
254
|
-
entry.abortController.abort();
|
|
255
|
-
}
|
|
256
|
-
catch {
|
|
257
|
-
// ignored
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
}
|
|
477
|
+
this._workerSend({ type: "cancel-all" });
|
|
261
478
|
}
|
|
262
479
|
clearConversation(conversationId) {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
try {
|
|
267
|
-
entry.abortController?.abort();
|
|
268
|
-
}
|
|
269
|
-
catch {
|
|
270
|
-
// ignored
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
this.contexts.delete(conversationId);
|
|
274
|
-
this.safeDispose(entry.context, `context[${conversationId}]`);
|
|
480
|
+
if (this.contextTrackers.has(conversationId)) {
|
|
481
|
+
this.contextTrackers.delete(conversationId);
|
|
482
|
+
this._workerSend({ type: "clear-context", conversationId });
|
|
275
483
|
}
|
|
276
484
|
}
|
|
277
485
|
async dispose(reason = "manual-dispose") {
|
|
@@ -287,10 +495,8 @@ export class SharedGGUFRunner {
|
|
|
287
495
|
// load failed; we're cold now
|
|
288
496
|
}
|
|
289
497
|
}
|
|
290
|
-
const
|
|
291
|
-
if (
|
|
292
|
-
return;
|
|
293
|
-
if (stateAfter === "disposing")
|
|
498
|
+
const stateCheck = this.runnerState;
|
|
499
|
+
if (stateCheck === "cold" || stateCheck === "disposing")
|
|
294
500
|
return;
|
|
295
501
|
const prev = this.runnerState;
|
|
296
502
|
this.runnerState = "disposing";
|
|
@@ -298,27 +504,42 @@ export class SharedGGUFRunner {
|
|
|
298
504
|
this.logTransition(prev, "disposing", reason);
|
|
299
505
|
this.clearIdleTimer();
|
|
300
506
|
this.stopWatchdog();
|
|
301
|
-
this.
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
507
|
+
this._gracefulDispose = true;
|
|
508
|
+
const disposePromise = new Promise((resolve, reject) => {
|
|
509
|
+
this._disposeCallback = { resolve, reject };
|
|
510
|
+
});
|
|
511
|
+
this._workerSend({ type: "dispose" });
|
|
512
|
+
const safetyTimer = setTimeout(() => {
|
|
513
|
+
++this._workerEra;
|
|
514
|
+
if (this.worker) {
|
|
515
|
+
try {
|
|
516
|
+
this.worker.kill();
|
|
517
|
+
}
|
|
518
|
+
catch {
|
|
519
|
+
// ignored
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
this._disposeCallback?.resolve();
|
|
523
|
+
this._disposeCallback = null;
|
|
524
|
+
}, DISPOSE_SAFETY_TIMEOUT_MS);
|
|
525
|
+
try {
|
|
526
|
+
await disposePromise;
|
|
527
|
+
}
|
|
528
|
+
finally {
|
|
529
|
+
clearTimeout(safetyTimer);
|
|
530
|
+
this._gracefulDispose = false;
|
|
531
|
+
}
|
|
532
|
+
const stateAfterDispose = this.runnerState;
|
|
533
|
+
if (stateAfterDispose !== "cold")
|
|
534
|
+
this._finalizeDispose();
|
|
317
535
|
this.logTransition("disposing", "cold", reason);
|
|
318
536
|
}
|
|
537
|
+
// ---------------------------------------------------------------------------
|
|
538
|
+
// Idle / watchdog timers
|
|
539
|
+
// ---------------------------------------------------------------------------
|
|
319
540
|
scheduleIdleCheck() {
|
|
320
541
|
this.clearIdleTimer();
|
|
321
|
-
if (
|
|
542
|
+
if (this.runnerState !== "loaded")
|
|
322
543
|
return;
|
|
323
544
|
if (this.activeRequestCount > 0)
|
|
324
545
|
return;
|
|
@@ -331,7 +552,7 @@ export class SharedGGUFRunner {
|
|
|
331
552
|
return;
|
|
332
553
|
if (this.activeRequestCount > 0)
|
|
333
554
|
return;
|
|
334
|
-
if (
|
|
555
|
+
if (this.runnerState !== "loaded")
|
|
335
556
|
return;
|
|
336
557
|
const nameBefore = this.modelName;
|
|
337
558
|
try {
|
|
@@ -360,57 +581,31 @@ export class SharedGGUFRunner {
|
|
|
360
581
|
}
|
|
361
582
|
}
|
|
362
583
|
runWatchdogCheck() {
|
|
363
|
-
if (
|
|
584
|
+
if (this.runnerState !== "loaded") {
|
|
364
585
|
this.stopWatchdog();
|
|
365
586
|
return;
|
|
366
587
|
}
|
|
367
588
|
const checkGen = this.generation;
|
|
368
589
|
const now = Date.now();
|
|
369
|
-
for (const [id,
|
|
590
|
+
for (const [id, tracker] of this.contextTrackers.entries()) {
|
|
370
591
|
if (this.generation !== checkGen)
|
|
371
592
|
break;
|
|
372
|
-
if (
|
|
593
|
+
if (tracker.inFlight)
|
|
373
594
|
continue;
|
|
374
|
-
const idleMs = now -
|
|
595
|
+
const idleMs = now - tracker.lastUsedAt;
|
|
375
596
|
if (idleMs <= STALE_CONTEXT_LIMIT_MS)
|
|
376
597
|
continue;
|
|
377
|
-
this.
|
|
598
|
+
this.contextTrackers.delete(id);
|
|
599
|
+
this._workerSend({ type: "clear-context", conversationId: id });
|
|
378
600
|
this.logTransition(this.runnerState, this.runnerState, "watchdog-evict", {
|
|
379
601
|
contextId: id,
|
|
380
602
|
idleMin: Math.floor(idleMs / 60000),
|
|
381
603
|
});
|
|
382
|
-
this.safeDispose(entry.context, `watchdog-evict-context[${id}]`);
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
async getOrCreateContext(conversationId) {
|
|
386
|
-
const existing = this.contexts.get(conversationId);
|
|
387
|
-
if (existing) {
|
|
388
|
-
existing.lastUsedAt = Date.now();
|
|
389
|
-
return { context: existing.context, session: existing.session };
|
|
390
|
-
}
|
|
391
|
-
if (this.contexts.size >= this.maxConcurrentContexts) {
|
|
392
|
-
const evictable = Array.from(this.contexts.entries())
|
|
393
|
-
.filter(([, e]) => !e.inFlight)
|
|
394
|
-
.sort((a, b) => a[1].lastUsedAt - b[1].lastUsedAt);
|
|
395
|
-
if (evictable.length > 0 && evictable[0]) {
|
|
396
|
-
const [id, entry] = evictable[0];
|
|
397
|
-
this.contexts.delete(id);
|
|
398
|
-
this.safeDispose(entry.context, `lru-context[${id}]`);
|
|
399
|
-
}
|
|
400
604
|
}
|
|
401
|
-
if (!this.model)
|
|
402
|
-
throw new Error("Model not loaded when creating context");
|
|
403
|
-
const context = await this.model.createContext({ contextSize: 4096 });
|
|
404
|
-
const session = new LlamaChatSession({ contextSequence: context.getSequence() });
|
|
405
|
-
this.contexts.set(conversationId, {
|
|
406
|
-
context,
|
|
407
|
-
session,
|
|
408
|
-
lastUsedAt: Date.now(),
|
|
409
|
-
inFlight: false,
|
|
410
|
-
abortController: null,
|
|
411
|
-
});
|
|
412
|
-
return { context, session };
|
|
413
605
|
}
|
|
606
|
+
// ---------------------------------------------------------------------------
|
|
607
|
+
// Helpers
|
|
608
|
+
// ---------------------------------------------------------------------------
|
|
414
609
|
async validateModelFile(modelPath, modelName) {
|
|
415
610
|
if (!existsSync(modelPath)) {
|
|
416
611
|
throw new Error(`Model '${modelName}' not found at ${modelPath}`);
|
|
@@ -422,16 +617,6 @@ export class SharedGGUFRunner {
|
|
|
422
617
|
throw new Error(`Model '${modelName}' cannot be read at ${modelPath}`);
|
|
423
618
|
}
|
|
424
619
|
}
|
|
425
|
-
safeDispose(obj, name) {
|
|
426
|
-
if (!obj)
|
|
427
|
-
return;
|
|
428
|
-
try {
|
|
429
|
-
obj.dispose?.();
|
|
430
|
-
}
|
|
431
|
-
catch (err) {
|
|
432
|
-
getLogger().warn(`failed to dispose ${name}:`, err.message);
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
620
|
logTransition(from, to, reason, extra) {
|
|
436
621
|
getLogger().info(JSON.stringify({
|
|
437
622
|
event: "runner_state_transition",
|
|
@@ -441,7 +626,7 @@ export class SharedGGUFRunner {
|
|
|
441
626
|
reason,
|
|
442
627
|
generation: this.generation,
|
|
443
628
|
activeRequests: this.activeRequestCount,
|
|
444
|
-
contexts: this.
|
|
629
|
+
contexts: this.contextTrackers.size,
|
|
445
630
|
timestamp: Date.now(),
|
|
446
631
|
...extra,
|
|
447
632
|
}));
|