peerllm-host-cli 1.9.1 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,24 @@
1
+ import * as cp from "node:child_process";
1
2
  import { existsSync, promises as fsp } from "node:fs";
2
3
  import { arch as osArch, platform as osPlatform } from "node:os";
3
4
  import { join } from "node:path";
4
- import { getLlama, LlamaChatSession } from "node-llama-cpp";
5
+ import { fileURLToPath } from "node:url";
5
6
  import si from "systeminformation";
6
7
  import { getLogger } from "./logger.js";
7
8
  import { calculateGpuLayersFromConfig, parseModelFilename, } from "./model-info.js";
8
9
  import { getLLMDirectory } from "./models-fs.js";
10
+ // ---------------------------------------------------------------------------
11
+ // Module-level constants
12
+ // ---------------------------------------------------------------------------
9
13
  const DEFAULT_IDLE_TIMEOUT_MS = 10 * 60 * 1000;
10
14
  const WATCHDOG_INTERVAL_MS = 60 * 1000;
11
15
  const STALE_CONTEXT_LIMIT_MS = 5 * 60 * 1000;
16
+ const WORKER_READY_TIMEOUT_MS = 10_000;
17
+ const DISPOSE_SAFETY_TIMEOUT_MS = 5_000;
18
+ const workerScript = fileURLToPath(new URL("./llmWorker.js", import.meta.url));
19
+ // ---------------------------------------------------------------------------
20
+ // Graphics cache (used for GPU layer calculation in parent process)
21
+ // ---------------------------------------------------------------------------
12
22
  let _cachedGraphics;
13
23
  let _graphicsInFlight;
14
24
  const GRAPHICS_CACHE_TTL_MS = 5 * 60 * 1000;
@@ -26,15 +36,28 @@ async function getCachedGraphics() {
26
36
  }
27
37
  return _graphicsInFlight;
28
38
  }
39
+ // ---------------------------------------------------------------------------
40
+ // SharedGGUFRunner
41
+ // ---------------------------------------------------------------------------
29
42
  export class SharedGGUFRunner {
30
43
  env;
31
44
  idleTimeoutMs;
32
- llama = null;
33
- model = null;
45
+ // Worker process
46
+ worker = null;
47
+ _workerEra = 0;
48
+ _gracefulDispose = false;
49
+ _workerCrashCallback = null;
50
+ // Lightweight context trackers — actual objects live in the worker
51
+ contextTrackers = new Map();
52
+ maxConcurrentContexts = 10;
53
+ // IPC promise bridges
54
+ _loadCallback = null;
55
+ _contextCallbacks = new Map();
56
+ _promptCallbacks = new Map();
57
+ _disposeCallback = null;
58
+ // Runner state
34
59
  modelName = "";
35
60
  gpuLayers = 0;
36
- contexts = new Map();
37
- maxConcurrentContexts = 10;
38
61
  runnerState = "cold";
39
62
  modelState = null;
40
63
  activeRequestCount = 0;
@@ -50,6 +73,9 @@ export class SharedGGUFRunner {
50
73
  setIdleDisposeCallback(cb) {
51
74
  this.onIdleDispose = cb;
52
75
  }
76
+ setWorkerCrashCallback(cb) {
77
+ this._workerCrashCallback = cb;
78
+ }
53
79
  getModelState() {
54
80
  return this.modelState;
55
81
  }
@@ -61,7 +87,7 @@ export class SharedGGUFRunner {
61
87
  }
62
88
  getStats() {
63
89
  return {
64
- activeContexts: this.contexts.size,
90
+ activeContexts: this.contextTrackers.size,
65
91
  maxContexts: this.maxConcurrentContexts,
66
92
  modelName: this.modelName,
67
93
  gpuLayers: this.gpuLayers,
@@ -71,7 +97,7 @@ export class SharedGGUFRunner {
71
97
  if (this.runnerState === "disposing") {
72
98
  throw new Error(`Cannot load "${modelName}" while disposal is in progress`);
73
99
  }
74
- if (this.model && this.modelName === modelName) {
100
+ if (this.runnerState === "loaded" && this.modelName === modelName) {
75
101
  if (this.modelState && loadType === "manual" && this.modelState.loadType === "auto") {
76
102
  this.modelState.loadType = "manual";
77
103
  this.clearIdleTimer();
@@ -97,7 +123,6 @@ export class SharedGGUFRunner {
97
123
  model: modelName,
98
124
  });
99
125
  try {
100
- this.llama = await getLlama();
101
126
  const llmDir = getLLMDirectory(this.env.paths, this.env.config);
102
127
  const modelPath = join(llmDir, `${modelName}.gguf`);
103
128
  const graphics = await getCachedGraphics();
@@ -130,7 +155,7 @@ export class SharedGGUFRunner {
130
155
  this._cleanupPartialLoad();
131
156
  return;
132
157
  }
133
- this.model = await this.llama.loadModel({ modelPath, gpuLayers: this.gpuLayers });
158
+ await this._spawnAndLoad(modelPath, this.gpuLayers);
134
159
  if (this.generation !== gen) {
135
160
  this._cleanupPartialLoad();
136
161
  return;
@@ -157,79 +182,288 @@ export class SharedGGUFRunner {
157
182
  throw new Error(`Failed to load '${modelName}': ${message}`);
158
183
  }
159
184
  }
185
+ // ---------------------------------------------------------------------------
186
+ // Worker lifecycle
187
+ // ---------------------------------------------------------------------------
188
+ async _spawnAndLoad(modelPath, gpuLayers) {
189
+ const era = ++this._workerEra;
190
+ const worker = cp.fork(workerScript, [], {
191
+ stdio: ["ignore", "ignore", "inherit"],
192
+ serialization: "json",
193
+ });
194
+ this.worker = worker;
195
+ // Self-invalidating exit handler: ignores stale exits after era increments
196
+ worker.on("exit", (code, signal) => {
197
+ if (this._workerEra !== era)
198
+ return;
199
+ this._handleWorkerExit(code, signal);
200
+ });
201
+ // Await the initial 'ready' handshake, then switch to permanent handler
202
+ await new Promise((resolve, reject) => {
203
+ const t = setTimeout(() => {
204
+ reject(new Error("llmWorker: ready timeout"));
205
+ }, WORKER_READY_TIMEOUT_MS);
206
+ const onMsg = (raw) => {
207
+ const msg = raw;
208
+ if (msg?.type === "ready") {
209
+ clearTimeout(t);
210
+ worker.off("message", onMsg);
211
+ worker.on("message", (m) => this._handleWorkerMessage(m));
212
+ resolve();
213
+ }
214
+ };
215
+ worker.on("message", onMsg);
216
+ worker.once("error", (err) => {
217
+ clearTimeout(t);
218
+ reject(err);
219
+ });
220
+ });
221
+ // Send load-model and await confirmation
222
+ await new Promise((resolve, reject) => {
223
+ this._loadCallback = { resolve, reject, gen: this.generation };
224
+ this._workerSend({ type: "load-model", modelPath, gpuLayers });
225
+ });
226
+ this._loadCallback = null;
227
+ }
228
+ _handleWorkerMessage(msg) {
229
+ switch (msg.type) {
230
+ case "loaded": {
231
+ const cb = this._loadCallback;
232
+ this._loadCallback = null;
233
+ cb?.resolve();
234
+ break;
235
+ }
236
+ case "context-created": {
237
+ this.contextTrackers.set(msg.conversationId, {
238
+ lastUsedAt: Date.now(),
239
+ inFlight: false,
240
+ });
241
+ const cb = this._contextCallbacks.get(msg.conversationId);
242
+ if (cb) {
243
+ this._contextCallbacks.delete(msg.conversationId);
244
+ cb.resolve();
245
+ }
246
+ break;
247
+ }
248
+ case "token": {
249
+ const cb = this._promptCallbacks.get(msg.requestId);
250
+ if (cb && this.generation === cb.gen) {
251
+ cb.onToken?.(msg.chunk);
252
+ }
253
+ break;
254
+ }
255
+ case "done": {
256
+ const cb = this._promptCallbacks.get(msg.requestId);
257
+ if (cb) {
258
+ this._promptCallbacks.delete(msg.requestId);
259
+ cb.resolve({ text: msg.text, promptTokens: msg.promptTokens, completionTokens: msg.completionTokens });
260
+ }
261
+ break;
262
+ }
263
+ case "cancelled": {
264
+ const cb = this._promptCallbacks.get(msg.requestId);
265
+ if (cb) {
266
+ this._promptCallbacks.delete(msg.requestId);
267
+ cb.resolve({ text: msg.text, promptTokens: msg.promptTokens, completionTokens: msg.completionTokens });
268
+ }
269
+ break;
270
+ }
271
+ case "error": {
272
+ if (msg.requestId) {
273
+ const cb = this._promptCallbacks.get(msg.requestId);
274
+ if (cb) {
275
+ this._promptCallbacks.delete(msg.requestId);
276
+ cb.reject(new Error(msg.message));
277
+ }
278
+ }
279
+ else if (msg.conversationId) {
280
+ const cb = this._contextCallbacks.get(msg.conversationId);
281
+ if (cb) {
282
+ this._contextCallbacks.delete(msg.conversationId);
283
+ cb.reject(new Error(msg.message));
284
+ }
285
+ }
286
+ else {
287
+ // Load error
288
+ const cb = this._loadCallback;
289
+ this._loadCallback = null;
290
+ cb?.reject(new Error(msg.message));
291
+ }
292
+ break;
293
+ }
294
+ case "disposed": {
295
+ const cb = this._disposeCallback;
296
+ this._disposeCallback = null;
297
+ cb?.resolve();
298
+ this._finalizeDispose();
299
+ break;
300
+ }
301
+ case "log": {
302
+ const level = msg.level;
303
+ getLogger()[level]?.(`[llmWorker] ${msg.message}`);
304
+ break;
305
+ }
306
+ default:
307
+ break;
308
+ }
309
+ }
310
+ _handleWorkerExit(code, signal) {
311
+ if (this._gracefulDispose) {
312
+ // Expected shutdown — resolve dispose callback if not already done by 'disposed' message
313
+ const cb = this._disposeCallback;
314
+ this._disposeCallback = null;
315
+ cb?.resolve();
316
+ return;
317
+ }
318
+ // Unexpected crash
319
+ const crashModel = this.modelName;
320
+ const timestamp = Date.now();
321
+ getLogger().error(`llmWorker crashed: model="${crashModel}" code=${code ?? "null"} signal=${signal ?? "null"}`);
322
+ // Reject all pending load/context/prompt callbacks
323
+ const loadCb = this._loadCallback;
324
+ this._loadCallback = null;
325
+ loadCb?.reject(new Error(`llmWorker crashed during load (code=${String(code)})`));
326
+ for (const [, cb] of this._contextCallbacks.entries()) {
327
+ cb.reject(new Error(`llmWorker crashed (code=${String(code)})`));
328
+ }
329
+ this._contextCallbacks.clear();
330
+ for (const [, cb] of this._promptCallbacks.entries()) {
331
+ cb.reject(new Error(`llmWorker crashed (code=${String(code)})`));
332
+ }
333
+ this._promptCallbacks.clear();
334
+ const crashCb = this._workerCrashCallback;
335
+ this._finalizeDispose();
336
+ crashCb?.({ model: crashModel, timestamp });
337
+ this.onIdleDispose?.(crashModel);
338
+ }
339
+ _finalizeDispose() {
340
+ this.worker = null;
341
+ this.modelName = "";
342
+ this.modelState = null;
343
+ this.activeRequestCount = 0;
344
+ this.contextTrackers.clear();
345
+ if (this._loadCallback) {
346
+ this._loadCallback.reject(new Error("runner disposed"));
347
+ this._loadCallback = null;
348
+ }
349
+ for (const [, cb] of this._contextCallbacks.entries()) {
350
+ cb.reject(new Error("runner disposed"));
351
+ }
352
+ this._contextCallbacks.clear();
353
+ for (const [, cb] of this._promptCallbacks.entries()) {
354
+ cb.reject(new Error("runner disposed"));
355
+ }
356
+ this._promptCallbacks.clear();
357
+ if (this._disposeCallback) {
358
+ this._disposeCallback.resolve();
359
+ this._disposeCallback = null;
360
+ }
361
+ this.clearIdleTimer();
362
+ this.stopWatchdog();
363
+ this.runnerState = "cold";
364
+ }
365
+ _workerSend(msg) {
366
+ if (!this.worker)
367
+ return;
368
+ try {
369
+ this.worker.send(msg);
370
+ }
371
+ catch {
372
+ // Worker may have already exited
373
+ }
374
+ }
160
375
  _cleanupPartialLoad() {
376
+ if (this.runnerState === "cold")
377
+ return; // already finalized by exit handler
161
378
  this.clearIdleTimer();
162
379
  this.stopWatchdog();
163
- this.safeDispose(this.model, "model (partial)");
164
- this.safeDispose(this.llama, "llama (partial)");
165
- for (const [id, entry] of this.contexts.entries()) {
166
- this.safeDispose(entry.context, `context[${id}] (partial)`);
167
- }
168
- this.contexts.clear();
169
- this.model = null;
170
- this.llama = null;
380
+ ++this._workerEra; // invalidate exit handler for this era
381
+ if (this.worker) {
382
+ try {
383
+ this.worker.kill();
384
+ }
385
+ catch {
386
+ // ignored
387
+ }
388
+ this.worker = null;
389
+ }
390
+ this.contextTrackers.clear();
171
391
  this.modelName = "";
172
392
  this.modelState = null;
173
393
  this.activeRequestCount = 0;
174
394
  this.runnerState = "cold";
175
395
  }
396
+ // ---------------------------------------------------------------------------
397
+ // Context management
398
+ // ---------------------------------------------------------------------------
399
+ async _ensureContext(conversationId, gen) {
400
+ const existing = this.contextTrackers.get(conversationId);
401
+ if (existing) {
402
+ existing.lastUsedAt = Date.now();
403
+ return;
404
+ }
405
+ // LRU eviction if at capacity
406
+ if (this.contextTrackers.size >= this.maxConcurrentContexts) {
407
+ const evictable = Array.from(this.contextTrackers.entries())
408
+ .filter(([, t]) => !t.inFlight)
409
+ .sort((a, b) => a[1].lastUsedAt - b[1].lastUsedAt);
410
+ if (evictable.length > 0 && evictable[0]) {
411
+ const [evictId] = evictable[0];
412
+ this.contextTrackers.delete(evictId);
413
+ this._workerSend({ type: "clear-context", conversationId: evictId });
414
+ }
415
+ }
416
+ await new Promise((resolve, reject) => {
417
+ this._contextCallbacks.set(conversationId, { resolve, reject, gen });
418
+ this._workerSend({ type: "create-context", conversationId });
419
+ });
420
+ // Tracker entry is set by _handleWorkerMessage when 'context-created' arrives
421
+ }
422
+ // ---------------------------------------------------------------------------
423
+ // Public inference API
424
+ // ---------------------------------------------------------------------------
176
425
  async prompt(conversationId, input, onToken, options) {
177
- if (!this.model)
426
+ if (this.runnerState !== "loaded")
178
427
  throw new Error("Model not loaded.");
179
428
  const gen = this.generation;
180
429
  this.activeRequestCount++;
181
430
  this.clearIdleTimer();
182
431
  if (this.modelState)
183
432
  this.modelState.lastUsedAt = Date.now();
184
- const promptTokens = this.model.tokenize(input).length;
185
- const { session } = await this.getOrCreateContext(conversationId);
186
- const entry = this.contexts.get(conversationId);
187
- if (!entry) {
433
+ await this._ensureContext(conversationId, gen);
434
+ const tracker = this.contextTrackers.get(conversationId);
435
+ if (!tracker) {
188
436
  this.activeRequestCount = Math.max(0, this.activeRequestCount - 1);
189
437
  throw new Error(`Context for "${conversationId}" was evicted before prompt could start`);
190
438
  }
191
- entry.abortController = new AbortController();
192
- entry.inFlight = true;
193
- let fullText = "";
439
+ tracker.inFlight = true;
440
+ const requestId = `${conversationId}:${Date.now()}:${Math.random().toString(36).slice(2)}`;
194
441
  try {
195
- const promptOptions = {
196
- onTextChunk: (chunk) => {
197
- if (this.generation !== gen)
198
- return;
199
- if (onToken)
200
- onToken(chunk);
201
- fullText += chunk;
202
- },
203
- signal: entry.abortController.signal,
204
- };
205
- if (options?.maxTokens !== undefined)
206
- promptOptions["maxTokens"] = options.maxTokens;
207
- if (options?.temperature !== undefined)
208
- promptOptions["temperature"] = options.temperature;
209
- if (options?.stop?.length)
210
- promptOptions["customStopTriggers"] = options.stop;
211
- await session.prompt(input, promptOptions);
442
+ const result = await new Promise((resolve, reject) => {
443
+ this._promptCallbacks.set(requestId, {
444
+ resolve,
445
+ reject,
446
+ onToken,
447
+ gen,
448
+ conversationId,
449
+ });
450
+ this._workerSend({ type: "prompt", requestId, conversationId, input, options });
451
+ });
452
+ this._promptCallbacks.delete(requestId);
212
453
  if (this.generation !== gen) {
213
- return { text: fullText.trim(), promptTokens, completionTokens: 0 };
454
+ return { text: result.text.trim(), promptTokens: result.promptTokens, completionTokens: 0 };
214
455
  }
215
- fullText = fullText.trim();
216
- const completionTokens = this.model.tokenize(fullText).length;
217
- return { text: fullText, promptTokens, completionTokens };
456
+ return result;
218
457
  }
219
458
  catch (err) {
220
- const e = err;
221
- if (e?.name === "AbortError") {
222
- const completionTokens = this.generation === gen && this.model ? this.model.tokenize(fullText).length : 0;
223
- return { text: fullText, promptTokens, completionTokens };
224
- }
459
+ this._promptCallbacks.delete(requestId);
225
460
  throw err;
226
461
  }
227
462
  finally {
228
463
  if (this.generation === gen) {
229
- if (entry) {
230
- entry.abortController = null;
231
- entry.inFlight = false;
232
- entry.lastUsedAt = Date.now();
464
+ if (tracker) {
465
+ tracker.inFlight = false;
466
+ tracker.lastUsedAt = Date.now();
233
467
  }
234
468
  this.activeRequestCount = Math.max(0, this.activeRequestCount - 1);
235
469
  this.scheduleIdleCheck();
@@ -237,41 +471,15 @@ export class SharedGGUFRunner {
237
471
  }
238
472
  }
239
473
  cancel(conversationId) {
240
- const entry = this.contexts.get(conversationId);
241
- if (entry?.abortController) {
242
- try {
243
- entry.abortController.abort();
244
- }
245
- catch {
246
- // ignored
247
- }
248
- }
474
+ this._workerSend({ type: "cancel", conversationId });
249
475
  }
250
476
  cancelAll() {
251
- for (const entry of this.contexts.values()) {
252
- if (entry.abortController) {
253
- try {
254
- entry.abortController.abort();
255
- }
256
- catch {
257
- // ignored
258
- }
259
- }
260
- }
477
+ this._workerSend({ type: "cancel-all" });
261
478
  }
262
479
  clearConversation(conversationId) {
263
- const entry = this.contexts.get(conversationId);
264
- if (entry) {
265
- if (entry.inFlight) {
266
- try {
267
- entry.abortController?.abort();
268
- }
269
- catch {
270
- // ignored
271
- }
272
- }
273
- this.contexts.delete(conversationId);
274
- this.safeDispose(entry.context, `context[${conversationId}]`);
480
+ if (this.contextTrackers.has(conversationId)) {
481
+ this.contextTrackers.delete(conversationId);
482
+ this._workerSend({ type: "clear-context", conversationId });
275
483
  }
276
484
  }
277
485
  async dispose(reason = "manual-dispose") {
@@ -287,10 +495,8 @@ export class SharedGGUFRunner {
287
495
  // load failed; we're cold now
288
496
  }
289
497
  }
290
- const stateAfter = this.runnerState;
291
- if (stateAfter === "cold")
292
- return;
293
- if (stateAfter === "disposing")
498
+ const stateCheck = this.runnerState;
499
+ if (stateCheck === "cold" || stateCheck === "disposing")
294
500
  return;
295
501
  const prev = this.runnerState;
296
502
  this.runnerState = "disposing";
@@ -298,27 +504,42 @@ export class SharedGGUFRunner {
298
504
  this.logTransition(prev, "disposing", reason);
299
505
  this.clearIdleTimer();
300
506
  this.stopWatchdog();
301
- this.cancelAll();
302
- await new Promise((resolve) => setTimeout(resolve, 300));
303
- const entries = Array.from(this.contexts.entries());
304
- this.contexts.clear();
305
- for (const [id, entry] of entries) {
306
- this.safeDispose(entry.context, `context[${id}]`);
307
- }
308
- await new Promise((resolve) => setTimeout(resolve, 100));
309
- this.safeDispose(this.model, `model[${this.modelName}]`);
310
- this.safeDispose(this.llama, `llama[${this.modelName}]`);
311
- this.model = null;
312
- this.llama = null;
313
- this.modelName = "";
314
- this.modelState = null;
315
- this.activeRequestCount = 0;
316
- this.runnerState = "cold";
507
+ this._gracefulDispose = true;
508
+ const disposePromise = new Promise((resolve, reject) => {
509
+ this._disposeCallback = { resolve, reject };
510
+ });
511
+ this._workerSend({ type: "dispose" });
512
+ const safetyTimer = setTimeout(() => {
513
+ ++this._workerEra;
514
+ if (this.worker) {
515
+ try {
516
+ this.worker.kill();
517
+ }
518
+ catch {
519
+ // ignored
520
+ }
521
+ }
522
+ this._disposeCallback?.resolve();
523
+ this._disposeCallback = null;
524
+ }, DISPOSE_SAFETY_TIMEOUT_MS);
525
+ try {
526
+ await disposePromise;
527
+ }
528
+ finally {
529
+ clearTimeout(safetyTimer);
530
+ this._gracefulDispose = false;
531
+ }
532
+ const stateAfterDispose = this.runnerState;
533
+ if (stateAfterDispose !== "cold")
534
+ this._finalizeDispose();
317
535
  this.logTransition("disposing", "cold", reason);
318
536
  }
537
+ // ---------------------------------------------------------------------------
538
+ // Idle / watchdog timers
539
+ // ---------------------------------------------------------------------------
319
540
  scheduleIdleCheck() {
320
541
  this.clearIdleTimer();
321
- if (!this.model)
542
+ if (this.runnerState !== "loaded")
322
543
  return;
323
544
  if (this.activeRequestCount > 0)
324
545
  return;
@@ -331,7 +552,7 @@ export class SharedGGUFRunner {
331
552
  return;
332
553
  if (this.activeRequestCount > 0)
333
554
  return;
334
- if (!this.model)
555
+ if (this.runnerState !== "loaded")
335
556
  return;
336
557
  const nameBefore = this.modelName;
337
558
  try {
@@ -360,57 +581,31 @@ export class SharedGGUFRunner {
360
581
  }
361
582
  }
362
583
  runWatchdogCheck() {
363
- if (!this.model) {
584
+ if (this.runnerState !== "loaded") {
364
585
  this.stopWatchdog();
365
586
  return;
366
587
  }
367
588
  const checkGen = this.generation;
368
589
  const now = Date.now();
369
- for (const [id, entry] of this.contexts.entries()) {
590
+ for (const [id, tracker] of this.contextTrackers.entries()) {
370
591
  if (this.generation !== checkGen)
371
592
  break;
372
- if (entry.inFlight)
593
+ if (tracker.inFlight)
373
594
  continue;
374
- const idleMs = now - entry.lastUsedAt;
595
+ const idleMs = now - tracker.lastUsedAt;
375
596
  if (idleMs <= STALE_CONTEXT_LIMIT_MS)
376
597
  continue;
377
- this.contexts.delete(id);
598
+ this.contextTrackers.delete(id);
599
+ this._workerSend({ type: "clear-context", conversationId: id });
378
600
  this.logTransition(this.runnerState, this.runnerState, "watchdog-evict", {
379
601
  contextId: id,
380
602
  idleMin: Math.floor(idleMs / 60000),
381
603
  });
382
- this.safeDispose(entry.context, `watchdog-evict-context[${id}]`);
383
- }
384
- }
385
- async getOrCreateContext(conversationId) {
386
- const existing = this.contexts.get(conversationId);
387
- if (existing) {
388
- existing.lastUsedAt = Date.now();
389
- return { context: existing.context, session: existing.session };
390
- }
391
- if (this.contexts.size >= this.maxConcurrentContexts) {
392
- const evictable = Array.from(this.contexts.entries())
393
- .filter(([, e]) => !e.inFlight)
394
- .sort((a, b) => a[1].lastUsedAt - b[1].lastUsedAt);
395
- if (evictable.length > 0 && evictable[0]) {
396
- const [id, entry] = evictable[0];
397
- this.contexts.delete(id);
398
- this.safeDispose(entry.context, `lru-context[${id}]`);
399
- }
400
604
  }
401
- if (!this.model)
402
- throw new Error("Model not loaded when creating context");
403
- const context = await this.model.createContext({ contextSize: 4096 });
404
- const session = new LlamaChatSession({ contextSequence: context.getSequence() });
405
- this.contexts.set(conversationId, {
406
- context,
407
- session,
408
- lastUsedAt: Date.now(),
409
- inFlight: false,
410
- abortController: null,
411
- });
412
- return { context, session };
413
605
  }
606
+ // ---------------------------------------------------------------------------
607
+ // Helpers
608
+ // ---------------------------------------------------------------------------
414
609
  async validateModelFile(modelPath, modelName) {
415
610
  if (!existsSync(modelPath)) {
416
611
  throw new Error(`Model '${modelName}' not found at ${modelPath}`);
@@ -422,16 +617,6 @@ export class SharedGGUFRunner {
422
617
  throw new Error(`Model '${modelName}' cannot be read at ${modelPath}`);
423
618
  }
424
619
  }
425
- safeDispose(obj, name) {
426
- if (!obj)
427
- return;
428
- try {
429
- obj.dispose?.();
430
- }
431
- catch (err) {
432
- getLogger().warn(`failed to dispose ${name}:`, err.message);
433
- }
434
- }
435
620
  logTransition(from, to, reason, extra) {
436
621
  getLogger().info(JSON.stringify({
437
622
  event: "runner_state_transition",
@@ -441,7 +626,7 @@ export class SharedGGUFRunner {
441
626
  reason,
442
627
  generation: this.generation,
443
628
  activeRequests: this.activeRequestCount,
444
- contexts: this.contexts.size,
629
+ contexts: this.contextTrackers.size,
445
630
  timestamp: Date.now(),
446
631
  ...extra,
447
632
  }));