llm-checker 3.5.0 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ const CUDADetector = require('./backends/cuda-detector');
9
9
  const ROCmDetector = require('./backends/rocm-detector');
10
10
  const IntelDetector = require('./backends/intel-detector');
11
11
  const CPUDetector = require('./backends/cpu-detector');
12
+ const si = require('systeminformation');
12
13
 
13
14
  class UnifiedDetector {
14
15
  constructor() {
@@ -37,6 +38,7 @@ class UnifiedDetector {
37
38
  backends: {},
38
39
  primary: null,
39
40
  cpu: null,
41
+ systemGpu: null,
40
42
  summary: {
41
43
  bestBackend: 'cpu',
42
44
  totalVRAM: 0,
@@ -122,6 +124,30 @@ class UnifiedDetector {
122
124
  }
123
125
  }
124
126
 
127
+ // Fallback GPU inventory via systeminformation (Windows/Linux) when no
128
+ // accelerator backend is currently available (CUDA/ROCm/Metal/Intel).
129
+ const hasAcceleratedBackend = Boolean(
130
+ result.backends.cuda?.available ||
131
+ result.backends.rocm?.available ||
132
+ result.backends.metal?.available ||
133
+ result.backends.intel?.available
134
+ );
135
+
136
+ if (!hasAcceleratedBackend && (process.platform === 'win32' || process.platform === 'linux')) {
137
+ try {
138
+ const genericGpuInfo = await this.detectSystemGpuFallback();
139
+ if (genericGpuInfo?.available) {
140
+ result.systemGpu = genericGpuInfo;
141
+ result.backends.generic = {
142
+ available: true,
143
+ info: genericGpuInfo
144
+ };
145
+ }
146
+ } catch (e) {
147
+ result.backends.generic = { available: false, error: e.message };
148
+ }
149
+ }
150
+
125
151
  // Select the best available backend
126
152
  result.primary = this.selectPrimaryBackend(result.backends);
127
153
 
@@ -251,11 +277,22 @@ class UnifiedDetector {
251
277
  }
252
278
  else if (result.cpu) {
253
279
  summary.speedCoefficient = result.cpu.speedCoefficient;
280
+
281
+ if (result.systemGpu?.available && Array.isArray(result.systemGpu.gpus) && result.systemGpu.gpus.length > 0) {
282
+ const inventory = this.summarizeGPUInventory(result.systemGpu.gpus);
283
+ summary.totalVRAM = result.systemGpu.totalVRAM || 0;
284
+ summary.gpuCount = result.systemGpu.gpus.length;
285
+ summary.isMultiGPU = Boolean(result.systemGpu.isMultiGPU);
286
+ summary.gpuModel = inventory.primaryModel || null;
287
+ summary.gpuInventory = inventory.displayName || summary.gpuModel;
288
+ summary.gpuModels = inventory.models;
289
+ summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
290
+ }
254
291
  }
255
292
 
256
293
  // Effective memory for LLM loading
257
294
  // For GPU: use VRAM; for CPU/Metal: use system RAM
258
- if (summary.totalVRAM > 0 && primary?.type !== 'metal') {
295
+ if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
259
296
  summary.effectiveMemory = summary.totalVRAM;
260
297
  } else {
261
298
  // Use 70% of system RAM for models (leave room for OS)
@@ -286,6 +323,137 @@ class UnifiedDetector {
286
323
  };
287
324
  }
288
325
 
326
+ async detectSystemGpuFallback() {
327
+ const graphics = await si.graphics();
328
+ const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
329
+
330
+ if (controllers.length === 0) {
331
+ return {
332
+ available: false,
333
+ source: 'systeminformation',
334
+ gpus: [],
335
+ totalVRAM: 0,
336
+ isMultiGPU: false,
337
+ hasDedicated: false
338
+ };
339
+ }
340
+
341
+ const normalized = controllers
342
+ .map((controller) => {
343
+ const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
344
+ if (!name || name.toLowerCase() === 'unknown') return null;
345
+
346
+ const nameLower = name.toLowerCase();
347
+ if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
348
+
349
+ const isIntegrated = this.isIntegratedGPUModel(name);
350
+ let vram = this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
351
+
352
+ // For dedicated cards, estimate VRAM from model if runtime did not report memory.
353
+ if (!isIntegrated && vram === 0) {
354
+ vram = this.estimateFallbackVRAM(name);
355
+ }
356
+
357
+ return {
358
+ name,
359
+ vendor: controller?.vendor || '',
360
+ type: isIntegrated ? 'integrated' : 'dedicated',
361
+ memory: { total: vram }
362
+ };
363
+ })
364
+ .filter(Boolean);
365
+
366
+ if (normalized.length === 0) {
367
+ return {
368
+ available: false,
369
+ source: 'systeminformation',
370
+ gpus: [],
371
+ totalVRAM: 0,
372
+ isMultiGPU: false,
373
+ hasDedicated: false
374
+ };
375
+ }
376
+
377
+ const dedicated = normalized.filter((gpu) => gpu.type === 'dedicated');
378
+ const totalVRAM = dedicated.length > 0
379
+ ? dedicated.reduce((sum, gpu) => sum + (gpu.memory?.total || 0), 0)
380
+ : 0;
381
+
382
+ return {
383
+ available: true,
384
+ source: 'systeminformation',
385
+ gpus: normalized,
386
+ totalVRAM,
387
+ isMultiGPU: dedicated.length > 1,
388
+ hasDedicated: dedicated.length > 0
389
+ };
390
+ }
391
+
392
+ normalizeFallbackVRAM(value) {
393
+ const num = Number(value);
394
+ if (!Number.isFinite(num) || num <= 0) return 0;
395
+
396
+ // Bytes -> GB
397
+ if (num > 1024 * 1024) {
398
+ return Math.round(num / (1024 * 1024 * 1024));
399
+ }
400
+
401
+ // MB -> GB
402
+ if (num >= 1024) {
403
+ return Math.round(num / 1024);
404
+ }
405
+
406
+ // Likely already GB
407
+ if (num >= 1 && num <= 80) {
408
+ return Math.round(num);
409
+ }
410
+
411
+ return 0;
412
+ }
413
+
414
+ isIntegratedGPUModel(model) {
415
+ const lower = String(model || '').toLowerCase();
416
+ if (!lower) return false;
417
+
418
+ if (lower.includes('radeon rx') || lower.includes('rtx') || lower.includes('gtx') ||
419
+ lower.includes('geforce') || lower.includes('tesla') || lower.includes('quadro') ||
420
+ lower.includes('instinct') || lower.includes('arc a') || lower.includes('radeon pro')) {
421
+ return false;
422
+ }
423
+
424
+ return (
425
+ lower.includes('intel') ||
426
+ lower.includes('iris') ||
427
+ lower.includes('uhd') ||
428
+ lower.includes('hd graphics') ||
429
+ lower.includes('radeon graphics') ||
430
+ lower.includes('radeon(tm) graphics') ||
431
+ lower.includes('vega') ||
432
+ lower.includes('apple')
433
+ );
434
+ }
435
+
436
+ estimateFallbackVRAM(model) {
437
+ const lower = String(model || '').toLowerCase();
438
+ if (!lower) return 0;
439
+
440
+ if (lower.includes('rx 7900')) return 24;
441
+ if (lower.includes('rx 7800')) return 16;
442
+ if (lower.includes('rx 7700')) return 12;
443
+ if (lower.includes('rx 7600 xt')) return 16;
444
+ if (lower.includes('rx 7600')) return 8;
445
+ if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
446
+ if (lower.includes('rx 6700')) return 12;
447
+
448
+ if (lower.includes('rtx 5090')) return 32;
449
+ if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
450
+ if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
451
+ if (lower.includes('rtx 5070') || lower.includes('rtx 4070') || lower.includes('rtx 3060')) return 12;
452
+ if (lower.includes('rtx 4060') || lower.includes('rtx 3070')) return 8;
453
+
454
+ return 0;
455
+ }
456
+
289
457
  /**
290
458
  * Generate hardware fingerprint for benchmarks
291
459
  */
@@ -448,6 +616,10 @@ class UnifiedDetector {
448
616
  return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
449
617
  }
450
618
  else {
619
+ if (summary.gpuModel && summary.gpuCount > 0) {
620
+ const gpuDesc = summary.gpuInventory || summary.gpuModel;
621
+ return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
622
+ }
451
623
  return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
452
624
  }
453
625
  }
@@ -0,0 +1,23 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3442 | 9:59 PM | 🔵 | Static Model Database Structure - Hardcoded LLM Specifications | ~572 |
11
+
12
+ ### Feb 13, 2026
13
+
14
+ | ID | Time | T | Title | Read |
15
+ |----|------|---|-------|------|
16
+ | #3699 | 12:05 AM | ✅ | Git Push Consolidated Architecture Changes to GitHub | ~367 |
17
+
18
+ ### Feb 14, 2026
19
+
20
+ | ID | Time | T | Title | Read |
21
+ |----|------|---|-------|------|
22
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
23
+ </claude-mem-context>
@@ -0,0 +1,30 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3500 | 10:26 PM | 🔴 | pullModel() Stream Handling Improved - Success Validation Added | ~458 |
11
+ | #3499 | " | 🔴 | Race Condition Fixed in Ollama Availability Cache | ~440 |
12
+ | #3498 | 10:25 PM | 🔵 | testModelPerformance() Timeout Already Fixed | ~418 |
13
+ | #3497 | " | 🔴 | Timeout Fixed in deleteModel() Using AbortController | ~391 |
14
+ | #3496 | " | 🔴 | Timeout Fixed in testConnection() Using AbortController | ~395 |
15
+ | #3495 | " | 🔴 | Fixed unbounded memory growth in native scraper HTTP request handler | ~361 |
16
+ | #3493 | " | 🔴 | Fixed race condition in checkOllamaAvailability() with promise deduplication | ~398 |
17
+ | #3491 | 10:24 PM | 🔴 | Added missing clearTimeout() in testModelPerformance() | ~319 |
18
+ | #3489 | " | 🔴 | Fixed node-fetch timeout handling in testModelPerformance() | ~332 |
19
+ | #3488 | " | 🔴 | Fixed node-fetch timeout handling in testConnection() tags check | ~303 |
20
+ | #3486 | " | 🔴 | Fixed node-fetch timeout handling in getRunningModels() | ~308 |
21
+ | #3484 | 10:23 PM | 🔵 | Ollama Client Timeout Implementation - Mixed Patterns with AbortController | ~554 |
22
+ | #3443 | 9:59 PM | 🔵 | Ollama Native Scraper - Web Scraping with Dual Cache Strategy | ~594 |
23
+ | #3437 | 9:58 PM | 🔵 | Ollama Client Implementation - HTTP API Wrapper with Connection Management | ~605 |
24
+
25
+ ### Feb 14, 2026
26
+
27
+ | ID | Time | T | Title | Read |
28
+ |----|------|---|-------|------|
29
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
30
+ </claude-mem-context>
@@ -458,6 +458,87 @@ class OllamaClient {
458
458
  };
459
459
  }
460
460
  }
461
+
462
+ async showModel(modelName) {
463
+ const availability = await this.checkOllamaAvailability();
464
+ if (!availability.available) {
465
+ throw new Error(`Ollama not available: ${availability.error}`);
466
+ }
467
+
468
+ try {
469
+ const controller = new AbortController();
470
+ const timeoutId = setTimeout(() => controller.abort(), 15000);
471
+
472
+ const response = await fetch(`${this.baseURL}/api/show`, {
473
+ method: 'POST',
474
+ signal: controller.signal,
475
+ headers: { 'Content-Type': 'application/json' },
476
+ body: JSON.stringify({ model: modelName })
477
+ });
478
+
479
+ clearTimeout(timeoutId);
480
+
481
+ if (!response.ok) {
482
+ const errorText = await response.text();
483
+ throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
484
+ }
485
+
486
+ return response.json();
487
+ } catch (error) {
488
+ throw new Error(`Failed to show model info: ${error.message}`);
489
+ }
490
+ }
491
+
492
+ async chat(modelName, messages, options = {}) {
493
+ const availability = await this.checkOllamaAvailability();
494
+ if (!availability.available) {
495
+ throw new Error(`Ollama not available: ${availability.error}`);
496
+ }
497
+
498
+ const {
499
+ tools,
500
+ format,
501
+ keepAlive,
502
+ timeoutMs = 45000,
503
+ generationOptions = {}
504
+ } = options;
505
+
506
+ const payload = {
507
+ model: modelName,
508
+ messages: Array.isArray(messages) ? messages : [],
509
+ stream: false
510
+ };
511
+
512
+ if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
513
+ if (format) payload.format = format;
514
+ if (keepAlive) payload.keep_alive = keepAlive;
515
+ if (generationOptions && Object.keys(generationOptions).length > 0) {
516
+ payload.options = generationOptions;
517
+ }
518
+
519
+ try {
520
+ const controller = new AbortController();
521
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
522
+
523
+ const response = await fetch(`${this.baseURL}/api/chat`, {
524
+ method: 'POST',
525
+ signal: controller.signal,
526
+ headers: { 'Content-Type': 'application/json' },
527
+ body: JSON.stringify(payload)
528
+ });
529
+
530
+ clearTimeout(timeoutId);
531
+
532
+ if (!response.ok) {
533
+ const errorText = await response.text();
534
+ throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
535
+ }
536
+
537
+ return response.json();
538
+ } catch (error) {
539
+ throw new Error(`Failed to run chat request: ${error.message}`);
540
+ }
541
+ }
461
542
  }
462
543
 
463
544
  module.exports = OllamaClient;
@@ -0,0 +1,17 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3462 | 10:02 PM | 🔵 | Plugin System Architecture - Hook-Based Extensibility Framework | ~648 |
11
+
12
+ ### Feb 14, 2026
13
+
14
+ | ID | Time | T | Title | Read |
15
+ |----|------|---|-------|------|
16
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
17
+ </claude-mem-context>
@@ -0,0 +1,17 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3438 | 9:58 PM | 🔵 | Configuration Management System - Comprehensive Settings with Environment Overrides | ~580 |
11
+
12
+ ### Feb 14, 2026
13
+
14
+ | ID | Time | T | Title | Read |
15
+ |----|------|---|-------|------|
16
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
17
+ </claude-mem-context>