llm-checker 3.5.0 → 3.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -43
- package/bin/enhanced_cli.js +426 -0
- package/bin/mcp-server.mjs +60 -0
- package/package.json +6 -2
- package/src/commands/roadmap-tools.js +368 -0
- package/src/data/CLAUDE.md +17 -0
- package/src/hardware/CLAUDE.md +18 -0
- package/src/hardware/backends/CLAUDE.md +17 -0
- package/src/hardware/backends/cpu-detector.js +66 -6
- package/src/hardware/unified-detector.js +173 -1
- package/src/models/CLAUDE.md +23 -0
- package/src/ollama/CLAUDE.md +30 -0
- package/src/ollama/client.js +81 -0
- package/src/plugins/CLAUDE.md +17 -0
- package/src/utils/CLAUDE.md +17 -0
|
@@ -9,6 +9,7 @@ const CUDADetector = require('./backends/cuda-detector');
|
|
|
9
9
|
const ROCmDetector = require('./backends/rocm-detector');
|
|
10
10
|
const IntelDetector = require('./backends/intel-detector');
|
|
11
11
|
const CPUDetector = require('./backends/cpu-detector');
|
|
12
|
+
const si = require('systeminformation');
|
|
12
13
|
|
|
13
14
|
class UnifiedDetector {
|
|
14
15
|
constructor() {
|
|
@@ -37,6 +38,7 @@ class UnifiedDetector {
|
|
|
37
38
|
backends: {},
|
|
38
39
|
primary: null,
|
|
39
40
|
cpu: null,
|
|
41
|
+
systemGpu: null,
|
|
40
42
|
summary: {
|
|
41
43
|
bestBackend: 'cpu',
|
|
42
44
|
totalVRAM: 0,
|
|
@@ -122,6 +124,30 @@ class UnifiedDetector {
|
|
|
122
124
|
}
|
|
123
125
|
}
|
|
124
126
|
|
|
127
|
+
// Fallback GPU inventory via systeminformation (Windows/Linux) when no
|
|
128
|
+
// accelerator backend is currently available (CUDA/ROCm/Metal/Intel).
|
|
129
|
+
const hasAcceleratedBackend = Boolean(
|
|
130
|
+
result.backends.cuda?.available ||
|
|
131
|
+
result.backends.rocm?.available ||
|
|
132
|
+
result.backends.metal?.available ||
|
|
133
|
+
result.backends.intel?.available
|
|
134
|
+
);
|
|
135
|
+
|
|
136
|
+
if (!hasAcceleratedBackend && (process.platform === 'win32' || process.platform === 'linux')) {
|
|
137
|
+
try {
|
|
138
|
+
const genericGpuInfo = await this.detectSystemGpuFallback();
|
|
139
|
+
if (genericGpuInfo?.available) {
|
|
140
|
+
result.systemGpu = genericGpuInfo;
|
|
141
|
+
result.backends.generic = {
|
|
142
|
+
available: true,
|
|
143
|
+
info: genericGpuInfo
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
} catch (e) {
|
|
147
|
+
result.backends.generic = { available: false, error: e.message };
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
125
151
|
// Select the best available backend
|
|
126
152
|
result.primary = this.selectPrimaryBackend(result.backends);
|
|
127
153
|
|
|
@@ -251,11 +277,22 @@ class UnifiedDetector {
|
|
|
251
277
|
}
|
|
252
278
|
else if (result.cpu) {
|
|
253
279
|
summary.speedCoefficient = result.cpu.speedCoefficient;
|
|
280
|
+
|
|
281
|
+
if (result.systemGpu?.available && Array.isArray(result.systemGpu.gpus) && result.systemGpu.gpus.length > 0) {
|
|
282
|
+
const inventory = this.summarizeGPUInventory(result.systemGpu.gpus);
|
|
283
|
+
summary.totalVRAM = result.systemGpu.totalVRAM || 0;
|
|
284
|
+
summary.gpuCount = result.systemGpu.gpus.length;
|
|
285
|
+
summary.isMultiGPU = Boolean(result.systemGpu.isMultiGPU);
|
|
286
|
+
summary.gpuModel = inventory.primaryModel || null;
|
|
287
|
+
summary.gpuInventory = inventory.displayName || summary.gpuModel;
|
|
288
|
+
summary.gpuModels = inventory.models;
|
|
289
|
+
summary.hasHeterogeneousGPU = inventory.isHeterogeneous;
|
|
290
|
+
}
|
|
254
291
|
}
|
|
255
292
|
|
|
256
293
|
// Effective memory for LLM loading
|
|
257
294
|
// For GPU: use VRAM; for CPU/Metal: use system RAM
|
|
258
|
-
if (summary.totalVRAM > 0 &&
|
|
295
|
+
if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
259
296
|
summary.effectiveMemory = summary.totalVRAM;
|
|
260
297
|
} else {
|
|
261
298
|
// Use 70% of system RAM for models (leave room for OS)
|
|
@@ -286,6 +323,137 @@ class UnifiedDetector {
|
|
|
286
323
|
};
|
|
287
324
|
}
|
|
288
325
|
|
|
326
|
+
async detectSystemGpuFallback() {
|
|
327
|
+
const graphics = await si.graphics();
|
|
328
|
+
const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
|
|
329
|
+
|
|
330
|
+
if (controllers.length === 0) {
|
|
331
|
+
return {
|
|
332
|
+
available: false,
|
|
333
|
+
source: 'systeminformation',
|
|
334
|
+
gpus: [],
|
|
335
|
+
totalVRAM: 0,
|
|
336
|
+
isMultiGPU: false,
|
|
337
|
+
hasDedicated: false
|
|
338
|
+
};
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
const normalized = controllers
|
|
342
|
+
.map((controller) => {
|
|
343
|
+
const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
|
|
344
|
+
if (!name || name.toLowerCase() === 'unknown') return null;
|
|
345
|
+
|
|
346
|
+
const nameLower = name.toLowerCase();
|
|
347
|
+
if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
|
|
348
|
+
|
|
349
|
+
const isIntegrated = this.isIntegratedGPUModel(name);
|
|
350
|
+
let vram = this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
|
|
351
|
+
|
|
352
|
+
// For dedicated cards, estimate VRAM from model if runtime did not report memory.
|
|
353
|
+
if (!isIntegrated && vram === 0) {
|
|
354
|
+
vram = this.estimateFallbackVRAM(name);
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
return {
|
|
358
|
+
name,
|
|
359
|
+
vendor: controller?.vendor || '',
|
|
360
|
+
type: isIntegrated ? 'integrated' : 'dedicated',
|
|
361
|
+
memory: { total: vram }
|
|
362
|
+
};
|
|
363
|
+
})
|
|
364
|
+
.filter(Boolean);
|
|
365
|
+
|
|
366
|
+
if (normalized.length === 0) {
|
|
367
|
+
return {
|
|
368
|
+
available: false,
|
|
369
|
+
source: 'systeminformation',
|
|
370
|
+
gpus: [],
|
|
371
|
+
totalVRAM: 0,
|
|
372
|
+
isMultiGPU: false,
|
|
373
|
+
hasDedicated: false
|
|
374
|
+
};
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
const dedicated = normalized.filter((gpu) => gpu.type === 'dedicated');
|
|
378
|
+
const totalVRAM = dedicated.length > 0
|
|
379
|
+
? dedicated.reduce((sum, gpu) => sum + (gpu.memory?.total || 0), 0)
|
|
380
|
+
: 0;
|
|
381
|
+
|
|
382
|
+
return {
|
|
383
|
+
available: true,
|
|
384
|
+
source: 'systeminformation',
|
|
385
|
+
gpus: normalized,
|
|
386
|
+
totalVRAM,
|
|
387
|
+
isMultiGPU: dedicated.length > 1,
|
|
388
|
+
hasDedicated: dedicated.length > 0
|
|
389
|
+
};
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
normalizeFallbackVRAM(value) {
|
|
393
|
+
const num = Number(value);
|
|
394
|
+
if (!Number.isFinite(num) || num <= 0) return 0;
|
|
395
|
+
|
|
396
|
+
// Bytes -> GB
|
|
397
|
+
if (num > 1024 * 1024) {
|
|
398
|
+
return Math.round(num / (1024 * 1024 * 1024));
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
// MB -> GB
|
|
402
|
+
if (num >= 1024) {
|
|
403
|
+
return Math.round(num / 1024);
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// Likely already GB
|
|
407
|
+
if (num >= 1 && num <= 80) {
|
|
408
|
+
return Math.round(num);
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
return 0;
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
isIntegratedGPUModel(model) {
|
|
415
|
+
const lower = String(model || '').toLowerCase();
|
|
416
|
+
if (!lower) return false;
|
|
417
|
+
|
|
418
|
+
if (lower.includes('radeon rx') || lower.includes('rtx') || lower.includes('gtx') ||
|
|
419
|
+
lower.includes('geforce') || lower.includes('tesla') || lower.includes('quadro') ||
|
|
420
|
+
lower.includes('instinct') || lower.includes('arc a') || lower.includes('radeon pro')) {
|
|
421
|
+
return false;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
return (
|
|
425
|
+
lower.includes('intel') ||
|
|
426
|
+
lower.includes('iris') ||
|
|
427
|
+
lower.includes('uhd') ||
|
|
428
|
+
lower.includes('hd graphics') ||
|
|
429
|
+
lower.includes('radeon graphics') ||
|
|
430
|
+
lower.includes('radeon(tm) graphics') ||
|
|
431
|
+
lower.includes('vega') ||
|
|
432
|
+
lower.includes('apple')
|
|
433
|
+
);
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
estimateFallbackVRAM(model) {
|
|
437
|
+
const lower = String(model || '').toLowerCase();
|
|
438
|
+
if (!lower) return 0;
|
|
439
|
+
|
|
440
|
+
if (lower.includes('rx 7900')) return 24;
|
|
441
|
+
if (lower.includes('rx 7800')) return 16;
|
|
442
|
+
if (lower.includes('rx 7700')) return 12;
|
|
443
|
+
if (lower.includes('rx 7600 xt')) return 16;
|
|
444
|
+
if (lower.includes('rx 7600')) return 8;
|
|
445
|
+
if (lower.includes('rx 6900') || lower.includes('rx 6800')) return 16;
|
|
446
|
+
if (lower.includes('rx 6700')) return 12;
|
|
447
|
+
|
|
448
|
+
if (lower.includes('rtx 5090')) return 32;
|
|
449
|
+
if (lower.includes('rtx 4090') || lower.includes('rtx 3090')) return 24;
|
|
450
|
+
if (lower.includes('rtx 5080') || lower.includes('rtx 4080')) return 16;
|
|
451
|
+
if (lower.includes('rtx 5070') || lower.includes('rtx 4070') || lower.includes('rtx 3060')) return 12;
|
|
452
|
+
if (lower.includes('rtx 4060') || lower.includes('rtx 3070')) return 8;
|
|
453
|
+
|
|
454
|
+
return 0;
|
|
455
|
+
}
|
|
456
|
+
|
|
289
457
|
/**
|
|
290
458
|
* Generate hardware fingerprint for benchmarks
|
|
291
459
|
*/
|
|
@@ -448,6 +616,10 @@ class UnifiedDetector {
|
|
|
448
616
|
return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
|
|
449
617
|
}
|
|
450
618
|
else {
|
|
619
|
+
if (summary.gpuModel && summary.gpuCount > 0) {
|
|
620
|
+
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
621
|
+
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, CPU backend) + ${summary.cpuModel}`;
|
|
622
|
+
}
|
|
451
623
|
return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
|
|
452
624
|
}
|
|
453
625
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
<claude-mem-context>
|
|
2
|
+
# Recent Activity
|
|
3
|
+
|
|
4
|
+
<!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
|
|
5
|
+
|
|
6
|
+
### Feb 12, 2026
|
|
7
|
+
|
|
8
|
+
| ID | Time | T | Title | Read |
|
|
9
|
+
|----|------|---|-------|------|
|
|
10
|
+
| #3442 | 9:59 PM | 🔵 | Static Model Database Structure - Hardcoded LLM Specifications | ~572 |
|
|
11
|
+
|
|
12
|
+
### Feb 13, 2026
|
|
13
|
+
|
|
14
|
+
| ID | Time | T | Title | Read |
|
|
15
|
+
|----|------|---|-------|------|
|
|
16
|
+
| #3699 | 12:05 AM | ✅ | Git Push Consolidated Architecture Changes to GitHub | ~367 |
|
|
17
|
+
|
|
18
|
+
### Feb 14, 2026
|
|
19
|
+
|
|
20
|
+
| ID | Time | T | Title | Read |
|
|
21
|
+
|----|------|---|-------|------|
|
|
22
|
+
| #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
|
|
23
|
+
</claude-mem-context>
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
<claude-mem-context>
|
|
2
|
+
# Recent Activity
|
|
3
|
+
|
|
4
|
+
<!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
|
|
5
|
+
|
|
6
|
+
### Feb 12, 2026
|
|
7
|
+
|
|
8
|
+
| ID | Time | T | Title | Read |
|
|
9
|
+
|----|------|---|-------|------|
|
|
10
|
+
| #3500 | 10:26 PM | 🔴 | pullModel() Stream Handling Improved - Success Validation Added | ~458 |
|
|
11
|
+
| #3499 | " | 🔴 | Race Condition Fixed in Ollama Availability Cache | ~440 |
|
|
12
|
+
| #3498 | 10:25 PM | 🔵 | testModelPerformance() Timeout Already Fixed | ~418 |
|
|
13
|
+
| #3497 | " | 🔴 | Timeout Fixed in deleteModel() Using AbortController | ~391 |
|
|
14
|
+
| #3496 | " | 🔴 | Timeout Fixed in testConnection() Using AbortController | ~395 |
|
|
15
|
+
| #3495 | " | 🔴 | Fixed unbounded memory growth in native scraper HTTP request handler | ~361 |
|
|
16
|
+
| #3493 | " | 🔴 | Fixed race condition in checkOllamaAvailability() with promise deduplication | ~398 |
|
|
17
|
+
| #3491 | 10:24 PM | 🔴 | Added missing clearTimeout() in testModelPerformance() | ~319 |
|
|
18
|
+
| #3489 | " | 🔴 | Fixed node-fetch timeout handling in testModelPerformance() | ~332 |
|
|
19
|
+
| #3488 | " | 🔴 | Fixed node-fetch timeout handling in testConnection() tags check | ~303 |
|
|
20
|
+
| #3486 | " | 🔴 | Fixed node-fetch timeout handling in getRunningModels() | ~308 |
|
|
21
|
+
| #3484 | 10:23 PM | 🔵 | Ollama Client Timeout Implementation - Mixed Patterns with AbortController | ~554 |
|
|
22
|
+
| #3443 | 9:59 PM | 🔵 | Ollama Native Scraper - Web Scraping with Dual Cache Strategy | ~594 |
|
|
23
|
+
| #3437 | 9:58 PM | 🔵 | Ollama Client Implementation - HTTP API Wrapper with Connection Management | ~605 |
|
|
24
|
+
|
|
25
|
+
### Feb 14, 2026
|
|
26
|
+
|
|
27
|
+
| ID | Time | T | Title | Read |
|
|
28
|
+
|----|------|---|-------|------|
|
|
29
|
+
| #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
|
|
30
|
+
</claude-mem-context>
|
package/src/ollama/client.js
CHANGED
|
@@ -458,6 +458,87 @@ class OllamaClient {
|
|
|
458
458
|
};
|
|
459
459
|
}
|
|
460
460
|
}
|
|
461
|
+
|
|
462
|
+
async showModel(modelName) {
|
|
463
|
+
const availability = await this.checkOllamaAvailability();
|
|
464
|
+
if (!availability.available) {
|
|
465
|
+
throw new Error(`Ollama not available: ${availability.error}`);
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
try {
|
|
469
|
+
const controller = new AbortController();
|
|
470
|
+
const timeoutId = setTimeout(() => controller.abort(), 15000);
|
|
471
|
+
|
|
472
|
+
const response = await fetch(`${this.baseURL}/api/show`, {
|
|
473
|
+
method: 'POST',
|
|
474
|
+
signal: controller.signal,
|
|
475
|
+
headers: { 'Content-Type': 'application/json' },
|
|
476
|
+
body: JSON.stringify({ model: modelName })
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
clearTimeout(timeoutId);
|
|
480
|
+
|
|
481
|
+
if (!response.ok) {
|
|
482
|
+
const errorText = await response.text();
|
|
483
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
return response.json();
|
|
487
|
+
} catch (error) {
|
|
488
|
+
throw new Error(`Failed to show model info: ${error.message}`);
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
async chat(modelName, messages, options = {}) {
|
|
493
|
+
const availability = await this.checkOllamaAvailability();
|
|
494
|
+
if (!availability.available) {
|
|
495
|
+
throw new Error(`Ollama not available: ${availability.error}`);
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
const {
|
|
499
|
+
tools,
|
|
500
|
+
format,
|
|
501
|
+
keepAlive,
|
|
502
|
+
timeoutMs = 45000,
|
|
503
|
+
generationOptions = {}
|
|
504
|
+
} = options;
|
|
505
|
+
|
|
506
|
+
const payload = {
|
|
507
|
+
model: modelName,
|
|
508
|
+
messages: Array.isArray(messages) ? messages : [],
|
|
509
|
+
stream: false
|
|
510
|
+
};
|
|
511
|
+
|
|
512
|
+
if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
|
|
513
|
+
if (format) payload.format = format;
|
|
514
|
+
if (keepAlive) payload.keep_alive = keepAlive;
|
|
515
|
+
if (generationOptions && Object.keys(generationOptions).length > 0) {
|
|
516
|
+
payload.options = generationOptions;
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
try {
|
|
520
|
+
const controller = new AbortController();
|
|
521
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
522
|
+
|
|
523
|
+
const response = await fetch(`${this.baseURL}/api/chat`, {
|
|
524
|
+
method: 'POST',
|
|
525
|
+
signal: controller.signal,
|
|
526
|
+
headers: { 'Content-Type': 'application/json' },
|
|
527
|
+
body: JSON.stringify(payload)
|
|
528
|
+
});
|
|
529
|
+
|
|
530
|
+
clearTimeout(timeoutId);
|
|
531
|
+
|
|
532
|
+
if (!response.ok) {
|
|
533
|
+
const errorText = await response.text();
|
|
534
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
return response.json();
|
|
538
|
+
} catch (error) {
|
|
539
|
+
throw new Error(`Failed to run chat request: ${error.message}`);
|
|
540
|
+
}
|
|
541
|
+
}
|
|
461
542
|
}
|
|
462
543
|
|
|
463
544
|
module.exports = OllamaClient;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
<claude-mem-context>
|
|
2
|
+
# Recent Activity
|
|
3
|
+
|
|
4
|
+
<!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
|
|
5
|
+
|
|
6
|
+
### Feb 12, 2026
|
|
7
|
+
|
|
8
|
+
| ID | Time | T | Title | Read |
|
|
9
|
+
|----|------|---|-------|------|
|
|
10
|
+
| #3462 | 10:02 PM | 🔵 | Plugin System Architecture - Hook-Based Extensibility Framework | ~648 |
|
|
11
|
+
|
|
12
|
+
### Feb 14, 2026
|
|
13
|
+
|
|
14
|
+
| ID | Time | T | Title | Read |
|
|
15
|
+
|----|------|---|-------|------|
|
|
16
|
+
| #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
|
|
17
|
+
</claude-mem-context>
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
<claude-mem-context>
|
|
2
|
+
# Recent Activity
|
|
3
|
+
|
|
4
|
+
<!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
|
|
5
|
+
|
|
6
|
+
### Feb 12, 2026
|
|
7
|
+
|
|
8
|
+
| ID | Time | T | Title | Read |
|
|
9
|
+
|----|------|---|-------|------|
|
|
10
|
+
| #3438 | 9:58 PM | 🔵 | Configuration Management System - Comprehensive Settings with Environment Overrides | ~580 |
|
|
11
|
+
|
|
12
|
+
### Feb 14, 2026
|
|
13
|
+
|
|
14
|
+
| ID | Time | T | Title | Read |
|
|
15
|
+
|----|------|---|-------|------|
|
|
16
|
+
| #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
|
|
17
|
+
</claude-mem-context>
|