llm-checker 3.5.6 → 3.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -187,7 +187,7 @@ const ALLOWED_CLI_COMMANDS = new Set([
187
187
 
188
188
  const server = new McpServer({
189
189
  name: "llm-checker",
190
- version: "3.4.0",
190
+ version: "3.5.7",
191
191
  });
192
192
 
193
193
  // ============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-checker",
3
- "version": "3.5.6",
3
+ "version": "3.5.7",
4
4
  "description": "Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system",
5
5
  "bin": {
6
6
  "llm-checker": "bin/cli.js",
@@ -353,13 +353,16 @@ class MultiObjectiveSelector {
353
353
  const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
354
354
  ? hardware.summary.hasIntegratedGPU
355
355
  : /iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(`${gpuModel} ${integratedGpuInventory}`);
356
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
357
+ ? hardware.summary.hasDedicatedGPU
358
+ : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
356
359
  const platform = normalizePlatform(hardware?.os?.platform || process.platform);
357
360
  const isPC = !isAppleSilicon && (platform === 'win32' || platform === 'linux');
358
361
 
359
362
  // 1) Effective memory for model weights (45%) - Apple Silicon & PC optimized
360
363
  let effMem;
361
364
 
362
- if (vramGB > 0 && !unified) {
365
+ if (hasDedicatedGPU && vramGB > 0 && !unified) {
363
366
  // Dedicated GPU path (Windows/Linux with discrete GPU)
364
367
  if (isPC) {
365
368
  // PC-specific GPU memory calculation with offload support
@@ -454,9 +457,9 @@ class MultiObjectiveSelector {
454
457
  tier = bumpTier(tier, +1); // High-end GPU boost
455
458
  } else if (!vramGB && !unified) {
456
459
  tier = bumpTier(tier, -1); // CPU-only penalty (moderate)
457
- } else if (hasIntegratedGPU) {
460
+ } else if (hasIntegratedGPU && !hasDedicatedGPU) {
458
461
  tier = bumpTier(tier, -1); // iGPU penalty
459
- } else if (vramGB > 0 && vramGB < 6) {
462
+ } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
460
463
  tier = bumpTier(tier, -1); // Low VRAM penalty
461
464
  }
462
465
 
@@ -752,13 +755,16 @@ class MultiObjectiveSelector {
752
755
  const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
753
756
  ? hardware.summary.hasIntegratedGPU
754
757
  : false;
758
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
759
+ ? hardware.summary.hasDedicatedGPU
760
+ : Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
755
761
 
756
762
  // Use improved CPU estimation function for more realistic and varying speeds
757
763
  const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
758
764
  (cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));
759
765
 
760
766
  // GPU-based calculation (dedicated GPU only)
761
- if (vramGB > 0 && !hasIntegratedGPU) {
767
+ if (hasDedicatedGPU && vramGB > 0) {
762
768
  let gpuTPS = 20; // Conservative GPU baseline
763
769
  if (gpuModel.toLowerCase().includes('gb10') ||
764
770
  gpuModel.toLowerCase().includes('grace blackwell') ||
@@ -4,7 +4,7 @@
4
4
  * Focuses on AVX2, AVX512, AMX, and other SIMD extensions
5
5
  */
6
6
 
7
- const { execSync } = require('child_process');
7
+ const childProcess = require('child_process');
8
8
  const os = require('os');
9
9
  const fs = require('fs');
10
10
  const { normalizePlatform } = require('../../utils/platform');
@@ -92,7 +92,7 @@ class CPUDetector {
92
92
 
93
93
  try {
94
94
  if (platform === 'darwin') {
95
- return parseInt(execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
95
+ return parseInt(childProcess.execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
96
96
  } else if (platform === 'linux') {
97
97
  const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'utf8');
98
98
  const coreIds = new Set();
@@ -142,7 +142,37 @@ class CPUDetector {
142
142
  * Execute shell command with consistent options.
143
143
  */
144
144
  runCommand(command) {
145
- return execSync(command, { encoding: 'utf8', timeout: 5000 });
145
+ const baseOptions = {
146
+ encoding: 'utf8',
147
+ timeout: 5000
148
+ };
149
+
150
+ if (normalizePlatform() === 'win32') {
151
+ const result = childProcess.spawnSync(command, {
152
+ ...baseOptions,
153
+ shell: true,
154
+ stdio: ['ignore', 'pipe', 'pipe'],
155
+ windowsHide: true
156
+ });
157
+
158
+ if (result.error) {
159
+ throw result.error;
160
+ }
161
+
162
+ if (result.status !== 0) {
163
+ const stderr = String(result.stderr || '').trim();
164
+ const stdout = String(result.stdout || '').trim();
165
+ const error = new Error(stderr || stdout || `Command failed: ${command}`);
166
+ error.status = result.status;
167
+ error.stdout = result.stdout;
168
+ error.stderr = result.stderr;
169
+ throw error;
170
+ }
171
+
172
+ return result.stdout;
173
+ }
174
+
175
+ return childProcess.execSync(command, baseOptions);
146
176
  }
147
177
 
148
178
  /**
@@ -215,10 +245,10 @@ class CPUDetector {
215
245
 
216
246
  try {
217
247
  if (platform === 'darwin') {
218
- cache.l1d = parseInt(execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
219
- cache.l1i = parseInt(execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
220
- cache.l2 = parseInt(execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
221
- cache.l3 = parseInt(execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
248
+ cache.l1d = parseInt(childProcess.execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
249
+ cache.l1i = parseInt(childProcess.execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
250
+ cache.l2 = parseInt(childProcess.execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
251
+ cache.l3 = parseInt(childProcess.execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
222
252
  } else if (platform === 'linux') {
223
253
  // Parse from /sys/devices/system/cpu/cpu0/cache/
224
254
  const cachePath = '/sys/devices/system/cpu/cpu0/cache';
@@ -288,12 +318,12 @@ class CPUDetector {
288
318
  } else {
289
319
  // Intel Mac - check via sysctl
290
320
  try {
291
- const features = execSync('sysctl -n machdep.cpu.features', {
321
+ const features = childProcess.execSync('sysctl -n machdep.cpu.features', {
292
322
  encoding: 'utf8',
293
323
  timeout: 5000
294
324
  }).toLowerCase();
295
325
 
296
- const leafFeatures = execSync('sysctl -n machdep.cpu.leaf7_features', {
326
+ const leafFeatures = childProcess.execSync('sysctl -n machdep.cpu.leaf7_features', {
297
327
  encoding: 'utf8',
298
328
  timeout: 5000
299
329
  }).toLowerCase();
@@ -15,6 +15,10 @@ class CUDADetector {
15
15
  this.detectionMode = null;
16
16
  }
17
17
 
18
+ execCommand(command, options = {}) {
19
+ return execSync(command, options);
20
+ }
21
+
18
22
  /**
19
23
  * Check if CUDA is available
20
24
  */
@@ -43,7 +47,7 @@ class CUDADetector {
43
47
 
44
48
  hasNvidiaSMI() {
45
49
  try {
46
- execSync('nvidia-smi --version', {
50
+ this.execCommand('nvidia-smi --version', {
47
51
  encoding: 'utf8',
48
52
  timeout: 5000,
49
53
  stdio: ['pipe', 'pipe', 'pipe']
@@ -142,7 +146,7 @@ class CUDADetector {
142
146
  }
143
147
 
144
148
  try {
145
- execSync('nvcc --version', {
149
+ this.execCommand('nvcc --version', {
146
150
  encoding: 'utf8',
147
151
  timeout: 5000,
148
152
  stdio: ['pipe', 'pipe', 'pipe']
@@ -197,17 +201,18 @@ class CUDADetector {
197
201
 
198
202
  try {
199
203
  // Get driver and CUDA version
200
- const versionInfo = execSync('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
204
+ const versionInfo = this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
201
205
  encoding: 'utf8',
202
206
  timeout: 5000
203
207
  }).trim().split('\n')[0];
204
208
  result.driver = versionInfo;
205
209
 
206
- // Get CUDA version from nvidia-smi header
207
- const header = execSync('nvidia-smi | head -n 3', {
210
+ // Parse the nvidia-smi banner in JS so Windows does not require shell-only tools like `head`.
211
+ const banner = this.execCommand('nvidia-smi', {
208
212
  encoding: 'utf8',
209
213
  timeout: 5000
210
214
  });
215
+ const header = banner.split('\n').slice(0, 3).join('\n');
211
216
  const cudaMatch = header.match(/CUDA Version:\s*([\d.]+)/);
212
217
  if (cudaMatch) {
213
218
  result.cuda = cudaMatch[1];
@@ -237,7 +242,7 @@ class CUDADetector {
237
242
  'clocks.max.sm'
238
243
  ].join(',');
239
244
 
240
- const gpuData = execSync(
245
+ const gpuData = this.execCommand(
241
246
  `nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
242
247
  { encoding: 'utf8', timeout: 10000 }
243
248
  ).trim();
@@ -286,7 +291,7 @@ class CUDADetector {
286
291
  } catch (e) {
287
292
  // Fallback to simpler query
288
293
  try {
289
- const simpleQuery = execSync(
294
+ const simpleQuery = this.execCommand(
290
295
  'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
291
296
  { encoding: 'utf8', timeout: 5000 }
292
297
  ).trim();
@@ -408,7 +413,7 @@ class CUDADetector {
408
413
  }
409
414
 
410
415
  try {
411
- const nvccVersion = execSync('nvcc --version', {
416
+ const nvccVersion = this.execCommand('nvcc --version', {
412
417
  encoding: 'utf8',
413
418
  timeout: 5000,
414
419
  stdio: ['pipe', 'pipe', 'pipe']
@@ -41,7 +41,7 @@ class HardwareDetector {
41
41
  const systemInfo = {
42
42
  cpu: this.processCPUInfo(cpu),
43
43
  memory: this.processMemoryInfo(memory),
44
- gpu: this.processGPUInfo(graphics),
44
+ gpu: this.processGPUInfo(graphics, memory),
45
45
  system: this.processSystemInfo(system),
46
46
  os: this.processOSInfo(osInfo),
47
47
  timestamp: Date.now()
@@ -97,7 +97,48 @@ class HardwareDetector {
97
97
  };
98
98
  }
99
99
 
100
- processGPUInfo(graphics) {
100
+ getSystemMemoryGB(memoryInfo) {
101
+ const totalBytes = Number(memoryInfo?.total || 0);
102
+ if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
103
+ return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
104
+ }
105
+
106
+ estimateIntegratedSharedMemory(gpu, memoryInfo) {
107
+ const dedicatedAperture = this.normalizeVRAM(gpu?.vram || 0);
108
+ const explicitSharedCandidates = [
109
+ gpu?.memoryTotal,
110
+ gpu?.memory,
111
+ gpu?.sharedMemory,
112
+ gpu?.memoryShared,
113
+ gpu?.memory?.shared,
114
+ gpu?.memory?.total
115
+ ]
116
+ .map((value) => this.normalizeVRAM(value))
117
+ .filter((value) => value > dedicatedAperture);
118
+
119
+ if (explicitSharedCandidates.length > 0) {
120
+ return Math.max(...explicitSharedCandidates);
121
+ }
122
+
123
+ const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
124
+ if (gpu?.vramDynamic || dedicatedAperture <= 2) {
125
+ return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
126
+ }
127
+
128
+ return dedicatedAperture;
129
+ }
130
+
131
+ estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
132
+ const fallback = Math.max(0, Number(fallbackGB) || 0);
133
+ if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
134
+ return fallback;
135
+ }
136
+
137
+ // Integrated GPUs typically expose roughly half of system RAM as a shared pool.
138
+ return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
139
+ }
140
+
141
+ processGPUInfo(graphics, memoryInfo = null) {
101
142
  const controllers = graphics.controllers || [];
102
143
  const displays = graphics.displays || [];
103
144
 
@@ -199,8 +240,14 @@ class HardwareDetector {
199
240
  enhancedModel = this.getGPUModelFromDeviceId(primaryGPU.deviceId) || enhancedModel;
200
241
  }
201
242
 
243
+ const primaryIsIntegrated = this.isIntegratedGPU(enhancedModel);
244
+ const normalizedPrimaryVRAM = this.normalizeVRAM(primaryGPU.vram || 0);
245
+ const estimatedSharedMemory = primaryIsIntegrated
246
+ ? this.estimateIntegratedSharedMemory(primaryGPU, memoryInfo)
247
+ : 0;
248
+
202
249
  // Enhanced VRAM detection using the new normalizeVRAM function
203
- let vram = this.normalizeVRAM(primaryGPU.vram || 0);
250
+ let vram = primaryIsIntegrated ? estimatedSharedMemory : normalizedPrimaryVRAM;
204
251
 
205
252
  // If VRAM is still 0, try to estimate based on model or handle unified memory
206
253
  if (vram === 0 && primaryGPU.model) {
@@ -227,24 +274,41 @@ class HardwareDetector {
227
274
 
228
275
  // If we have multiple dedicated GPUs, use the combined VRAM
229
276
  const effectiveVRAM = gpuCount > 1 ? totalDedicatedVRAM : vram;
277
+ const sharedMemory = primaryIsIntegrated ? effectiveVRAM : 0;
278
+ const dedicatedMemory = primaryIsIntegrated ? normalizedPrimaryVRAM : effectiveVRAM;
279
+ const scoredGPU = {
280
+ ...primaryGPU,
281
+ model: enhancedModel,
282
+ vram: effectiveVRAM,
283
+ sharedMemory,
284
+ dedicatedMemory
285
+ };
230
286
 
231
287
  return {
232
288
  model: enhancedModel,
233
289
  vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
234
290
  vram: effectiveVRAM,
235
291
  vramPerGPU: vram, // VRAM of primary GPU for reference
292
+ sharedMemory,
293
+ dedicatedMemory,
236
294
  vramDynamic: primaryGPU.vramDynamic || false,
237
- dedicated: !this.isIntegratedGPU(enhancedModel),
295
+ dedicated: !primaryIsIntegrated,
238
296
  driverVersion: primaryGPU.driverVersion || 'Unknown',
239
297
  gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
240
298
  isMultiGPU: gpuCount > 1,
241
299
  all: normalizedControllers.map(gpu => ({
242
300
  model: gpu.model,
243
- vram: this.normalizeVRAM(gpu.vram || 0),
301
+ vram: this.isIntegratedGPU(gpu.model)
302
+ ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
303
+ : this.normalizeVRAM(gpu.vram || 0),
304
+ sharedMemory: this.isIntegratedGPU(gpu.model)
305
+ ? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
306
+ : 0,
307
+ dedicatedMemory: this.normalizeVRAM(gpu.vram || 0),
244
308
  vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
245
309
  })),
246
310
  displays: displays.length,
247
- score: this.calculateGPUScore(primaryGPU)
311
+ score: this.calculateGPUScore(scoredGPU)
248
312
  };
249
313
  }
250
314
 
@@ -303,17 +367,40 @@ class HardwareDetector {
303
367
  const modelFromUnified = summary.gpuModel || fallbackModel || systemInfo.gpu.model;
304
368
  const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
305
369
  const isAppleUnified = primaryType === 'metal';
370
+ const integratedSharedMemory = Math.max(
371
+ Number(systemInfo.gpu.sharedMemory || 0),
372
+ ...(Array.isArray(unified.systemGpu?.gpus)
373
+ ? unified.systemGpu.gpus
374
+ .filter((gpu) => gpu?.type === 'integrated')
375
+ .map((gpu) => Number(gpu?.memory?.total || gpu?.memoryTotal || 0))
376
+ : [0])
377
+ );
306
378
 
307
379
  systemInfo.summary = {
308
- ...summary
380
+ ...summary,
381
+ integratedSharedMemory: typeof summary.integratedSharedMemory === 'number'
382
+ ? summary.integratedSharedMemory
383
+ : integratedSharedMemory
309
384
  };
310
385
 
311
386
  systemInfo.gpu = {
312
387
  ...systemInfo.gpu,
313
388
  model: modelFromUnified,
314
389
  vendor,
315
- vram: isAppleUnified ? systemInfo.gpu.vram : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : 0),
316
- vramPerGPU: isAppleUnified ? (systemInfo.gpu.vramPerGPU || 0) : (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0),
390
+ vram: isAppleUnified
391
+ ? systemInfo.gpu.vram
392
+ : (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : (integratedSharedMemory || systemInfo.gpu.vram || 0)),
393
+ vramPerGPU: isAppleUnified
394
+ ? (systemInfo.gpu.vramPerGPU || 0)
395
+ : (hasDedicatedGPU
396
+ ? (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0)
397
+ : (integratedSharedMemory || systemInfo.gpu.vramPerGPU || systemInfo.gpu.vram || 0)),
398
+ sharedMemory: isAppleUnified
399
+ ? (systemInfo.gpu.sharedMemory || 0)
400
+ : (hasIntegratedGPU ? Math.max(integratedSharedMemory || 0, systemInfo.gpu.sharedMemory || 0) : 0),
401
+ dedicatedMemory: hasDedicatedGPU
402
+ ? (totalVRAM || systemInfo.gpu.dedicatedMemory || systemInfo.gpu.vram || 0)
403
+ : 0,
317
404
  dedicated: hasDedicatedGPU,
318
405
  gpuCount: summary.gpuCount || gpuCount,
319
406
  isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
@@ -557,13 +644,16 @@ class HardwareDetector {
557
644
 
558
645
  let score = 0;
559
646
  const model = gpu.model.toLowerCase();
560
- const vram = gpu.vram || 0;
647
+ const integrated = this.isIntegratedGPU(gpu.model);
648
+ const vram = integrated
649
+ ? Math.min(gpu.sharedMemory || gpu.vram || 0, 2)
650
+ : (gpu.vram || 0);
561
651
 
562
652
 
563
653
  score += vram * 8;
564
654
 
565
655
 
566
- if (!this.isIntegratedGPU(gpu.model)) {
656
+ if (!integrated) {
567
657
  score += 20;
568
658
  }
569
659
 
@@ -586,6 +676,10 @@ class HardwareDetector {
586
676
  else if (model.includes('apple m')) score += 15;
587
677
  else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
588
678
 
679
+ if (integrated) {
680
+ return Math.min(Math.round(score), 45);
681
+ }
682
+
589
683
  return Math.min(Math.round(score), 100);
590
684
  }
591
685
 
@@ -231,6 +231,7 @@ class UnifiedDetector {
231
231
  dedicatedGpuCount: 0,
232
232
  integratedGpuModels: [],
233
233
  dedicatedGpuModels: [],
234
+ integratedSharedMemory: 0,
234
235
  cpuModel: result.cpu?.brand || 'Unknown',
235
236
  systemRAM: require('os').totalmem() / (1024 ** 3)
236
237
  };
@@ -303,6 +304,7 @@ class UnifiedDetector {
303
304
  summary.dedicatedGpuCount = topology.dedicatedCount;
304
305
  summary.integratedGpuModels = topology.integratedModels;
305
306
  summary.dedicatedGpuModels = topology.dedicatedModels;
307
+ summary.integratedSharedMemory = topology.integratedSharedMemory;
306
308
  if (!summary.gpuModel) {
307
309
  summary.gpuModel = topology.primaryModel || null;
308
310
  }
@@ -355,6 +357,9 @@ class UnifiedDetector {
355
357
  models,
356
358
  integratedModels,
357
359
  dedicatedModels,
360
+ integratedSharedMemory: normalized
361
+ .filter((gpu) => gpu.type === 'integrated')
362
+ .reduce((max, gpu) => Math.max(max, Number(gpu?.memory?.total || 0)), 0),
358
363
  integratedCount: normalized.filter((gpu) => gpu.type === 'integrated').length,
359
364
  dedicatedCount: normalized.filter((gpu) => gpu.type === 'dedicated').length,
360
365
  hasIntegratedGPU: normalized.some((gpu) => gpu.type === 'integrated'),
@@ -412,6 +417,46 @@ class UnifiedDetector {
412
417
  .filter(Boolean);
413
418
  }
414
419
 
420
+ getSystemMemoryGB(memoryInfo) {
421
+ const totalBytes = Number(memoryInfo?.total || 0);
422
+ if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
423
+ return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
424
+ }
425
+
426
+ estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
427
+ const fallback = Math.max(0, Number(fallbackGB) || 0);
428
+ if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
429
+ return fallback;
430
+ }
431
+
432
+ return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
433
+ }
434
+
435
+ estimateIntegratedFallbackMemory(controller, memoryInfo) {
436
+ const dedicatedAperture = this.normalizeFallbackVRAM(controller?.vram || 0);
437
+ const explicitSharedCandidates = [
438
+ controller?.memoryTotal,
439
+ controller?.memory,
440
+ controller?.sharedMemory,
441
+ controller?.memoryShared,
442
+ controller?.memory?.shared,
443
+ controller?.memory?.total
444
+ ]
445
+ .map((value) => this.normalizeFallbackVRAM(value))
446
+ .filter((value) => value > dedicatedAperture);
447
+
448
+ if (explicitSharedCandidates.length > 0) {
449
+ return Math.max(...explicitSharedCandidates);
450
+ }
451
+
452
+ const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
453
+ if (controller?.vramDynamic || dedicatedAperture <= 2) {
454
+ return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
455
+ }
456
+
457
+ return dedicatedAperture;
458
+ }
459
+
415
460
  mergeGpuInventories(...gpuLists) {
416
461
  const normalizedLists = gpuLists.map((list) => this.normalizeGpuInventory(list));
417
462
  const primaryIndex = normalizedLists.findIndex((list) => list.length > 0);
@@ -433,6 +478,7 @@ class UnifiedDetector {
433
478
 
434
479
  async detectSystemGpuFallback() {
435
480
  const graphics = await si.graphics();
481
+ const memoryInfo = await si.mem().catch(() => null);
436
482
  const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
437
483
 
438
484
  if (controllers.length === 0) {
@@ -455,7 +501,9 @@ class UnifiedDetector {
455
501
  if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
456
502
 
457
503
  const isIntegrated = this.isIntegratedGPUModel(name);
458
- let vram = this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
504
+ let vram = isIntegrated
505
+ ? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
506
+ : this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
459
507
 
460
508
  // For dedicated cards, estimate VRAM from model if runtime did not report memory.
461
509
  if (!isIntegrated && vram === 0) {
@@ -821,6 +869,9 @@ class UnifiedDetector {
821
869
  else {
822
870
  if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
823
871
  const gpuDesc = summary.gpuInventory || summary.gpuModel;
872
+ if (summary.integratedSharedMemory > 0) {
873
+ return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, CPU backend) + ${summary.cpuModel}`;
874
+ }
824
875
  return `${gpuDesc} (integrated/shared memory, CPU backend) + ${summary.cpuModel}`;
825
876
  }
826
877
  if (summary.gpuModel && summary.gpuCount > 0) {
package/src/index.js CHANGED
@@ -1373,6 +1373,15 @@ class LLMChecker {
1373
1373
  // Detect PC platform (Windows/Linux)
1374
1374
  const normalizedPlatform = normalizePlatform(hardware.os?.platform || process.platform);
1375
1375
  const isPC = !isAppleSilicon && (normalizedPlatform === 'win32' || normalizedPlatform === 'linux');
1376
+ const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
1377
+ ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
1378
+ : '';
1379
+ const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
1380
+ ? hardware.summary.hasIntegratedGPU
1381
+ : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
1382
+ const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
1383
+ ? (!unified && hardware.summary.hasDedicatedGPU)
1384
+ : Boolean(!unified && (hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU)));
1376
1385
  const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
1377
1386
  (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
1378
1387
  const hasAVX2 = cpuModel.toLowerCase().includes('intel') ||
@@ -1381,7 +1390,7 @@ class LLMChecker {
1381
1390
  // 1) Capacidad efectiva para pesos del modelo (45%)
1382
1391
  let effMem;
1383
1392
 
1384
- if (vramGB > 0 && !unified) {
1393
+ if (hasDedicatedGPU && vramGB > 0 && !unified) {
1385
1394
  // Dedicated GPU path (Windows/Linux with discrete GPU)
1386
1395
  if (isPC) {
1387
1396
  // PC-specific GPU memory calculation with offload support
@@ -1469,16 +1478,6 @@ class LLMChecker {
1469
1478
  score >= 35 ? 'medium' : // 35-54 for mid-range systems
1470
1479
  score >= 20 ? 'low' : 'ultra_low'; // 20-34 for budget systems
1471
1480
 
1472
- const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
1473
- ? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
1474
- : '';
1475
- const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
1476
- ? hardware.summary.hasIntegratedGPU
1477
- : /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
1478
- const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
1479
- ? (!unified && hardware.summary.hasDedicatedGPU)
1480
- : (vramGB > 0 && !hasIntegratedGPU && !unified);
1481
-
1482
1481
  // Debug logging for tier calculation
1483
1482
  if (process.env.DEBUG_TIER) {
1484
1483
  console.log(`GPU Model: "${gpuModel}"`);
@@ -1508,10 +1507,10 @@ class LLMChecker {
1508
1507
  } else if (!vramGB && !unified) {
1509
1508
  // Windows/Linux CPU-only - significativa limitación pero no extrema
1510
1509
  tier = this.bumpTier(tier, -1);
1511
- } else if (hasIntegratedGPU) {
1510
+ } else if (hasIntegratedGPU && !hasDedicatedGPU) {
1512
1511
  // iGPU - limitada pero algo mejor que CPU puro
1513
1512
  tier = this.bumpTier(tier, -1);
1514
- } else if (vramGB > 0 && vramGB < 6) {
1513
+ } else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
1515
1514
  // GPU dedicada con poca VRAM (GTX 1060, etc.)
1516
1515
  tier = this.bumpTier(tier, -1);
1517
1516
  }
@@ -751,40 +751,26 @@ class DeterministicModelSelector {
751
751
 
752
752
  return variants.map((variant) => {
753
753
  const variantTag = variant.tag || fallbackTag;
754
- const paramsB = this.extractParamsFromString(
755
- variant.size,
756
- variantTag,
757
- ollamaModel.main_size,
758
- ollamaModel.model_identifier
759
- );
754
+ const quant = this.resolveVariantQuantization(variant, variantTag);
755
+ const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
760
756
  const moeMetadata = this.extractMoEMetadata(ollamaModel, variant, paramsB, baseText);
761
- const quant = this.normalizeQuantization(
762
- variant.quantization ||
763
- this.extractQuantizationFromTag(variantTag) ||
764
- 'Q4_K_M'
765
- );
766
757
 
767
758
  const variantSizeGB = this.extractVariantSizeGB(variant, paramsB);
768
759
  const modalities = this.inferModalities(ollamaModel, variantTag);
769
760
  const modelTags = this.inferTagsForVariant(derivedTags, variant, variantTag);
770
761
  const sizeByQuant = {};
762
+ const variantIsCloud = this.isCloudVariantTag(variantTag);
771
763
 
772
764
  for (const sibling of variants) {
773
- const siblingParams = this.extractParamsFromString(
774
- sibling.size,
775
- sibling.tag,
776
- ollamaModel.main_size,
777
- ollamaModel.model_identifier
778
- );
765
+ const siblingTag = sibling.tag || fallbackTag;
766
+ if (this.isCloudVariantTag(siblingTag) !== variantIsCloud) continue;
767
+
768
+ const siblingQuant = this.resolveVariantQuantization(sibling, siblingTag);
769
+ const siblingParams = this.resolveVariantParamsB(ollamaModel, sibling, siblingQuant);
779
770
 
780
771
  // Keep quantization map parameter-aware: don't blend 8B/70B/405B sizes.
781
772
  if (Math.abs(siblingParams - paramsB) > 0.25) continue;
782
773
 
783
- const siblingQuant = this.normalizeQuantization(
784
- sibling.quantization ||
785
- this.extractQuantizationFromTag(sibling.tag || '') ||
786
- quant
787
- );
788
774
  const siblingSize = this.extractVariantSizeGB(sibling, siblingParams);
789
775
  if (!Number.isFinite(sizeByQuant[siblingQuant]) || siblingSize < sizeByQuant[siblingQuant]) {
790
776
  sizeByQuant[siblingQuant] = siblingSize;
@@ -1003,17 +989,130 @@ class DeterministicModelSelector {
1003
989
  }
1004
990
 
1005
991
  extractParamsFromString(...values) {
1006
- for (const value of values) {
1007
- if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
1008
- return value;
992
+ const candidates = this.extractParameterCandidates(...values);
993
+ return candidates.length > 0 ? candidates[0] : null;
994
+ }
995
+
996
+ extractParameterCandidates(...values) {
997
+ const candidates = [];
998
+ const seen = new Set();
999
+
1000
+ const pushCandidate = (value) => {
1001
+ if (!Number.isFinite(value) || value <= 0) return;
1002
+ const rounded = Math.round(value * 1000) / 1000;
1003
+ const key = String(rounded);
1004
+ if (seen.has(key)) return;
1005
+ seen.add(key);
1006
+ candidates.push(rounded);
1007
+ };
1008
+
1009
+ const visit = (value) => {
1010
+ if (typeof value === 'number') {
1011
+ pushCandidate(value);
1012
+ return;
1013
+ }
1014
+
1015
+ if (Array.isArray(value)) {
1016
+ value.forEach(visit);
1017
+ return;
1018
+ }
1019
+
1020
+ if (value && typeof value === 'object') {
1021
+ Object.values(value).forEach(visit);
1022
+ return;
1023
+ }
1024
+
1025
+ if (typeof value !== 'string') return;
1026
+
1027
+ const regex = /(\d+\.?\d*)\s*([BbMm])/g;
1028
+ for (const match of value.matchAll(regex)) {
1029
+ const amount = parseFloat(match[1]);
1030
+ const unit = match[2].toUpperCase();
1031
+ pushCandidate(unit === 'M' ? amount / 1000 : amount);
1009
1032
  }
1010
- if (typeof value !== 'string') continue;
1033
+ };
1034
+
1035
+ values.forEach(visit);
1036
+ return candidates;
1037
+ }
1038
+
1039
+ extractArtifactSizeGBFromValue(value) {
1040
+ if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
1041
+ return value;
1042
+ }
1043
+ if (typeof value !== 'string') return null;
1044
+
1045
+ const match = value.match(/(\d+\.?\d*)\s*g(?:i)?b\b/i);
1046
+ if (!match) return null;
1047
+ return parseFloat(match[1]);
1048
+ }
1049
+
1050
+ inferParamsFromArtifactSizeGB(sizeGB, quant = 'Q4_K_M') {
1051
+ const normalizedQuant = this.normalizeQuantization(quant);
1052
+ const bytesPerParam = {
1053
+ 'Q8_0': 1.05,
1054
+ 'Q6_K': 0.80,
1055
+ 'Q5_K_M': 0.68,
1056
+ 'Q4_K_M': 0.58,
1057
+ 'Q3_K': 0.48,
1058
+ 'Q2_K': 0.37
1059
+ };
1060
+ const bpp = bytesPerParam[normalizedQuant] || 0.58;
1061
+ const inferred = sizeGB / bpp;
1062
+ return Math.max(0.5, Math.round(inferred * 2) / 2);
1063
+ }
1064
+
1065
+ isCloudVariantTag(tag = '') {
1066
+ return /:cloud$/i.test(String(tag).trim());
1067
+ }
1068
+
1069
+ resolveVariantQuantization(variant = {}, variantTag = '') {
1070
+ const tagQuant = this.extractQuantizationFromTag(variantTag);
1071
+ if (tagQuant) {
1072
+ return this.normalizeQuantization(tagQuant);
1073
+ }
1074
+
1075
+ return this.normalizeQuantization(
1076
+ variant.quantization ||
1077
+ variant.quant ||
1078
+ 'Q4_K_M'
1079
+ );
1080
+ }
1011
1081
 
1012
- const match = value.match(/(\d+\.?\d*)\s*([BbMm])/);
1013
- if (!match) continue;
1014
- const n = parseFloat(match[1]);
1015
- const unit = match[2].toUpperCase();
1016
- return unit === 'M' ? n / 1000 : n;
1082
+ resolveVariantParamsB(ollamaModel = {}, variant = {}, quant = 'Q4_K_M') {
1083
+ const explicitParams = this.extractParamsFromString(
1084
+ variant.size,
1085
+ variant.tag,
1086
+ variant.label,
1087
+ variant.name,
1088
+ ollamaModel.model_identifier,
1089
+ ollamaModel.model_name,
1090
+ ollamaModel.parameter_size,
1091
+ ollamaModel.parameter_count,
1092
+ ollamaModel.parameters
1093
+ );
1094
+ if (Number.isFinite(explicitParams) && explicitParams > 0) {
1095
+ return explicitParams;
1096
+ }
1097
+
1098
+ const metadataCandidates = this.extractParameterCandidates(
1099
+ ollamaModel.model_sizes,
1100
+ ollamaModel.parameters,
1101
+ ollamaModel.parameter_size,
1102
+ ollamaModel.parameter_count
1103
+ );
1104
+ if (metadataCandidates.length > 0) {
1105
+ return Math.max(...metadataCandidates);
1106
+ }
1107
+
1108
+ const artifactSizeGB = this.extractVariantSizeGB(variant, null);
1109
+ if (!this.isCloudVariantTag(variant.tag) && Number.isFinite(artifactSizeGB) && artifactSizeGB > 0) {
1110
+ return this.inferParamsFromArtifactSizeGB(artifactSizeGB, quant);
1111
+ }
1112
+
1113
+ const modelArtifactSizeGB = this.extractArtifactSizeGBFromValue(ollamaModel.main_size);
1114
+ if (Number.isFinite(modelArtifactSizeGB) && modelArtifactSizeGB > 0) {
1115
+ return this.inferParamsFromArtifactSizeGB(modelArtifactSizeGB, quant);
1017
1116
  }
1018
1117
 
1019
1118
  return 7;
@@ -1038,6 +1137,7 @@ class DeterministicModelSelector {
1038
1137
  extractVariantSizeGB(variant, paramsB) {
1039
1138
  const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
1040
1139
  if (Number.isFinite(candidate) && candidate > 0) return candidate;
1140
+ if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
1041
1141
  return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
1042
1142
  }
1043
1143
 
@@ -2049,6 +2149,20 @@ class DeterministicModelSelector {
2049
2149
  }
2050
2150
 
2051
2151
  mapHardwareTier(hardware = {}) {
2152
+ const summary = hardware?.summary || {};
2153
+ const effectiveMemory = Number(summary.effectiveMemory);
2154
+ const speedCoefficient = Number(summary.speedCoefficient);
2155
+ if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
2156
+ if (effectiveMemory >= 80 && speedCoefficient >= 300) return 'ultra_high';
2157
+ if (effectiveMemory >= 48 && speedCoefficient >= 200) return 'very_high';
2158
+ if (effectiveMemory >= 24 && speedCoefficient >= 150) return 'high';
2159
+ if (effectiveMemory >= 16 && speedCoefficient >= 100) return 'medium_high';
2160
+ if (effectiveMemory >= 12 && speedCoefficient >= 80) return 'medium';
2161
+ if (effectiveMemory >= 8 && speedCoefficient >= 50) return 'medium_low';
2162
+ if (effectiveMemory >= 6 && speedCoefficient >= 30) return 'low';
2163
+ return 'ultra_low';
2164
+ }
2165
+
2052
2166
  let ram, cores;
2053
2167
 
2054
2168
  if (hardware?.memory?.totalGB) {