llm-checker 3.5.14 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@
10
10
 
11
11
  const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
12
12
  const { normalizePlatform } = require('../utils/platform');
13
+ const { rankModels } = require('../models/scoring-core');
13
14
 
14
15
  class MultiObjectiveSelector {
15
16
  constructor() {
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
40
41
  }
41
42
 
42
43
  /**
43
- * Select best models using multi-objective ranking
44
+ * Select best models using the UNIFIED canonical scoring core (issue #88).
45
+ *
46
+ * `check` used to rank through this selector's own multi-objective math,
47
+ * which diverged from `recommend`/`smart-recommend` and never received the
48
+ * PR #89 high-capacity right-sizing fix. It now routes the ranking through
49
+ * the shared DeterministicModelSelector core (via scoring-core.rankModels)
50
+ * so identical (model, hardware) inputs score identically across all three
51
+ * commands and the high-capacity floor applies here too.
52
+ *
53
+ * The output shape is preserved exactly: `{ compatible, marginal,
54
+ * incompatible }`, each entry being the ORIGINAL model object spread with
55
+ * `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
56
+ * and `reasoning`, so downstream `check` rendering and the regression test
57
+ * (which calls `estimateModelParams` on the returned object) keep working.
44
58
  */
45
59
  async selectBestModels(hardware, models, category = 'general', topK = 10) {
46
- // Step 1: Hard filters - remove incompatible models
60
+ const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
61
+ if (inputModels.length === 0) {
62
+ return { compatible: [], marginal: [], incompatible: [] };
63
+ }
64
+
65
+ let ranking;
66
+ try {
67
+ ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
68
+ } catch (error) {
69
+ ranking = null;
70
+ }
71
+
72
+ // Defensive fallback: if the unified core is unavailable for any reason,
73
+ // fall back to the legacy multi-objective ranking so `check` still works.
74
+ if (!ranking || !Array.isArray(ranking.candidates)) {
75
+ return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
76
+ }
77
+
78
+ const scoredModels = [];
79
+ const rankedSources = new Set();
80
+ for (const candidate of ranking.candidates) {
81
+ const source = candidate?.meta?.__source;
82
+ if (!source) continue;
83
+ rankedSources.add(source);
84
+ scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
85
+ }
86
+
87
+ // Models the canonical core dropped (category filter / budget) are not
88
+ // viable on this hardware for this use case -> treat as incompatible,
89
+ // mirroring the previous hard-filter semantics.
90
+ const incompatibleExtras = inputModels
91
+ .filter((model) => !rankedSources.has(model))
92
+ .map((model) => ({
93
+ ...model,
94
+ totalScore: 0,
95
+ components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
96
+ reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
97
+ }));
98
+
99
+ scoredModels.sort((a, b) => b.totalScore - a.totalScore);
100
+
101
+ const classified = this.classifyResults(scoredModels, topK);
102
+ classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
103
+ return classified;
104
+ }
105
+
106
+ /**
107
+ * Map a unified-core candidate back into this selector's multi-objective
108
+ * output shape. The 0-100 `score` from the deterministic core becomes
109
+ * `totalScore`; component sub-scores are normalized to 0-1 to match the
110
+ * historical `components` contract consumed by `check` rendering.
111
+ */
112
+ mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
113
+ const components = candidate.components || {};
114
+ const to01 = (value) => {
115
+ const num = Number(value);
116
+ if (!Number.isFinite(num)) return 0;
117
+ return Math.max(0, Math.min(1, num / 100));
118
+ };
119
+
120
+ const quality = to01(components.Q);
121
+ const speed = to01(components.S);
122
+ const context = to01(components.C);
123
+ // The deterministic core folds hardware fitness into the `F` (fit) plus
124
+ // `H` (high-capacity right-sizing) components; surface that as the
125
+ // historical `hardwareMatch` signal so `check` insights stay meaningful.
126
+ const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
127
+
128
+ return {
129
+ ...source,
130
+ totalScore: Math.round((Number(candidate.score) || 0) * 100) / 100, // missing/non-numeric score -> 0, so sorting on totalScore never compares NaN
131
+ score: Math.round((Number(candidate.score) || 0) * 100) / 100, // same value as totalScore, rounded to 2 decimals
132
+ components: {
133
+ quality,
134
+ speed,
135
+ ttfb: speed, // ttfb tracks speed; legacy field retained for shape
136
+ context,
137
+ hardwareMatch
138
+ },
139
+ quant: candidate.quant || source.quant,
140
+ estimatedRAM: candidate.requiredGB,
141
+ estimatedTPS: candidate.estTPS,
142
+ reasoning: candidate.rationale ||
143
+ this.generateReasoning(source, hardware, quality, hardwareMatch)
144
+ };
145
+ }
146
+
147
+ /**
148
+ * Legacy multi-objective ranking, retained ONLY as a defensive fallback if
149
+ * the unified core throws. Not used on the normal path.
150
+ */
151
+ selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
47
152
  const compatibleModels = this.applyHardFilters(hardware, models);
48
-
153
+
49
154
  if (compatibleModels.length === 0) {
50
155
  return { compatible: [], marginal: [], incompatible: models };
51
156
  }
52
157
 
53
- // Step 2: Multi-objective scoring
54
- const scoredModels = compatibleModels.map(model =>
158
+ const scoredModels = compatibleModels.map(model =>
55
159
  this.calculateMultiObjectiveScore(hardware, model, category)
56
160
  ).filter(Boolean);
57
-
58
161
 
59
- // Step 3: Sort and classify
60
162
  scoredModels.sort((a, b) => b.totalScore - a.totalScore);
61
163
 
62
164
  return this.classifyResults(scoredModels, topK);
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
784
886
  }
785
887
 
786
888
  estimateKVCache(model, contextLength) {
787
- // Rough KV cache estimation: 2 * layers * hidden_size * seq_len * 2 bytes
889
+ // KV cache grows linearly with parameter count and context length. The old
890
+ // formula derived both "layers" and "hidden size" from params and multiplied
891
+ // them, making the estimate scale with params^2 — a 70B model at 8k came out
892
+ // at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
893
+ // every mid/large model. Use the same calibrated linear factor as the Ollama
894
+ // capacity planner (~0.08 GB per 1B params at 4k context).
788
895
  const params = this.estimateModelParams(model);
789
- const layers = Math.round(params * 2); // Rough approximation
790
- const hiddenSize = Math.round(params * 1000); // Rough approximation
791
- return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3); // GB
896
+ const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
897
+ const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
898
+ return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
792
899
  }
793
900
 
794
901
  estimateTokensPerSecond(hardware, model) {
@@ -320,7 +320,10 @@ class CalibrationManager {
320
320
  NO_COLOR: '1'
321
321
  }
322
322
  });
323
- const latencyMs = Number((process.hrtime.bigint() - started) / 1_000_000n);
323
+ // Convert ns->ms in floating point: dividing the BigInt first floored away
324
+ // all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
325
+ // ttft and tokens/sec). The ns diff is well within Number's safe range.
326
+ const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
324
327
 
325
328
  if (result.error) {
326
329
  const error = new Error(result.error.message || 'Failed to execute runtime prompt.');
@@ -13,6 +13,11 @@ class ModelDatabase {
13
13
  this.seedDbPath = options.seedDbPath || path.join(__dirname, 'seed', 'models.db');
14
14
  this.db = null;
15
15
  this.initialized = false;
16
+ // Batched-write state: during a bulk sync we defer the (expensive) full
17
+ // sql.js export-and-write until the batch ends, instead of rewriting the
18
+ // whole DB file on every single row.
19
+ this._batchDepth = 0;
20
+ this._pendingSave = false;
16
21
  }
17
22
 
18
23
  /**
@@ -148,7 +153,29 @@ class ModelDatabase {
148
153
  if (!this.useBetterSqlite && this.db) {
149
154
  const data = this.db.export();
150
155
  const buffer = Buffer.from(data);
151
- fs.writeFileSync(this.dbPath, buffer);
156
+ // Write to a temp file then atomically rename, so a crash/SIGINT
157
+ // mid-write can't leave a truncated, unreadable models.db behind.
158
+ const tmpPath = `${this.dbPath}.tmp`;
159
+ fs.writeFileSync(tmpPath, buffer);
160
+ fs.renameSync(tmpPath, this.dbPath);
161
+ this._pendingSave = false;
162
+ }
163
+ }
164
+
165
+ /**
166
+ * Group many writes so the database file is exported/written once at the end
167
+ * instead of on every row. Nestable; the outermost endBatch() flushes.
168
+ */
169
+ beginBatch() { // opens one (possibly nested) batch level; no file I/O happens here
170
+ this._batchDepth += 1;
171
+ }
172
+
173
+ endBatch() { // closes one batch level and flushes once when the outermost level ends
174
+ if (this._batchDepth > 0) { // an unbalanced extra endBatch() leaves the counter at 0 instead of going negative
175
+ this._batchDepth -= 1;
176
+ }
177
+ if (this._batchDepth === 0 && this._pendingSave) { // only write if a save was actually deferred during the batch
178
+ this.saveToFile();
152
179
  }
153
180
  }
154
181
 
@@ -160,7 +187,11 @@ class ModelDatabase {
160
187
  return this.db.prepare(sql).run(...params);
161
188
  } else {
162
189
  this.db.run(sql, params);
163
- this.saveToFile();
190
+ if (this._batchDepth > 0) {
191
+ this._pendingSave = true; // defer the full export until endBatch()
192
+ } else {
193
+ this.saveToFile();
194
+ }
164
195
  }
165
196
  }
166
197
 
@@ -406,9 +437,12 @@ class ModelDatabase {
406
437
  params.push(filters.maxSizeGB);
407
438
  }
408
439
 
409
- // Order by
410
- const orderBy = filters.orderBy || 'pulls';
411
- const orderDir = filters.orderDir || 'DESC';
440
+ // Order by — column names and direction can't be parameterized, so whitelist
441
+ // them. A future caller forwarding a user-supplied sort field would otherwise
442
+ // be a SQL-injection / crash vector on this public filters API.
443
+ const ORDERABLE_COLUMNS = new Set(['pulls', 'name', 'tags_count', 'updated_at', 'created_at']);
444
+ const orderBy = ORDERABLE_COLUMNS.has(filters.orderBy) ? filters.orderBy : 'pulls';
445
+ const orderDir = String(filters.orderDir).toUpperCase() === 'ASC' ? 'ASC' : 'DESC';
412
446
  sql += ` ORDER BY m.${orderBy} ${orderDir}`;
413
447
 
414
448
  // Limit
@@ -47,22 +47,27 @@ class SyncManager {
47
47
 
48
48
  this.onProgress({ phase: 'start', message: 'Starting full sync...' });
49
49
 
50
- // Clear existing data
51
- this.db.clear();
52
-
53
- // Scrape all models
54
- const result = await this.scraper.scrapeAll((model, variants) => {
55
- // Save model as we go
56
- this.db.upsertModel(model);
57
-
58
- // Save variants
59
- for (const variant of variants) {
60
- this.db.upsertVariant(variant);
61
- }
62
- });
50
+ // Batch all writes into a single atomic DB file write at the end. Saving on
51
+ // every upsert re-exported and rewrote the whole sql.js DB thousands of
52
+ // times, turning the sync into O(n^2) disk I/O.
53
+ this.db.beginBatch();
54
+ try {
55
+ // Clear existing data
56
+ this.db.clear();
57
+
58
+ // Scrape all models
59
+ await this.scraper.scrapeAll((model, variants) => {
60
+ this.db.upsertModel(model);
61
+ for (const variant of variants) {
62
+ this.db.upsertVariant(variant);
63
+ }
64
+ });
63
65
 
64
- // Update sync timestamp
65
- this.db.setLastSync(new Date().toISOString());
66
+ // Update sync timestamp
67
+ this.db.setLastSync(new Date().toISOString());
68
+ } finally {
69
+ this.db.endBatch();
70
+ }
66
71
 
67
72
  const stats = this.db.getStats();
68
73
 
@@ -110,6 +115,9 @@ class SyncManager {
110
115
  let updated = 0;
111
116
  let added = 0;
112
117
 
118
+ // Batch all upserts into a single atomic DB write at the end (see fullSync).
119
+ this.db.beginBatch();
120
+ try {
113
121
  // Process new models
114
122
  for (const { id } of newModels) {
115
123
  try {
@@ -157,12 +165,18 @@ class SyncManager {
157
165
 
158
166
  await this.sleep(100);
159
167
  } catch (error) {
160
- // Ignore errors during incremental update
168
+ // Log instead of silently swallowing: a systematic failure here
169
+ // (network down, schema mismatch) would otherwise report success
170
+ // with updated: 0 and leave the catalog quietly stale.
171
+ this.onError(`Error updating ${id}: ${error.message}`);
161
172
  }
162
173
  }
163
174
 
164
- // Update sync timestamp
165
- this.db.setLastSync(new Date().toISOString());
175
+ // Update sync timestamp
176
+ this.db.setLastSync(new Date().toISOString());
177
+ } finally {
178
+ this.db.endBatch();
179
+ }
166
180
 
167
181
  const stats = this.db.getStats();
168
182
 
@@ -283,7 +283,11 @@ class AppleSiliconDetector {
283
283
  const info = this.detect();
284
284
  if (!info) return null;
285
285
 
286
- return `apple-${info.chip.toLowerCase().replace(/\s+/g, '-')}-${info.memory.unified}gb`;
286
+ // info.chip stays null when the sysctl brand-string read fails (sandboxed
287
+ // env, missing binary); fall back so this can't throw on null.toLowerCase().
288
+ const chip = info.chip || 'apple-silicon';
289
+ const unified = info.memory?.unified || 0;
290
+ return `apple-${chip.toLowerCase().replace(/\s+/g, '-')}-${unified}gb`;
287
291
  }
288
292
 
289
293
  /**
@@ -249,40 +249,65 @@ class CUDADetector {
249
249
 
250
250
  const lines = gpuData.split('\n');
251
251
 
252
+ // Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
253
+ // separator can be either ", " or "," depending on driver/locale. Split
254
+ // tolerantly and only require the leading identity + memory columns so a
255
+ // GPU is never dropped just because optional trailing fields are absent.
256
+ const toMB = (value) => {
257
+ const n = parseInt(value, 10);
258
+ return Number.isFinite(n) ? n : 0;
259
+ };
260
+ const toGB = (value) => {
261
+ const mb = toMB(value);
262
+ return mb > 0 ? Math.round(mb / 1024) : 0;
263
+ };
264
+ const toInt = (value) => {
265
+ const n = parseInt(value, 10);
266
+ return Number.isFinite(n) ? n : 0;
267
+ };
268
+ const toFloat = (value) => {
269
+ const n = parseFloat(value);
270
+ return Number.isFinite(n) ? n : 0;
271
+ };
272
+
252
273
  for (const line of lines) {
253
- const parts = line.split(', ').map(p => p.trim());
274
+ if (!line || !line.trim()) continue;
275
+ const parts = line.split(/\s*,\s*/).map(p => p.trim());
276
+
277
+ // Need at least index, name, uuid, memory.total to describe a GPU.
278
+ if (parts.length < 4) continue;
254
279
 
255
- if (parts.length < 10) continue;
280
+ const memTotalMB = toMB(parts[3]);
256
281
 
257
282
  const gpu = {
258
- index: parseInt(parts[0]) || 0,
283
+ index: toInt(parts[0]),
259
284
  name: parts[1] || 'Unknown NVIDIA GPU',
260
285
  uuid: parts[2] || null,
261
286
  memory: {
262
- total: Math.round(parseInt(parts[3]) / 1024) || 0, // Convert MB to GB
263
- free: Math.round(parseInt(parts[4]) / 1024) || 0,
264
- used: Math.round(parseInt(parts[5]) / 1024) || 0
287
+ total: toGB(parts[3]), // Convert MB to GB
288
+ free: toGB(parts[4]),
289
+ used: toGB(parts[5])
265
290
  },
266
291
  computeMode: parts[6] || 'Default',
267
292
  pcie: {
268
- generation: parseInt(parts[7]) || 0,
269
- width: parseInt(parts[8]) || 0
293
+ generation: toInt(parts[7]),
294
+ width: toInt(parts[8])
270
295
  },
271
296
  power: {
272
- draw: parseFloat(parts[9]) || 0,
273
- limit: parseFloat(parts[10]) || 0
297
+ draw: toFloat(parts[9]),
298
+ limit: toFloat(parts[10])
274
299
  },
275
- temperature: parseInt(parts[11]) || 0,
300
+ temperature: toInt(parts[11]),
276
301
  utilization: {
277
- gpu: parseInt(parts[12]) || 0,
278
- memory: parseInt(parts[13]) || 0
302
+ gpu: toInt(parts[12]),
303
+ memory: toInt(parts[13])
279
304
  },
280
305
  clocks: {
281
- current: parseInt(parts[14]) || 0,
282
- max: parseInt(parts[15]) || 0
306
+ current: toInt(parts[14]),
307
+ max: toInt(parts[15])
283
308
  },
284
309
  capabilities: this.getGPUCapabilities(parts[1]),
285
- speedCoefficient: this.calculateSpeedCoefficient(parts[1], parseInt(parts[3]))
310
+ speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
286
311
  };
287
312
 
288
313
  result.gpus.push(gpu);
@@ -298,15 +323,18 @@ class CUDADetector {
298
323
 
299
324
  const lines = simpleQuery.split('\n');
300
325
  for (let i = 0; i < lines.length; i++) {
301
- const [name, memMB] = lines[i].split(', ').map(p => p.trim());
302
- const memGB = Math.round(parseInt(memMB) / 1024) || 0;
326
+ if (!lines[i] || !lines[i].trim()) continue;
327
+ const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
328
+ const parsedMB = parseInt(memMB, 10);
329
+ const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
330
+ const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
303
331
 
304
332
  result.gpus.push({
305
333
  index: i,
306
334
  name: name || 'NVIDIA GPU',
307
335
  memory: { total: memGB, free: memGB, used: 0 },
308
336
  capabilities: this.getGPUCapabilities(name),
309
- speedCoefficient: this.calculateSpeedCoefficient(name, parseInt(memMB))
337
+ speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
310
338
  });
311
339
  result.totalVRAM += memGB;
312
340
  }
@@ -111,8 +111,12 @@ class IntelDetector {
111
111
  const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
112
112
  const isDedicated = name.toLowerCase().includes('arc');
113
113
 
114
- // Get VRAM from sysfs or estimate
115
- let vram = this.getVRAMFromSysfs(block) || this.estimateVRAM(name);
114
+ // Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
115
+ // MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
116
+ // Arc reports ~256M while having 8-16GB), so a wrong BAR value must
117
+ // not shadow the reliable per-model estimate. BAR is only a last
118
+ // resort when the model can't be recognized.
119
+ let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
116
120
 
117
121
  const gpu = {
118
122
  index: result.gpus.length,
@@ -942,8 +942,12 @@ class ROCmDetector {
942
942
  // Try to match device ID to specific variant
943
943
  const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
944
944
  if (deviceInfo) return deviceInfo.name;
945
- // Default to first variant with "AMD Radeon" prefix
946
- return `AMD Radeon ${variants[0]}`;
945
+ // Unknown device ID: lspci groups several SKUs behind one string
946
+ // (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
947
+ // mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
948
+ // so keep the full variant list — honestly ambiguous beats confidently
949
+ // wrong.
950
+ return `AMD Radeon ${variants.join('/')}`;
947
951
  }
948
952
  return `AMD Radeon ${bracketName}`;
949
953
  }
@@ -85,12 +85,16 @@ class HardwareDetector {
85
85
  const freeGB = Math.round(memory.free / (1024 ** 3));
86
86
  const usedGB = totalGB - freeGB;
87
87
 
88
+ // Guard against a zero/unknown total (some virtualized or sandboxed hosts
89
+ // report memory.total === 0), which would otherwise make usagePercent NaN.
90
+ const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
91
+
88
92
  return {
89
93
  total: totalGB,
90
94
  free: freeGB,
91
95
  used: usedGB,
92
96
  available: Math.round(memory.available / (1024 ** 3)),
93
- usagePercent: Math.round((usedGB / totalGB) * 100),
97
+ usagePercent,
94
98
  swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
95
99
  swapUsed: Math.round(memory.swapused / (1024 ** 3)),
96
100
  score: this.calculateMemoryScore(totalGB, freeGB)
@@ -420,7 +424,12 @@ class HardwareDetector {
420
424
  driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
421
425
  };
422
426
  } catch (error) {
423
- // Keep systeminformation-only results when backend-specific detection is unavailable
427
+ // Keep systeminformation-only results when backend-specific detection is
428
+ // unavailable. Surface the cause under a debug flag so a genuine bug in the
429
+ // enrichment path is distinguishable from "no backend tools installed".
430
+ if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
431
+ console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
432
+ }
424
433
  }
425
434
  }
426
435
 
@@ -553,8 +562,23 @@ class HardwareDetector {
553
562
 
554
563
  // NVIDIA data-center / workstation
555
564
  if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
565
+
566
+ // NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
567
+ // matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
568
+ // GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
569
+ if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
570
+ if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
571
+ if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
572
+ if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
573
+ if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
574
+ if (modelLower.includes('h200')) return 141;
575
+ if (modelLower.includes('h100')) return 80;
576
+ if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
577
+ if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
578
+ if (modelLower.includes('a40')) return 48;
579
+
556
580
  if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
557
-
581
+
558
582
  // NVIDIA RTX 50 series
559
583
  if (modelLower.includes('rtx 5090')) return 32;
560
584
  if (modelLower.includes('rtx 5080')) return 16;
@@ -635,7 +659,7 @@ class HardwareDetector {
635
659
  else score += totalGB * 2;
636
660
 
637
661
  // Score based on available RAM
638
- const freePercent = (freeGB / totalGB) * 100;
662
+ const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
639
663
  if (freePercent > 50) score += 20;
640
664
  else if (freePercent > 30) score += 15;
641
665
  else if (freePercent > 20) score += 10;
@@ -738,34 +762,37 @@ class HardwareDetector {
738
762
  * Normalize VRAM values (handle different units and wrong totals)
739
763
  */
740
764
  normalizeVRAM(vram) {
741
- if (!vram || vram <= 0) return 0;
742
-
743
- let vramValue = vram;
744
-
745
- // Handle VRAM in bytes (some systems report this way)
746
- if (vramValue > 100000) {
747
- vramValue = Math.round(vramValue / (1024 * 1024)); // Convert bytes to MB
765
+ const raw = Number(vram);
766
+ if (!Number.isFinite(raw) || raw <= 0) return 0;
767
+
768
+ // Inputs reaching this function come from systeminformation / lspci (which
769
+ // express controller VRAM in megabytes), from raw byte counts on systems
770
+ // that report that way, and increasingly from our own curated GB tables
771
+ // (estimateVRAMFromModel, device-id maps) fed back through here. The unit
772
+ // is inferred from magnitude:
773
+ //
774
+ // > 1e6 -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
775
+ // the same card in MB is ~196,608, well under 1e6).
776
+ // >= 1024 -> megabytes (one full gigabyte or more; this is the
777
+ // systeminformation reporting range, e.g. 8192, 16384,
778
+ // 16368).
779
+ // 0 < v <= 256 -> already gigabytes. Real single-GPU VRAM tops out
780
+ // around 192 GB (H200 ~141, B200/MI ~192), so any
781
+ // small integer in this band is a GB value. This is
782
+ // the dead-zone fix for issue #88: normalizeVRAM(96)
783
+ // used to return 1 (96 was read as 96 MB and floored to 1 GB).
784
+ // 257 <= v < 1024 -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
785
+ // aperture) -> 0 GB below 512, 1 GB from 512 up (the old code always returned 1 here).
786
+ if (raw > 1_000_000) {
787
+ return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
748
788
  }
749
-
750
- // Now determine if we have MB or GB values
751
- if (vramValue >= 1024) {
752
- // Values >= 1024 are likely MB, convert to GB
753
- vramValue = Math.round(vramValue / 1024);
754
- } else if (vramValue >= 512 && vramValue < 1024) {
755
- // 512-1023 MB, round to 1GB
756
- vramValue = 1;
757
- } else if (vramValue > 80) {
758
- // Values between 80-511 are likely incorrect MB values, treat as MB
759
- vramValue = Math.round(vramValue / 1024) || 1;
760
- } else if (vramValue >= 1 && vramValue <= 80) {
761
- // Values 1-80 are likely already in GB, keep as is
762
- vramValue = vramValue;
763
- } else {
764
- // Values < 1 round to 0
765
- vramValue = 0;
789
+ if (raw >= 1024) {
790
+ return Math.max(0, Math.round(raw / 1024)); // MB -> GB
766
791
  }
767
-
768
- return vramValue;
792
+ if (raw <= 256) {
793
+ return Math.round(raw); // already GB (plausible single-GPU range)
794
+ }
795
+ return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
769
796
  }
770
797
 
771
798
  /**