llm-checker 3.5.14 → 3.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/analyzer/compatibility.js +5 -0
- package/analyzer/performance.js +5 -4
- package/bin/cli.js +5 -39
- package/bin/enhanced_cli.js +88 -19
- package/bin/mcp-server.mjs +266 -101
- package/package.json +7 -7
- package/src/ai/multi-objective-selector.js +118 -11
- package/src/calibration/calibration-manager.js +4 -1
- package/src/data/model-database.js +39 -5
- package/src/data/sync-manager.js +32 -18
- package/src/hardware/backends/apple-silicon.js +5 -1
- package/src/hardware/backends/cuda-detector.js +47 -19
- package/src/hardware/backends/intel-detector.js +6 -2
- package/src/hardware/backends/rocm-detector.js +6 -2
- package/src/hardware/detector.js +57 -30
- package/src/hardware/unified-detector.js +129 -25
- package/src/models/ai-check-selector.js +36 -5
- package/src/models/deterministic-selector.js +163 -15
- package/src/models/expanded_database.js +9 -5
- package/src/models/intelligent-selector.js +87 -1
- package/src/models/requirements.js +16 -11
- package/src/models/scoring-core.js +341 -0
- package/src/models/scoring-engine.js +9 -2
- package/src/ollama/capacity-planner.js +15 -2
- package/src/ollama/client.js +70 -30
- package/src/ollama/enhanced-client.js +20 -2
- package/src/ollama/manager.js +14 -2
- package/src/policy/cli-policy.js +8 -2
- package/src/policy/policy-engine.js +2 -1
- package/src/provenance/model-provenance.js +4 -1
- package/src/ui/cli-theme.js +57 -7
- package/src/ui/interactive-panel.js +176 -20
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
|
|
12
12
|
const { normalizePlatform } = require('../utils/platform');
|
|
13
|
+
const { rankModels } = require('../models/scoring-core');
|
|
13
14
|
|
|
14
15
|
class MultiObjectiveSelector {
|
|
15
16
|
constructor() {
|
|
@@ -40,23 +41,124 @@ class MultiObjectiveSelector {
|
|
|
40
41
|
}
|
|
41
42
|
|
|
42
43
|
/**
|
|
43
|
-
* Select best models using
|
|
44
|
+
* Select best models using the UNIFIED canonical scoring core (issue #88).
|
|
45
|
+
*
|
|
46
|
+
* `check` used to rank through this selector's own multi-objective math,
|
|
47
|
+
* which diverged from `recommend`/`smart-recommend` and never received the
|
|
48
|
+
* PR #89 high-capacity right-sizing fix. It now routes the ranking through
|
|
49
|
+
* the shared DeterministicModelSelector core (via scoring-core.rankModels)
|
|
50
|
+
* so identical (model, hardware) inputs score identically across all three
|
|
51
|
+
* commands and the high-capacity floor applies here too.
|
|
52
|
+
*
|
|
53
|
+
* The output shape is preserved exactly: `{ compatible, marginal,
|
|
54
|
+
* incompatible }`, each entry being the ORIGINAL model object spread with
|
|
55
|
+
* `totalScore`, `components { quality, speed, ttfb, context, hardwareMatch }`
|
|
56
|
+
* and `reasoning`, so downstream `check` rendering and the regression test
|
|
57
|
+
* (which calls `estimateModelParams` on the returned object) keep working.
|
|
44
58
|
*/
|
|
45
59
|
async selectBestModels(hardware, models, category = 'general', topK = 10) {
|
|
46
|
-
|
|
60
|
+
const inputModels = Array.isArray(models) ? models.filter(Boolean) : [];
|
|
61
|
+
if (inputModels.length === 0) {
|
|
62
|
+
return { compatible: [], marginal: [], incompatible: [] };
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
let ranking;
|
|
66
|
+
try {
|
|
67
|
+
ranking = await rankModels(inputModels, hardware, { category, topN: inputModels.length });
|
|
68
|
+
} catch (error) {
|
|
69
|
+
ranking = null;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Defensive fallback: if the unified core is unavailable for any reason,
|
|
73
|
+
// fall back to the legacy multi-objective ranking so `check` still works.
|
|
74
|
+
if (!ranking || !Array.isArray(ranking.candidates)) {
|
|
75
|
+
return this.selectBestModelsLegacy(hardware, inputModels, category, topK);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const scoredModels = [];
|
|
79
|
+
const rankedSources = new Set();
|
|
80
|
+
for (const candidate of ranking.candidates) {
|
|
81
|
+
const source = candidate?.meta?.__source;
|
|
82
|
+
if (!source) continue;
|
|
83
|
+
rankedSources.add(source);
|
|
84
|
+
scoredModels.push(this.mapCoreCandidateToMultiObjective(candidate, source, hardware, category));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Models the canonical core dropped (category filter / budget) are not
|
|
88
|
+
// viable on this hardware for this use case -> treat as incompatible,
|
|
89
|
+
// mirroring the previous hard-filter semantics.
|
|
90
|
+
const incompatibleExtras = inputModels
|
|
91
|
+
.filter((model) => !rankedSources.has(model))
|
|
92
|
+
.map((model) => ({
|
|
93
|
+
...model,
|
|
94
|
+
totalScore: 0,
|
|
95
|
+
components: { quality: 0, speed: 0, ttfb: 0, context: 0, hardwareMatch: 0 },
|
|
96
|
+
reasoning: 'Filtered out by unified scoring core (does not fit hardware/use-case)'
|
|
97
|
+
}));
|
|
98
|
+
|
|
99
|
+
scoredModels.sort((a, b) => b.totalScore - a.totalScore);
|
|
100
|
+
|
|
101
|
+
const classified = this.classifyResults(scoredModels, topK);
|
|
102
|
+
classified.incompatible = [...classified.incompatible, ...incompatibleExtras].slice(0, 5);
|
|
103
|
+
return classified;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Map a unified-core candidate back into this selector's multi-objective
|
|
108
|
+
* output shape. The 0-100 `score` from the deterministic core becomes
|
|
109
|
+
* `totalScore`; component sub-scores are normalized to 0-1 to match the
|
|
110
|
+
* historical `components` contract consumed by `check` rendering.
|
|
111
|
+
*/
|
|
112
|
+
mapCoreCandidateToMultiObjective(candidate, source, hardware, category) {
|
|
113
|
+
const components = candidate.components || {};
|
|
114
|
+
const to01 = (value) => {
|
|
115
|
+
const num = Number(value);
|
|
116
|
+
if (!Number.isFinite(num)) return 0;
|
|
117
|
+
return Math.max(0, Math.min(1, num / 100));
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
const quality = to01(components.Q);
|
|
121
|
+
const speed = to01(components.S);
|
|
122
|
+
const context = to01(components.C);
|
|
123
|
+
// The deterministic core folds hardware fitness into the `F` (fit) plus
|
|
124
|
+
// `H` (high-capacity right-sizing) components; surface that as the
|
|
125
|
+
// historical `hardwareMatch` signal so `check` insights stay meaningful.
|
|
126
|
+
const hardwareMatch = to01((Number(components.F) || 0) + (Number(components.H) || 0));
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
...source,
|
|
130
|
+
totalScore: Math.round(candidate.score * 100) / 100,
|
|
131
|
+
score: Math.round(candidate.score * 100) / 100,
|
|
132
|
+
components: {
|
|
133
|
+
quality,
|
|
134
|
+
speed,
|
|
135
|
+
ttfb: speed, // ttfb tracks speed; legacy field retained for shape
|
|
136
|
+
context,
|
|
137
|
+
hardwareMatch
|
|
138
|
+
},
|
|
139
|
+
quant: candidate.quant || source.quant,
|
|
140
|
+
estimatedRAM: candidate.requiredGB,
|
|
141
|
+
estimatedTPS: candidate.estTPS,
|
|
142
|
+
reasoning: candidate.rationale ||
|
|
143
|
+
this.generateReasoning(source, hardware, quality, hardwareMatch)
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Legacy multi-objective ranking, retained ONLY as a defensive fallback if
|
|
149
|
+
* the unified core throws. Not used on the normal path.
|
|
150
|
+
*/
|
|
151
|
+
selectBestModelsLegacy(hardware, models, category = 'general', topK = 10) {
|
|
47
152
|
const compatibleModels = this.applyHardFilters(hardware, models);
|
|
48
|
-
|
|
153
|
+
|
|
49
154
|
if (compatibleModels.length === 0) {
|
|
50
155
|
return { compatible: [], marginal: [], incompatible: models };
|
|
51
156
|
}
|
|
52
157
|
|
|
53
|
-
|
|
54
|
-
const scoredModels = compatibleModels.map(model =>
|
|
158
|
+
const scoredModels = compatibleModels.map(model =>
|
|
55
159
|
this.calculateMultiObjectiveScore(hardware, model, category)
|
|
56
160
|
).filter(Boolean);
|
|
57
|
-
|
|
58
161
|
|
|
59
|
-
// Step 3: Sort and classify
|
|
60
162
|
scoredModels.sort((a, b) => b.totalScore - a.totalScore);
|
|
61
163
|
|
|
62
164
|
return this.classifyResults(scoredModels, topK);
|
|
@@ -784,11 +886,16 @@ class MultiObjectiveSelector {
|
|
|
784
886
|
}
|
|
785
887
|
|
|
786
888
|
estimateKVCache(model, contextLength) {
|
|
787
|
-
//
|
|
889
|
+
// KV cache grows linearly with parameter count and context length. The old
|
|
890
|
+
// formula derived both "layers" and "hidden size" from params and multiplied
|
|
891
|
+
// them, making the estimate scale with params^2 — a 70B model at 8k came out
|
|
892
|
+
// at ~299 GB (real value ~11 GB), which made checkMemoryCompatibility reject
|
|
893
|
+
// every mid/large model. Use the same calibrated linear factor as the Ollama
|
|
894
|
+
// capacity planner (~0.08 GB per 1B params at 4k context).
|
|
788
895
|
const params = this.estimateModelParams(model);
|
|
789
|
-
const
|
|
790
|
-
const
|
|
791
|
-
return (
|
|
896
|
+
const ctx = Number.isFinite(contextLength) && contextLength > 0 ? contextLength : 4096;
|
|
897
|
+
const kvFactorPer4k = 0.08; // GB per 1B params at 4k context (fp16)
|
|
898
|
+
return Math.max(0, params * kvFactorPer4k * (ctx / 4096)); // GB
|
|
792
899
|
}
|
|
793
900
|
|
|
794
901
|
estimateTokensPerSecond(hardware, model) {
|
|
@@ -320,7 +320,10 @@ class CalibrationManager {
|
|
|
320
320
|
NO_COLOR: '1'
|
|
321
321
|
}
|
|
322
322
|
});
|
|
323
|
-
|
|
323
|
+
// Convert ns->ms in floating point: dividing the BigInt first floored away
|
|
324
|
+
// all sub-millisecond precision (a 0.5 ms call read as 0 ms, skewing p50/p95,
|
|
325
|
+
// ttft and tokens/sec). The ns diff is well within Number's safe range.
|
|
326
|
+
const latencyMs = Number(process.hrtime.bigint() - started) / 1_000_000;
|
|
324
327
|
|
|
325
328
|
if (result.error) {
|
|
326
329
|
const error = new Error(result.error.message || 'Failed to execute runtime prompt.');
|
|
@@ -13,6 +13,11 @@ class ModelDatabase {
|
|
|
13
13
|
this.seedDbPath = options.seedDbPath || path.join(__dirname, 'seed', 'models.db');
|
|
14
14
|
this.db = null;
|
|
15
15
|
this.initialized = false;
|
|
16
|
+
// Batched-write state: during a bulk sync we defer the (expensive) full
|
|
17
|
+
// sql.js export-and-write until the batch ends, instead of rewriting the
|
|
18
|
+
// whole DB file on every single row.
|
|
19
|
+
this._batchDepth = 0;
|
|
20
|
+
this._pendingSave = false;
|
|
16
21
|
}
|
|
17
22
|
|
|
18
23
|
/**
|
|
@@ -148,7 +153,29 @@ class ModelDatabase {
|
|
|
148
153
|
if (!this.useBetterSqlite && this.db) {
|
|
149
154
|
const data = this.db.export();
|
|
150
155
|
const buffer = Buffer.from(data);
|
|
151
|
-
|
|
156
|
+
// Write to a temp file then atomically rename, so a crash/SIGINT
|
|
157
|
+
// mid-write can't leave a truncated, unreadable models.db behind.
|
|
158
|
+
const tmpPath = `${this.dbPath}.tmp`;
|
|
159
|
+
fs.writeFileSync(tmpPath, buffer);
|
|
160
|
+
fs.renameSync(tmpPath, this.dbPath);
|
|
161
|
+
this._pendingSave = false;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Group many writes so the database file is exported/written once at the end
|
|
167
|
+
* instead of on every row. Nestable; the outermost endBatch() flushes.
|
|
168
|
+
*/
|
|
169
|
+
beginBatch() {
|
|
170
|
+
this._batchDepth += 1;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
endBatch() {
|
|
174
|
+
if (this._batchDepth > 0) {
|
|
175
|
+
this._batchDepth -= 1;
|
|
176
|
+
}
|
|
177
|
+
if (this._batchDepth === 0 && this._pendingSave) {
|
|
178
|
+
this.saveToFile();
|
|
152
179
|
}
|
|
153
180
|
}
|
|
154
181
|
|
|
@@ -160,7 +187,11 @@ class ModelDatabase {
|
|
|
160
187
|
return this.db.prepare(sql).run(...params);
|
|
161
188
|
} else {
|
|
162
189
|
this.db.run(sql, params);
|
|
163
|
-
this.
|
|
190
|
+
if (this._batchDepth > 0) {
|
|
191
|
+
this._pendingSave = true; // defer the full export until endBatch()
|
|
192
|
+
} else {
|
|
193
|
+
this.saveToFile();
|
|
194
|
+
}
|
|
164
195
|
}
|
|
165
196
|
}
|
|
166
197
|
|
|
@@ -406,9 +437,12 @@ class ModelDatabase {
|
|
|
406
437
|
params.push(filters.maxSizeGB);
|
|
407
438
|
}
|
|
408
439
|
|
|
409
|
-
// Order by
|
|
410
|
-
|
|
411
|
-
|
|
440
|
+
// Order by — column names and direction can't be parameterized, so whitelist
|
|
441
|
+
// them. A future caller forwarding a user-supplied sort field would otherwise
|
|
442
|
+
// be a SQL-injection / crash vector on this public filters API.
|
|
443
|
+
const ORDERABLE_COLUMNS = new Set(['pulls', 'name', 'tags_count', 'updated_at', 'created_at']);
|
|
444
|
+
const orderBy = ORDERABLE_COLUMNS.has(filters.orderBy) ? filters.orderBy : 'pulls';
|
|
445
|
+
const orderDir = String(filters.orderDir).toUpperCase() === 'ASC' ? 'ASC' : 'DESC';
|
|
412
446
|
sql += ` ORDER BY m.${orderBy} ${orderDir}`;
|
|
413
447
|
|
|
414
448
|
// Limit
|
package/src/data/sync-manager.js
CHANGED
|
@@ -47,22 +47,27 @@ class SyncManager {
|
|
|
47
47
|
|
|
48
48
|
this.onProgress({ phase: 'start', message: 'Starting full sync...' });
|
|
49
49
|
|
|
50
|
-
//
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
//
|
|
56
|
-
this.db.
|
|
57
|
-
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
this.db.
|
|
61
|
-
|
|
62
|
-
|
|
50
|
+
// Batch all writes into a single atomic DB file write at the end. Saving on
|
|
51
|
+
// every upsert re-exported and rewrote the whole sql.js DB thousands of
|
|
52
|
+
// times, turning the sync into O(n^2) disk I/O.
|
|
53
|
+
this.db.beginBatch();
|
|
54
|
+
try {
|
|
55
|
+
// Clear existing data
|
|
56
|
+
this.db.clear();
|
|
57
|
+
|
|
58
|
+
// Scrape all models
|
|
59
|
+
await this.scraper.scrapeAll((model, variants) => {
|
|
60
|
+
this.db.upsertModel(model);
|
|
61
|
+
for (const variant of variants) {
|
|
62
|
+
this.db.upsertVariant(variant);
|
|
63
|
+
}
|
|
64
|
+
});
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
+
// Update sync timestamp
|
|
67
|
+
this.db.setLastSync(new Date().toISOString());
|
|
68
|
+
} finally {
|
|
69
|
+
this.db.endBatch();
|
|
70
|
+
}
|
|
66
71
|
|
|
67
72
|
const stats = this.db.getStats();
|
|
68
73
|
|
|
@@ -110,6 +115,9 @@ class SyncManager {
|
|
|
110
115
|
let updated = 0;
|
|
111
116
|
let added = 0;
|
|
112
117
|
|
|
118
|
+
// Batch all upserts into a single atomic DB write at the end (see fullSync).
|
|
119
|
+
this.db.beginBatch();
|
|
120
|
+
try {
|
|
113
121
|
// Process new models
|
|
114
122
|
for (const { id } of newModels) {
|
|
115
123
|
try {
|
|
@@ -157,12 +165,18 @@ class SyncManager {
|
|
|
157
165
|
|
|
158
166
|
await this.sleep(100);
|
|
159
167
|
} catch (error) {
|
|
160
|
-
//
|
|
168
|
+
// Log instead of silently swallowing: a systematic failure here
|
|
169
|
+
// (network down, schema mismatch) would otherwise report success
|
|
170
|
+
// with updated: 0 and leave the catalog quietly stale.
|
|
171
|
+
this.onError(`Error updating ${id}: ${error.message}`);
|
|
161
172
|
}
|
|
162
173
|
}
|
|
163
174
|
|
|
164
|
-
|
|
165
|
-
|
|
175
|
+
// Update sync timestamp
|
|
176
|
+
this.db.setLastSync(new Date().toISOString());
|
|
177
|
+
} finally {
|
|
178
|
+
this.db.endBatch();
|
|
179
|
+
}
|
|
166
180
|
|
|
167
181
|
const stats = this.db.getStats();
|
|
168
182
|
|
|
@@ -283,7 +283,11 @@ class AppleSiliconDetector {
|
|
|
283
283
|
const info = this.detect();
|
|
284
284
|
if (!info) return null;
|
|
285
285
|
|
|
286
|
-
|
|
286
|
+
// info.chip stays null when the sysctl brand-string read fails (sandboxed
|
|
287
|
+
// env, missing binary); fall back so this can't throw on null.toLowerCase().
|
|
288
|
+
const chip = info.chip || 'apple-silicon';
|
|
289
|
+
const unified = info.memory?.unified || 0;
|
|
290
|
+
return `apple-${chip.toLowerCase().replace(/\s+/g, '-')}-${unified}gb`;
|
|
287
291
|
}
|
|
288
292
|
|
|
289
293
|
/**
|
|
@@ -249,40 +249,65 @@ class CUDADetector {
|
|
|
249
249
|
|
|
250
250
|
const lines = gpuData.split('\n');
|
|
251
251
|
|
|
252
|
+
// Older drivers emit fewer columns (e.g. no power/clocks), and the CSV
|
|
253
|
+
// separator can be either ", " or "," depending on driver/locale. Split
|
|
254
|
+
// tolerantly and only require the leading identity + memory columns so a
|
|
255
|
+
// GPU is never dropped just because optional trailing fields are absent.
|
|
256
|
+
const toMB = (value) => {
|
|
257
|
+
const n = parseInt(value, 10);
|
|
258
|
+
return Number.isFinite(n) ? n : 0;
|
|
259
|
+
};
|
|
260
|
+
const toGB = (value) => {
|
|
261
|
+
const mb = toMB(value);
|
|
262
|
+
return mb > 0 ? Math.round(mb / 1024) : 0;
|
|
263
|
+
};
|
|
264
|
+
const toInt = (value) => {
|
|
265
|
+
const n = parseInt(value, 10);
|
|
266
|
+
return Number.isFinite(n) ? n : 0;
|
|
267
|
+
};
|
|
268
|
+
const toFloat = (value) => {
|
|
269
|
+
const n = parseFloat(value);
|
|
270
|
+
return Number.isFinite(n) ? n : 0;
|
|
271
|
+
};
|
|
272
|
+
|
|
252
273
|
for (const line of lines) {
|
|
253
|
-
|
|
274
|
+
if (!line || !line.trim()) continue;
|
|
275
|
+
const parts = line.split(/\s*,\s*/).map(p => p.trim());
|
|
276
|
+
|
|
277
|
+
// Need at least index, name, uuid, memory.total to describe a GPU.
|
|
278
|
+
if (parts.length < 4) continue;
|
|
254
279
|
|
|
255
|
-
|
|
280
|
+
const memTotalMB = toMB(parts[3]);
|
|
256
281
|
|
|
257
282
|
const gpu = {
|
|
258
|
-
index:
|
|
283
|
+
index: toInt(parts[0]),
|
|
259
284
|
name: parts[1] || 'Unknown NVIDIA GPU',
|
|
260
285
|
uuid: parts[2] || null,
|
|
261
286
|
memory: {
|
|
262
|
-
total:
|
|
263
|
-
free:
|
|
264
|
-
used:
|
|
287
|
+
total: toGB(parts[3]), // Convert MB to GB
|
|
288
|
+
free: toGB(parts[4]),
|
|
289
|
+
used: toGB(parts[5])
|
|
265
290
|
},
|
|
266
291
|
computeMode: parts[6] || 'Default',
|
|
267
292
|
pcie: {
|
|
268
|
-
generation:
|
|
269
|
-
width:
|
|
293
|
+
generation: toInt(parts[7]),
|
|
294
|
+
width: toInt(parts[8])
|
|
270
295
|
},
|
|
271
296
|
power: {
|
|
272
|
-
draw:
|
|
273
|
-
limit:
|
|
297
|
+
draw: toFloat(parts[9]),
|
|
298
|
+
limit: toFloat(parts[10])
|
|
274
299
|
},
|
|
275
|
-
temperature:
|
|
300
|
+
temperature: toInt(parts[11]),
|
|
276
301
|
utilization: {
|
|
277
|
-
gpu:
|
|
278
|
-
memory:
|
|
302
|
+
gpu: toInt(parts[12]),
|
|
303
|
+
memory: toInt(parts[13])
|
|
279
304
|
},
|
|
280
305
|
clocks: {
|
|
281
|
-
current:
|
|
282
|
-
max:
|
|
306
|
+
current: toInt(parts[14]),
|
|
307
|
+
max: toInt(parts[15])
|
|
283
308
|
},
|
|
284
309
|
capabilities: this.getGPUCapabilities(parts[1]),
|
|
285
|
-
speedCoefficient: this.calculateSpeedCoefficient(parts[1],
|
|
310
|
+
speedCoefficient: this.calculateSpeedCoefficient(parts[1], memTotalMB)
|
|
286
311
|
};
|
|
287
312
|
|
|
288
313
|
result.gpus.push(gpu);
|
|
@@ -298,15 +323,18 @@ class CUDADetector {
|
|
|
298
323
|
|
|
299
324
|
const lines = simpleQuery.split('\n');
|
|
300
325
|
for (let i = 0; i < lines.length; i++) {
|
|
301
|
-
|
|
302
|
-
const
|
|
326
|
+
if (!lines[i] || !lines[i].trim()) continue;
|
|
327
|
+
const [name, memMB] = lines[i].split(/\s*,\s*/).map(p => p.trim());
|
|
328
|
+
const parsedMB = parseInt(memMB, 10);
|
|
329
|
+
const memMBSafe = Number.isFinite(parsedMB) ? parsedMB : 0;
|
|
330
|
+
const memGB = memMBSafe > 0 ? Math.round(memMBSafe / 1024) : 0;
|
|
303
331
|
|
|
304
332
|
result.gpus.push({
|
|
305
333
|
index: i,
|
|
306
334
|
name: name || 'NVIDIA GPU',
|
|
307
335
|
memory: { total: memGB, free: memGB, used: 0 },
|
|
308
336
|
capabilities: this.getGPUCapabilities(name),
|
|
309
|
-
speedCoefficient: this.calculateSpeedCoefficient(name,
|
|
337
|
+
speedCoefficient: this.calculateSpeedCoefficient(name, memMBSafe)
|
|
310
338
|
});
|
|
311
339
|
result.totalVRAM += memGB;
|
|
312
340
|
}
|
|
@@ -111,8 +111,12 @@ class IntelDetector {
|
|
|
111
111
|
const name = nameMatch[0].replace(/Corporation\s*/i, '').trim();
|
|
112
112
|
const isDedicated = name.toLowerCase().includes('arc');
|
|
113
113
|
|
|
114
|
-
//
|
|
115
|
-
|
|
114
|
+
// Prefer the model-based estimate: getVRAMFromSysfs reads the PCI
|
|
115
|
+
// MMIO BAR size, which is NOT the card's VRAM (a non-Resizable-BAR
|
|
116
|
+
// Arc reports ~256M while having 8-16GB), so a wrong BAR value must
|
|
117
|
+
// not shadow the reliable per-model estimate. BAR is only a last
|
|
118
|
+
// resort when the model can't be recognized.
|
|
119
|
+
let vram = this.estimateVRAM(name) || this.getVRAMFromSysfs(block);
|
|
116
120
|
|
|
117
121
|
const gpu = {
|
|
118
122
|
index: result.gpus.length,
|
|
@@ -942,8 +942,12 @@ class ROCmDetector {
|
|
|
942
942
|
// Try to match device ID to specific variant
|
|
943
943
|
const deviceInfo = ROCmDetector.AMD_DEVICE_IDS[deviceId];
|
|
944
944
|
if (deviceInfo) return deviceInfo.name;
|
|
945
|
-
//
|
|
946
|
-
|
|
945
|
+
// Unknown device ID: lspci groups several SKUs behind one string
|
|
946
|
+
// (e.g. "Radeon RX 7900 XT/7900 XTX/7900M"). Committing to variants[0]
|
|
947
|
+
// mislabels the card as the lowest-tier SKU and yields the wrong VRAM,
|
|
948
|
+
// so keep the full variant list — honestly ambiguous beats confidently
|
|
949
|
+
// wrong.
|
|
950
|
+
return `AMD Radeon ${variants.join('/')}`;
|
|
947
951
|
}
|
|
948
952
|
return `AMD Radeon ${bracketName}`;
|
|
949
953
|
}
|
package/src/hardware/detector.js
CHANGED
|
@@ -85,12 +85,16 @@ class HardwareDetector {
|
|
|
85
85
|
const freeGB = Math.round(memory.free / (1024 ** 3));
|
|
86
86
|
const usedGB = totalGB - freeGB;
|
|
87
87
|
|
|
88
|
+
// Guard against a zero/unknown total (some virtualized or sandboxed hosts
|
|
89
|
+
// report memory.total === 0), which would otherwise make usagePercent NaN.
|
|
90
|
+
const usagePercent = totalGB > 0 ? Math.round((usedGB / totalGB) * 100) : 0;
|
|
91
|
+
|
|
88
92
|
return {
|
|
89
93
|
total: totalGB,
|
|
90
94
|
free: freeGB,
|
|
91
95
|
used: usedGB,
|
|
92
96
|
available: Math.round(memory.available / (1024 ** 3)),
|
|
93
|
-
usagePercent
|
|
97
|
+
usagePercent,
|
|
94
98
|
swapTotal: Math.round(memory.swaptotal / (1024 ** 3)),
|
|
95
99
|
swapUsed: Math.round(memory.swapused / (1024 ** 3)),
|
|
96
100
|
score: this.calculateMemoryScore(totalGB, freeGB)
|
|
@@ -420,7 +424,12 @@ class HardwareDetector {
|
|
|
420
424
|
driverVersion: backendInfo.driver || systemInfo.gpu.driverVersion
|
|
421
425
|
};
|
|
422
426
|
} catch (error) {
|
|
423
|
-
// Keep systeminformation-only results when backend-specific detection is
|
|
427
|
+
// Keep systeminformation-only results when backend-specific detection is
|
|
428
|
+
// unavailable. Surface the cause under a debug flag so a genuine bug in the
|
|
429
|
+
// enrichment path is distinguishable from "no backend tools installed".
|
|
430
|
+
if (process.env.DEBUG_GPU || process.env.LLM_CHECKER_DEBUG) {
|
|
431
|
+
console.error('[llm-checker] enrichWithUnifiedHardware failed:', error && error.stack ? error.stack : error);
|
|
432
|
+
}
|
|
424
433
|
}
|
|
425
434
|
}
|
|
426
435
|
|
|
@@ -553,8 +562,23 @@ class HardwareDetector {
|
|
|
553
562
|
|
|
554
563
|
// NVIDIA data-center / workstation
|
|
555
564
|
if (modelLower.includes('gb10') || modelLower.includes('grace blackwell') || modelLower.includes('dgx spark')) return 96;
|
|
565
|
+
|
|
566
|
+
// NVIDIA Blackwell / Ada / Hopper workstation & datacenter cards. These are
|
|
567
|
+
// matched BEFORE the generic "rtx -> 8" fallback so high-VRAM professional
|
|
568
|
+
// GPUs (e.g. "RTX PRO 6000") are not collapsed to 8GB (issue #88).
|
|
569
|
+
if (modelLower.includes('rtx pro 6000') || modelLower.includes('rtx 6000 blackwell')) return 96;
|
|
570
|
+
if (modelLower.includes('rtx 6000 ada') || modelLower.includes('rtx 5000 ada')) return 48;
|
|
571
|
+
if (modelLower.includes('rtx a6000') || modelLower.includes('a6000')) return 48;
|
|
572
|
+
if (modelLower.includes('rtx a5000') || modelLower.includes('a5000')) return 24;
|
|
573
|
+
if (modelLower.includes('l40s') || modelLower.includes('l40')) return 48;
|
|
574
|
+
if (modelLower.includes('h200')) return 141;
|
|
575
|
+
if (modelLower.includes('h100')) return 80;
|
|
576
|
+
if (modelLower.includes('a100') && (modelLower.includes('40gb') || /a100[\s-]?(?:pcie[\s-]?)?40\b/.test(modelLower))) return 40;
|
|
577
|
+
if (modelLower.includes('a100')) return 80; // A100 defaults to the 80GB SKU
|
|
578
|
+
if (modelLower.includes('a40')) return 48;
|
|
579
|
+
|
|
556
580
|
if (modelLower.includes('tesla p100') || modelLower.includes('p100')) return 16;
|
|
557
|
-
|
|
581
|
+
|
|
558
582
|
// NVIDIA RTX 50 series
|
|
559
583
|
if (modelLower.includes('rtx 5090')) return 32;
|
|
560
584
|
if (modelLower.includes('rtx 5080')) return 16;
|
|
@@ -635,7 +659,7 @@ class HardwareDetector {
|
|
|
635
659
|
else score += totalGB * 2;
|
|
636
660
|
|
|
637
661
|
// Score basado en RAM disponible
|
|
638
|
-
const freePercent = (freeGB / totalGB) * 100;
|
|
662
|
+
const freePercent = totalGB > 0 ? (freeGB / totalGB) * 100 : 0;
|
|
639
663
|
if (freePercent > 50) score += 20;
|
|
640
664
|
else if (freePercent > 30) score += 15;
|
|
641
665
|
else if (freePercent > 20) score += 10;
|
|
@@ -738,34 +762,37 @@ class HardwareDetector {
|
|
|
738
762
|
* Normalize VRAM values (handle different units and wrong totals)
|
|
739
763
|
*/
|
|
740
764
|
normalizeVRAM(vram) {
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
//
|
|
746
|
-
|
|
747
|
-
|
|
765
|
+
const raw = Number(vram);
|
|
766
|
+
if (!Number.isFinite(raw) || raw <= 0) return 0;
|
|
767
|
+
|
|
768
|
+
// Inputs reaching this function come from systeminformation / lspci (which
|
|
769
|
+
// express controller VRAM in megabytes), from raw byte counts on systems
|
|
770
|
+
// that report that way, and increasingly from our own curated GB tables
|
|
771
|
+
// (estimateVRAMFromModel, device-id maps) fed back through here. The unit
|
|
772
|
+
// is inferred from magnitude:
|
|
773
|
+
//
|
|
774
|
+
// > 1e6 -> raw bytes (a 192 GB card is ~2.06e11 bytes, while
|
|
775
|
+
// the same card in MB is ~196,608, well under 1e6).
|
|
776
|
+
// >= 1024 -> megabytes (the smallest dedicated framebuffer that
|
|
777
|
+
// still rounds to >=1 GB; this is the systeminformation
|
|
778
|
+
// reporting range, e.g. 8192, 16384, 16368).
|
|
779
|
+
// 1 <= v <= 256 -> already gigabytes. Real single-GPU VRAM tops out
|
|
780
|
+
// around 192 GB (H200 ~141, B200/MI ~192), so any
|
|
781
|
+
// small integer in this band is a GB value. This is
|
|
782
|
+
// the dead-zone fix for issue #88: normalizeVRAM(96)
|
|
783
|
+
// used to return 0 (treated 96 as 96 MB -> 0 GB).
|
|
784
|
+
// 257 <= v < 1024 -> sub-gigabyte framebuffer in MB (e.g. a 512 MB
|
|
785
|
+
// aperture) -> rounds to 0/1 GB as before.
|
|
786
|
+
if (raw > 1_000_000) {
|
|
787
|
+
return Math.max(0, Math.round(raw / (1024 * 1024 * 1024))); // bytes -> GB
|
|
748
788
|
}
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
if (vramValue >= 1024) {
|
|
752
|
-
// Values >= 1024 are likely MB, convert to GB
|
|
753
|
-
vramValue = Math.round(vramValue / 1024);
|
|
754
|
-
} else if (vramValue >= 512 && vramValue < 1024) {
|
|
755
|
-
// 512-1023 MB, round to 1GB
|
|
756
|
-
vramValue = 1;
|
|
757
|
-
} else if (vramValue > 80) {
|
|
758
|
-
// Values between 80-511 are likely incorrect MB values, treat as MB
|
|
759
|
-
vramValue = Math.round(vramValue / 1024) || 1;
|
|
760
|
-
} else if (vramValue >= 1 && vramValue <= 80) {
|
|
761
|
-
// Values 1-80 are likely already in GB, keep as is
|
|
762
|
-
vramValue = vramValue;
|
|
763
|
-
} else {
|
|
764
|
-
// Values < 1 round to 0
|
|
765
|
-
vramValue = 0;
|
|
789
|
+
if (raw >= 1024) {
|
|
790
|
+
return Math.max(0, Math.round(raw / 1024)); // MB -> GB
|
|
766
791
|
}
|
|
767
|
-
|
|
768
|
-
|
|
792
|
+
if (raw <= 256) {
|
|
793
|
+
return Math.round(raw); // already GB (plausible single-GPU range)
|
|
794
|
+
}
|
|
795
|
+
return Math.max(0, Math.round(raw / 1024)); // 257..1023 MB -> GB
|
|
769
796
|
}
|
|
770
797
|
|
|
771
798
|
/**
|