llm-checker 3.6.1 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -7
- package/bin/enhanced_cli.js +361 -5
- package/package.json +7 -2
- package/src/data/model-database.js +450 -0
- package/src/data/registry-ingestors.js +751 -0
- package/src/data/registry-recommender.js +514 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/index.js +68 -4
- package/src/models/deterministic-selector.js +16 -3
- package/src/models/moe-assumptions.js +11 -0
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
const ModelDatabase = require('./model-database');
|
|
2
|
+
const DeterministicModelSelector = require('../models/deterministic-selector');
|
|
3
|
+
|
|
4
|
+
/**
 * Coerce a value to an array.
 *
 * @param {*} value - Any value.
 * @returns {Array} `value` itself when it is already an array, otherwise a
 *   fresh empty array.
 */
function toArray(value) {
    if (Array.isArray(value)) {
        return value;
    }
    return [];
}
|
|
7
|
+
|
|
8
|
+
/**
 * Extract a parameter count, in billions, from the first usable candidate.
 *
 * Candidates are tried in order. A positive finite number is returned as-is.
 * Otherwise the candidate is stringified (commas removed) and parsed:
 *   - "NxMB" (e.g. "8x7B") yields N * M, so a Mixture-of-Experts model is not
 *     sized as if it were a single expert;
 *   - "<amount>B" yields the amount, "<amount>M" yields amount / 1000 and
 *     "<amount>T" yields amount * 1000.
 *
 * @param {...*} values - Numbers or strings (model names, stored columns).
 * @returns {number|null} Parameter count in billions, or null when no
 *   candidate yields a positive size.
 */
function parseParamsB(...values) {
    const isPositiveFinite = (n) => Number.isFinite(n) && n > 0;

    for (const candidate of values) {
        if (typeof candidate === 'number' && isPositiveFinite(candidate)) {
            return candidate;
        }

        const cleaned = String(candidate || '').replace(/,/g, '');

        // MoE shorthand takes precedence over the plain "<amount><unit>" form.
        const moeMatch = /(\d+)\s*x\s*(\d+(?:\.\d+)?)\s*b\b/i.exec(cleaned);
        if (moeMatch) {
            const expertCount = Number(moeMatch[1]);
            const expertSize = Number(moeMatch[2]);
            if (expertCount > 0 && isPositiveFinite(expertSize)) {
                return expertCount * expertSize;
            }
        }

        const sized = /(\d+(?:\.\d+)?)\s*([bmt])\b/i.exec(cleaned);
        if (!sized) {
            continue;
        }

        const quantity = Number(sized[1]);
        if (!isPositiveFinite(quantity)) {
            continue;
        }

        switch (sized[2].toLowerCase()) {
            case 't':
                return quantity * 1000;
            case 'm':
                return quantity / 1000;
            default:
                return quantity;
        }
    }

    return null;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Read an active-parameter count (in billions) from active-param naming,
 * e.g. the "-A17B" suffix in "Qwen3-397B-A17B".
 *
 * The "a<number>b" token must sit at the start of the string or follow one
 * of `-`, `_`, `/` or whitespace.
 *
 * @param {...*} values - Candidate name strings, tried in order.
 * @returns {number|null} The first positive finite active count, else null.
 */
function parseActiveParamsFromName(...values) {
    const activePattern = /(?:^|[-_\s/])a(\d+(?:\.\d+)?)\s*b\b/i;

    for (const value of values) {
        const found = activePattern.exec(String(value || ''));
        if (!found) {
            continue;
        }
        const active = Number(found[1]);
        if (Number.isFinite(active) && active > 0) {
            return active;
        }
    }

    return null;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Detect Mixture-of-Experts naming so callers can size by total params
 * (memory) and apply MoE speed assumptions.
 *
 * Recognised forms: "8x7B", "397B-A17B", a standalone "moe" word, and any
 * name containing "mixtral".
 *
 * @param {...*} values - Candidate name strings.
 * @returns {boolean} True when at least one candidate looks like an MoE name.
 */
function isMoEName(...values) {
    const moePattern = /(\d+\s*x\s*\d+(?:\.\d+)?\s*b\b)|(\d+(?:\.\d+)?\s*b[-_\s]*a\d)|\bmoe\b|mixtral/i;

    for (const value of values) {
        if (moePattern.test(String(value || ''))) {
            return true;
        }
    }
    return false;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Infer a model family label from an identifier or display name.
 *
 * The identifier is lower-cased and tested against an ordered pattern list;
 * the first match wins, so more specific families must precede the general
 * ones they overlap with.
 *
 * Ordering and guards that matter:
 *   - `codellama` precedes the `llama*` entries; otherwise "codellama-34b"
 *     is claimed by the llama3 pattern through the substring "llama-3".
 *   - A version digit in the llama and gemma patterns must not be followed by
 *     another digit or by "b". That keeps size tokens ("llama-30b",
 *     "gemma-2b") from being read as version numbers, while hyphenated ids
 *     such as "gemma-2-9b" and "gemma-3-4b" resolve to gemma2 / gemma3.
 *   - The bare `phi` pattern must not start in the middle of a word, so
 *     names like "dolphin" are not classified as phi.
 *
 * @param {string} [identifier=''] - Model identifier and/or display name.
 * @returns {string} Family label, or 'other' when nothing matches.
 */
function inferFamily(identifier = '') {
    const text = String(identifier || '').toLowerCase();
    const families = [
        ['deepseek-r1', /deepseek[-_ ]?r1/],
        ['deepseek-coder', /deepseek[-_ ]?coder/],
        ['deepseek', /deepseek/],
        ['qwen3', /qwen3/],
        ['qwen2.5', /qwen2\.5/],
        ['qwen2', /qwen2/],
        ['qwen', /qwen/],
        ['codellama', /code-?llama/],
        ['llama3.2', /llama-?3\.2/],
        ['llama3.1', /llama-?3\.1/],
        ['llama3', /llama-?3(?![\db])/],
        ['llama2', /llama-?2(?![\db])/],
        ['mistral', /mistral/],
        ['mixtral', /mixtral/],
        ['gemma3', /gemma-?3(?![\db])/],
        ['gemma2', /gemma-?2(?![\db])/],
        ['gemma', /gemma/],
        ['phi4', /phi-?4/],
        ['phi3', /phi-?3/],
        ['phi', /(?:^|[^a-z])phi/],
        ['starcoder', /starcoder/],
        ['llava', /llava/],
        ['nomic-embed', /nomic-embed/],
        ['bge', /\bbge\b/]
    ];

    for (const [family, pattern] of families) {
        if (pattern.test(text)) return family;
    }
    return 'other';
}
|
|
90
|
+
|
|
91
|
+
/**
 * Pick the quantization label for an artifact row.
 *
 * An explicit `quantization` (or, failing that, `precision`) value wins.
 * Without one, safetensors / pytorch / pytorch_bin formats are labelled
 * 'FP16' and every other format is labelled 'Q4_K_M'.
 *
 * @param {object} [row={}] - Artifact row.
 * @returns {string} Quantization label.
 */
function normalizeQuantization(row = {}) {
    const declared = row.quantization || row.precision || '';
    if (declared) {
        return declared;
    }

    const fullPrecisionFormats = ['safetensors', 'pytorch', 'pytorch_bin'];
    return fullPrecisionFormats.includes(row.format) ? 'FP16' : 'Q4_K_M';
}
|
|
99
|
+
|
|
100
|
+
/**
 * Tell whether a filename is one shard of a multi-file checkpoint, i.e. it
 * ends in "-NNNNN-of-NNNNN.safetensors" or "-NNNNN-of-NNNNN.bin" (each
 * number at least five digits, extension case-insensitive).
 *
 * @param {string} [filename=''] - File name to inspect.
 * @returns {boolean} True for sharded weight files.
 */
function isShardedWeightFile(filename = '') {
    const shardSuffix = /-\d{5,}-of-\d{5,}\.(safetensors|bin)$/i;
    const name = String(filename || '');
    return shardSuffix.test(name);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Choose the runtime an artifact should be run with.
 *
 * Priority: ollama (by source or support list), mlx (by format or support
 * list), llama.cpp (gguf format or support list), vllm, transformers, then
 * the first listed runtime, and finally 'transformers' as the default.
 * All comparisons are case-insensitive.
 *
 * @param {Array} [runtimeSupport=[]] - Runtimes the artifact supports.
 * @param {string} [format=''] - Artifact format (e.g. 'gguf', 'mlx').
 * @param {string} [sourceId=''] - Registry source id (e.g. 'ollama').
 * @returns {string} Lower-cased runtime name.
 */
function choosePreferredRuntime(runtimeSupport = [], format = '', sourceId = '') {
    const supported = toArray(runtimeSupport).map((entry) => String(entry).toLowerCase());
    const fmt = String(format || '').toLowerCase();
    const origin = String(sourceId || '').toLowerCase();
    const supports = (name) => supported.includes(name);

    if (origin === 'ollama' || supports('ollama')) {
        return 'ollama';
    }
    if (fmt === 'mlx' || supports('mlx')) {
        return 'mlx';
    }
    if (fmt === 'gguf' || supports('llama.cpp')) {
        return 'llama.cpp';
    }
    for (const fallback of ['vllm', 'transformers']) {
        if (supports(fallback)) {
            return fallback;
        }
    }
    return supported[0] || 'transformers';
}
|
|
116
|
+
|
|
117
|
+
/**
 * Convert a registry artifact row into the model shape the selector scores.
 *
 * A sharded Hugging Face weight file is represented by its repository
 * (identifier, version, install command and download URL all point at the
 * repo) rather than by the single shard.
 *
 * @param {object} row - Artifact row from the registry database.
 * @returns {object|null} Selector model, or null when the row has no usable
 *   identifier or no positive finite parameter count.
 */
function artifactToSelectorModel(row) {
    const isShard = row.source_id === 'huggingface' && isShardedWeightFile(row.filename || row.artifact_name);

    let identifier;
    if (isShard) {
        identifier = row.canonical_model_id || row.repo_id;
    } else {
        identifier = row.artifact_name || row.filename || row.canonical_model_id || row.repo_id;
    }
    const displayName = row.canonical_model_id || row.repo_display_name || identifier;
    const quant = normalizeQuantization(row);

    // Memory sizing uses the TOTAL parameter count (for MoE, all experts are
    // resident), never the active count. The total is re-derived from the
    // names (MoE-aware) and the larger of that and the stored column is kept,
    // so a stale or under-reported stored value (an MoE saved as one expert,
    // or an active-param count) cannot make a huge model look small.
    const names = [row.artifact_name, row.filename, row.canonical_model_id, row.repo_id];
    const storedTotal = parseParamsB(row.parameter_count_b);
    const namedTotal = parseParamsB(...names);
    const totalParamsB = Math.max(storedTotal || 0, namedTotal || 0) || null;
    const hasTotal = Number.isFinite(totalParamsB) && totalParamsB > 0;

    const activeParamsB = parseParamsB(row.active_parameter_count_b) || parseActiveParamsFromName(...names);
    const activeBelowTotal = Number.isFinite(activeParamsB)
        && Number.isFinite(totalParamsB)
        && activeParamsB < totalParamsB;
    const isMoE = isMoEName(...names) || activeBelowTotal;

    // Size by the total; the stored active count is only a last resort when
    // no total can be determined at all.
    const paramsB = hasTotal ? totalParamsB : parseParamsB(row.active_parameter_count_b);

    if (!identifier || !Number.isFinite(paramsB) || paramsB <= 0) {
        return null;
    }

    // Flag MoE and carry the TOTAL parameter count so memory is sized by the
    // full weight set. activeParamsB is deliberately NOT exposed on the
    // model: per the original author's note, doing so would switch memory
    // sizing to the active count and let a 397B-A17B model falsely "fit" in
    // roughly 11GB. Active params only inform MoE detection here.
    const moeFields = isMoE && hasTotal
        ? { isMoE: true, totalParamsB, total_params_b: totalParamsB }
        : {};

    const runtimeSupport = toArray(row.runtime_support);
    const preferredRuntime = choosePreferredRuntime(runtimeSupport, row.format, row.source_id);
    const tasks = toArray(row.tasks);
    const modalities = toArray(row.modalities);
    const tags = [row.source_id, row.format, quant, ...runtimeSupport, ...tasks]
        .filter(Boolean)
        .map((tag) => String(tag).toLowerCase());

    const sizeGB = Number(row.size_gb);
    const hasSize = Number.isFinite(sizeGB) && sizeGB > 0;
    const sizeByQuant = hasSize ? { [quant]: sizeGB } : {};

    // Provenance values appear both at the top level and under `provenance`.
    const registry = row.source_name || row.source_id;
    const version = isShard
        ? (row.repo_id || identifier)
        : (row.artifact_name || row.filename || identifier);
    const license = row.license || 'unknown';
    const digest = row.sha256 || row.etag || 'unknown';
    const installCommand = isShard && row.repo_id
        ? `hf download ${row.repo_id}`
        : (row.install_command || '');
    const downloadUrl = isShard ? (row.repo_url || '') : (row.download_url || '');
    const contextLength = Number(row.context_length);

    return {
        name: displayName,
        model_identifier: identifier,
        family: inferFamily(`${displayName} ${identifier}`),
        paramsB,
        ...moeFields,
        quant,
        availableQuantizations: [quant],
        sizeGB: hasSize ? sizeGB : undefined,
        sizeByQuant,
        ctxMax: contextLength > 0 ? contextLength : 4096,
        tags,
        modalities: modalities.length > 0 ? modalities : ['text'],
        pulls: Number(row.downloads) || 0,
        source: row.source_id,
        registry,
        version,
        license,
        digest,
        installCommand,
        downloadUrl,
        preferredRuntime,
        artifact: row,
        provenance: {
            source: row.source_id,
            registry,
            version,
            license,
            digest,
            download_url: downloadUrl,
            install_command: installCommand,
            repo_url: row.repo_url || ''
        }
    };
}
|
|
216
|
+
|
|
217
|
+
/**
 * Collapse models that share source, repo (or name), identifier and
 * preferred runtime, keeping the smallest one of each group.
 *
 * Size is `sizeGB`, else the artifact's `size_gb`; a model without a size is
 * treated as maximally large, so it never displaces a sized duplicate.
 * Groups keep the position of their first occurrence.
 *
 * @param {Iterable<object>} models - Selector models.
 * @returns {object[]} One model per dedupe key.
 */
function dedupeRecommendationPool(models) {
    const sizeOf = (entry) => Number(entry.sizeGB || entry.artifact?.size_gb || Number.MAX_SAFE_INTEGER);
    const keyOf = (entry) => {
        const artifact = entry.artifact || {};
        return [
            entry.source,
            artifact.repo_id || entry.name,
            entry.model_identifier,
            entry.preferredRuntime
        ].join('|');
    };

    const smallestByKey = new Map();
    for (const entry of models) {
        const key = keyOf(entry);
        const kept = smallestByKey.get(key);
        if (!kept || sizeOf(entry) < sizeOf(kept)) {
            smallestByKey.set(key, entry);
        }
    }
    return [...smallestByKey.values()];
}
|
|
242
|
+
|
|
243
|
+
/**
 * Flatten a scored selector candidate into the recommendation record
 * returned to callers.
 *
 * Model-level fields come from `candidate.meta`; size, install command and
 * download URL fall back to the underlying artifact row when the meta value
 * is empty.
 *
 * @param {object} candidate - Scored candidate carrying a `meta` model.
 * @returns {object} Recommendation record.
 */
function candidateToRecommendation(candidate) {
    const { meta } = candidate;
    const artifact = meta.artifact || {};

    return {
        model: meta.name,
        artifact: meta.model_identifier,
        source: meta.source,
        registry: meta.registry,
        score: candidate.score,
        params_b: meta.paramsB,
        quantization: candidate.quant,
        size_gb: meta.sizeGB || artifact.size_gb || null,
        required_gb: candidate.requiredGB,
        estimated_tps: candidate.estTPS,
        runtime: candidate.runtime,
        install_command: meta.installCommand || artifact.install_command || '',
        download_url: meta.downloadUrl || artifact.download_url || '',
        license: meta.license,
        gated: Boolean(artifact.gated),
        requires_auth: Boolean(artifact.requires_auth),
        tasks: toArray(artifact.tasks),
        modalities: toArray(artifact.modalities),
        rationale: candidate.rationale,
        components: candidate.components,
        memory: candidate.memory,
        speed: candidate.speed
    };
}
|
|
270
|
+
|
|
271
|
+
/**
 * Adapt a detected-hardware report to the profile shape the selector reads.
 *
 * A report that already carries `memory.totalGB`, `gpu` and `acceleration`
 * is returned untouched. Otherwise the profile is built from
 * `hardware.summary`, the CPU info and the primary backend, with defaults of
 * 8 (GB) system RAM, 4 CPU cores and a 'cpu' backend when values are missing.
 *
 * @param {object} [hardware={}] - Hardware detection result.
 * @returns {object} Profile with `cpu`, `gpu`, `memory`, `acceleration` and
 *   `usableMemGB` (undefined unless `summary.effectiveMemory` is positive).
 */
function normalizeHardwareForSelector(hardware = {}) {
    const alreadyNormalized = hardware.memory?.totalGB && hardware.gpu && hardware.acceleration;
    if (alreadyNormalized) {
        return hardware;
    }

    const summary = hardware.summary || {};
    const cpuInfo = hardware.cpu || hardware.backends?.cpu?.info || {};
    const cpuCores = cpuInfo.cores || {};
    const backend = summary.bestBackend || hardware.primary?.type || 'cpu';
    const systemRAM = Number(summary.systemRAM || summary.effectiveMemory || 8);
    const totalVRAM = Number(summary.totalVRAM || 0);
    const gpuModel = summary.gpuModel || summary.gpuInventory || hardware.primary?.name || '';

    const isMetal = backend === 'metal';
    const isCuda = backend === 'cuda';
    const isRocm = backend === 'rocm';

    let gpuType = 'cpu_only';
    if (isMetal) {
        gpuType = 'apple_silicon';
    } else if (isCuda) {
        gpuType = 'nvidia';
    } else if (isRocm) {
        gpuType = 'amd';
    }

    const integratedOnly = summary.hasIntegratedGPU && !summary.hasDedicatedGPU;
    const effectiveMemory = Number(summary.effectiveMemory);

    return {
        cpu: {
            architecture: cpuInfo.architecture || process.arch,
            cores: Number(cpuCores.logical || cpuCores.physical || cpuInfo.cores || 4),
            model: cpuInfo.brand || summary.cpuModel || ''
        },
        gpu: {
            type: gpuType,
            model: gpuModel,
            vramGB: totalVRAM,
            totalVRAM,
            gpuCount: Math.max(1, Number(summary.gpuCount || 1)),
            unified: Boolean(isMetal || integratedOnly),
            isMultiGPU: Boolean(summary.isMultiGPU)
        },
        memory: {
            totalGB: systemRAM,
            total: systemRAM
        },
        acceleration: {
            supports_metal: isMetal,
            supports_cuda: isCuda,
            supports_rocm: isRocm
        },
        usableMemGB: effectiveMemory > 0 ? effectiveMemory : undefined
    };
}
|
|
314
|
+
|
|
315
|
+
/**
 * Recommends model artifacts from the multi-source registry database by
 * converting artifact rows into selector models and scoring them with a
 * deterministic selector.
 */
class RegistryRecommender {
    /**
     * @param {object} [options]
     * @param {object} [options.database] - Database to use; when absent a
     *   ModelDatabase is created from `options.databaseOptions`.
     * @param {object} [options.databaseOptions] - Options for the default database.
     * @param {object} [options.selector] - Selector to use; when absent a
     *   DeterministicModelSelector is created.
     */
    constructor(options = {}) {
        const { database, databaseOptions, selector } = options;
        this.database = database || new ModelDatabase(databaseOptions || {});
        this.selector = selector || new DeterministicModelSelector();
    }

    /** Initialize the underlying database. */
    async initialize() {
        await this.database.initialize();
    }

    /**
     * Produce recommendations for a single category.
     *
     * @param {object} [options] - Same options as `selectCategory`.
     * @returns {Promise<object>} Category, runtime, counts, the flattened
     *   recommendations, registry stats and a generation timestamp.
     */
    async recommend(options = {}) {
        const { category, runtime, rows, modelPool, result } = await this.selectCategory(options);

        return {
            category,
            runtime,
            optimizeFor: result.optimizeFor,
            total_artifacts: rows.length,
            total_candidates: modelPool.length,
            total_evaluated: result.total_evaluated,
            recommendations: result.candidates.map(candidateToRecommendation),
            registry: this.database.getRegistryStats(),
            generated_at: new Date().toISOString()
        };
    }

    /**
     * Query the registry, build the deduplicated model pool and score it.
     *
     * A runtime of 'auto', 'all' or '*' (any case) means "no runtime filter":
     * the pool is scored per model with each model's preferred runtime.
     * Any other runtime is passed to the database search and to the
     * selector's `selectModels`.
     *
     * @param {object} [options] - category, runtime, limit (default 10),
     *   poolLimit (default 20000), targetContext, optimizeFor, hardware,
     *   query and the search filters source / format / quantization /
     *   maxSizeGB / minParamsB / maxParamsB / localOnly (default true).
     * @returns {Promise<object>} `{ category, runtime, rows, modelPool, result }`.
     */
    async selectCategory(options = {}) {
        const positiveOr = (value, fallback) => (Number(value) > 0 ? Number(value) : fallback);

        const category = options.category || 'general';
        const requestedRuntime = options.runtime || 'auto';
        const isWildcard = ['auto', 'all', '*'].includes(String(requestedRuntime).toLowerCase());
        const runtimeFilter = isWildcard ? undefined : requestedRuntime;
        const limit = positiveOr(options.limit, 10);
        const poolLimit = positiveOr(options.poolLimit, 20000);
        const targetCtx = positiveOr(options.targetContext, undefined);

        const searchFilters = {
            source: options.source,
            format: options.format,
            runtime: runtimeFilter,
            quantization: options.quantization,
            maxSizeGB: options.maxSizeGB,
            minParamsB: options.minParamsB,
            maxParamsB: options.maxParamsB,
            localOnly: options.localOnly !== false,
            limit: poolLimit
        };
        const rows = this.database.searchModelArtifacts(options.query || '', searchFilters);
        const modelPool = dedupeRecommendationPool(rows.map(artifactToSelectorModel).filter(Boolean));
        const selectorHardware = normalizeHardwareForSelector(options.hardware || {});

        let result;
        if (runtimeFilter) {
            result = await this.selector.selectModels(category, {
                topN: limit,
                enableProbe: false,
                silent: true,
                optimizeFor: options.optimizeFor || 'balanced',
                runtime: runtimeFilter,
                targetCtx,
                hardware: selectorHardware,
                installedModels: [],
                modelPool
            });
        } else {
            result = this.scoreAutoRuntimePool({
                category,
                limit,
                targetCtx,
                optimizeFor: options.optimizeFor || 'balanced',
                hardware: selectorHardware,
                modelPool
            });
        }

        return {
            category,
            runtime: runtimeFilter || 'auto',
            rows,
            modelPool,
            result
        };
    }

    /**
     * Build per-category recommendations in the legacy result shape.
     *
     * A failure in one category is recorded on that category's entry (empty
     * `bestModels` plus `error`) and does not stop the remaining categories.
     *
     * @param {object} hardware - Hardware detection result.
     * @param {object} [options] - categories, runtime, optimizeFor / optimize,
     *   limit (default 3), plus anything `selectCategory` accepts.
     * @returns {Promise<object>} `{ recommendations, registryStats, totalModelsAnalyzed }`.
     */
    async getBestModelsForHardware(hardware, options = {}) {
        const categories = options.categories
            || ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
        const runtime = options.runtime || 'auto';
        const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
        const limit = Number(options.limit) > 0 ? Number(options.limit) : 3;
        const registryStats = this.database.getRegistryStats();
        const seenModelKeys = new Set();
        const recommendations = {};

        for (const category of categories) {
            try {
                const selection = await this.selectCategory({
                    ...options,
                    category,
                    runtime,
                    optimizeFor,
                    limit,
                    hardware
                });
                const { rows, modelPool, result } = selection;

                // Count distinct models across categories, not per category.
                for (const model of modelPool) {
                    const artifact = model.artifact || {};
                    const artifactKey = artifact.artifact_id
                        || artifact.id
                        || artifact.filename
                        || model.model_identifier;
                    seenModelKeys.add(
                        [artifactKey, model.source, model.preferredRuntime].filter(Boolean).join('|')
                    );
                }

                const profile = this.selector.normalizeHardwareProfile(
                    normalizeHardwareForSelector(hardware || {})
                );
                recommendations[category] = {
                    tier: this.selector.mapHardwareTier(profile),
                    optimizeFor: result.optimizeFor,
                    runtime: selection.runtime,
                    source: 'registry',
                    bestModels: result.candidates.map(
                        (candidate) => this.selector.mapCandidateToLegacyFormat(candidate)
                    ),
                    totalEvaluated: result.total_evaluated,
                    totalArtifacts: rows.length,
                    totalCandidates: modelPool.length,
                    category: this.selector.getCategoryInfo(category)
                };
            } catch (error) {
                recommendations[category] = {
                    tier: 'unknown',
                    optimizeFor,
                    runtime,
                    source: 'registry',
                    bestModels: [],
                    totalEvaluated: 0,
                    totalArtifacts: 0,
                    totalCandidates: 0,
                    error: error.message,
                    category: this.selector.getCategoryInfo(category)
                };
            }
        }

        return {
            recommendations,
            registryStats,
            totalModelsAnalyzed: seenModelKeys.size
        };
    }

    /**
     * Score a model pool without a runtime filter, evaluating each model
     * with its own preferred runtime.
     *
     * The memory budget is usable system memory on unified-memory hardware,
     * otherwise VRAM when it is non-zero and usable system memory when not.
     * Usable memory is `usableMemGB` when the profile provides a number,
     * else max(1, min(0.8 * total, total - 2)).
     *
     * @param {object} params
     * @param {string} params.category - Category to filter and score for.
     * @param {number} params.limit - Maximum number of candidates returned.
     * @param {number} [params.targetCtx] - Context length; defaults to the
     *   selector's target for the category, then its 'general' target.
     * @param {string} params.optimizeFor - Optimization objective.
     * @param {object} params.hardware - Hardware profile.
     * @param {object[]} params.modelPool - Models to score.
     * @returns {object} Result with `candidates` sorted by descending score.
     */
    scoreAutoRuntimePool({ category, limit, targetCtx, optimizeFor, hardware, modelPool }) {
        const { selector } = this;
        const profile = selector.normalizeHardwareProfile(hardware);
        const objective = selector.normalizeOptimizationObjective(optimizeFor);
        const ctx = targetCtx || selector.targetContexts[category] || selector.targetContexts.general;

        const totalMem = profile?.memory?.totalGB ?? profile?.memory?.total ?? 8;
        let usableMem;
        if (typeof profile.usableMemGB === 'number') {
            usableMem = profile.usableMemGB;
        } else {
            usableMem = Math.max(1, Math.min(0.8 * totalMem, totalMem - 2));
        }

        const gpu = profile?.gpu;
        const isUnified = Boolean(gpu?.unified) || gpu?.type === 'apple_silicon';
        const vram = gpu?.vramGB ?? gpu?.vram ?? 0;
        const budget = isUnified ? usableMem : (vram || usableMem);

        const filtered = selector.filterByCategory(modelPool, category);
        const candidates = [];
        for (const model of filtered) {
            const runtime = model.preferredRuntime || choosePreferredRuntime(
                model.artifact?.runtime_support,
                model.artifact?.format,
                model.source
            );
            const scored = selector.evaluateModel(model, profile, category, ctx, budget, objective, runtime);
            if (scored) {
                candidates.push(scored);
            }
        }
        candidates.sort((left, right) => right.score - left.score);

        return {
            category,
            optimizeFor: objective,
            runtime: 'auto',
            hardware: profile,
            candidates: candidates.slice(0, limit),
            total_evaluated: filtered.length,
            timestamp: new Date().toISOString()
        };
    }

    /** Close the underlying database. */
    close() {
        this.database.close();
    }
}
|
|
506
|
+
|
|
507
|
+
module.exports = {
|
|
508
|
+
RegistryRecommender,
|
|
509
|
+
artifactToSelectorModel,
|
|
510
|
+
candidateToRecommendation,
|
|
511
|
+
normalizeHardwareForSelector,
|
|
512
|
+
choosePreferredRuntime,
|
|
513
|
+
dedupeRecommendationPool
|
|
514
|
+
};
|
package/src/data/seed/README.md
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
|
-
This directory contains the packaged
|
|
2
|
-
first run.
|
|
1
|
+
This directory contains the packaged model database snapshot used on first run.
|
|
3
2
|
|
|
4
3
|
`models.db` is copied to `~/.llm-checker/models.db` only when the user does not
|
|
5
4
|
already have a local database. After that, `llm-checker sync` updates the user's
|
|
6
|
-
local copy
|
|
5
|
+
local Ollama copy, and `llm-checker registry-sync` can refresh the multi-source
|
|
6
|
+
registry in the user's local copy.
|
|
7
|
+
|
|
8
|
+
The snapshot includes:
|
|
9
|
+
|
|
10
|
+
- the Ollama catalog used by classic recommendation/search commands
|
|
11
|
+
- a multi-source registry of exact installable/downloadable artifacts from
|
|
12
|
+
Hugging Face, Ollama, and GPT4All
|
|
13
|
+
- Hugging Face pages are fetched with cursor pagination; the default packaged
|
|
14
|
+
snapshot uses the top 3000 repositories by downloads
|
|
7
15
|
|
|
8
16
|
Refresh cadence: weekly via `.github/workflows/update-model-db.yml`.
|
package/src/data/seed/models.db
CHANGED
|
Binary file
|
package/src/index.js
CHANGED
|
@@ -20,6 +20,17 @@ const {
|
|
|
20
20
|
} = require('./provenance/model-provenance');
|
|
21
21
|
const { normalizePlatform } = require('./utils/platform');
|
|
22
22
|
|
|
23
|
+
/**
 * Canonicalize a runtime name supplied for recommendations.
 *
 * The input is trimmed and lower-cased first. 'auto', 'all' and '*' (and any
 * falsy input) become 'auto'; 'llamacpp' / 'llama_cpp' become 'llama.cpp';
 * 'hf' becomes 'transformers'; the other known runtime names are returned
 * as-is. Anything else is delegated to `normalizeRuntime`.
 *
 * @param {string} [runtime='auto'] - Requested runtime.
 * @returns {string} Canonical runtime name.
 */
function normalizeRecommendationRuntime(runtime = 'auto') {
    const key = String(runtime || 'auto').trim().toLowerCase();

    switch (key) {
        case 'auto':
        case 'all':
        case '*':
            return 'auto';
        case 'llamacpp':
        case 'llama_cpp':
            return 'llama.cpp';
        case 'hf':
            return 'transformers';
        case 'ollama':
        case 'vllm':
        case 'mlx':
        case 'llama.cpp':
        case 'transformers':
            return key;
        default:
            return normalizeRuntime(key);
    }
}
|
|
33
|
+
|
|
23
34
|
class LLMChecker {
|
|
24
35
|
constructor(options = {}) {
|
|
25
36
|
this.hardwareDetector = new HardwareDetector();
|
|
@@ -2467,7 +2478,59 @@ class LLMChecker {
|
|
|
2467
2478
|
async generateIntelligentRecommendations(hardware, options = {}) {
|
|
2468
2479
|
try {
|
|
2469
2480
|
this.logger.info('Generating intelligent recommendations...');
|
|
2470
|
-
const selectedRuntime =
|
|
2481
|
+
const selectedRuntime = normalizeRecommendationRuntime(options.runtime || 'auto');
|
|
2482
|
+
const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
|
|
2483
|
+
|
|
2484
|
+
if (options.registry !== false) {
|
|
2485
|
+
let registryRecommender = null;
|
|
2486
|
+
try {
|
|
2487
|
+
const { RegistryRecommender } = require('./data/registry-recommender');
|
|
2488
|
+
registryRecommender = new RegistryRecommender();
|
|
2489
|
+
await registryRecommender.initialize();
|
|
2490
|
+
|
|
2491
|
+
const registryResult = await registryRecommender.getBestModelsForHardware(hardware, {
|
|
2492
|
+
runtime: selectedRuntime,
|
|
2493
|
+
optimizeFor,
|
|
2494
|
+
limit: 3,
|
|
2495
|
+
poolLimit: options.poolLimit || 20000,
|
|
2496
|
+
localOnly: options.includeGated ? false : true
|
|
2497
|
+
});
|
|
2498
|
+
const recommendations = registryResult.recommendations;
|
|
2499
|
+
const hasRegistryRecommendations = Object.values(recommendations)
|
|
2500
|
+
.some((group) => Array.isArray(group.bestModels) && group.bestModels.length > 0);
|
|
2501
|
+
|
|
2502
|
+
if (hasRegistryRecommendations) {
|
|
2503
|
+
const summary = this.intelligentRecommender.generateRecommendationSummary(
|
|
2504
|
+
recommendations,
|
|
2505
|
+
hardware,
|
|
2506
|
+
{ optimizeFor }
|
|
2507
|
+
);
|
|
2508
|
+
const totalModelsAnalyzed = Number(registryResult.totalModelsAnalyzed) || Object.values(recommendations)
|
|
2509
|
+
.reduce((sum, group) => sum + (Number(group.totalCandidates) || Number(group.totalEvaluated) || 0), 0);
|
|
2510
|
+
|
|
2511
|
+
this.logger.info(`Generated registry recommendations for ${Object.keys(recommendations).length} categories`);
|
|
2512
|
+
|
|
2513
|
+
return {
|
|
2514
|
+
recommendations,
|
|
2515
|
+
summary,
|
|
2516
|
+
optimizeFor: summary.optimize_for || optimizeFor,
|
|
2517
|
+
runtime: selectedRuntime,
|
|
2518
|
+
recommendationSource: 'registry',
|
|
2519
|
+
registryStats: registryResult.registryStats,
|
|
2520
|
+
totalModelsAnalyzed,
|
|
2521
|
+
generatedAt: new Date().toISOString()
|
|
2522
|
+
};
|
|
2523
|
+
}
|
|
2524
|
+
|
|
2525
|
+
this.logger.warn('Registry recommendations were empty, falling back to Ollama catalog');
|
|
2526
|
+
} catch (error) {
|
|
2527
|
+
this.logger.warn('Registry recommendations unavailable, falling back to Ollama catalog', { error: error.message });
|
|
2528
|
+
} finally {
|
|
2529
|
+
if (registryRecommender) {
|
|
2530
|
+
registryRecommender.close();
|
|
2531
|
+
}
|
|
2532
|
+
}
|
|
2533
|
+
}
|
|
2471
2534
|
|
|
2472
2535
|
// Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
|
|
2473
2536
|
const ollamaData = await this.loadOllamaModelData();
|
|
@@ -2479,11 +2542,11 @@ class LLMChecker {
|
|
|
2479
2542
|
}
|
|
2480
2543
|
|
|
2481
2544
|
// Generar recomendaciones inteligentes
|
|
2482
|
-
const
|
|
2545
|
+
const fallbackRuntime = selectedRuntime === 'auto' ? 'ollama' : selectedRuntime;
|
|
2483
2546
|
const recommendations = await this.intelligentRecommender.getBestModelsForHardware(
|
|
2484
2547
|
hardware,
|
|
2485
2548
|
allModels,
|
|
2486
|
-
{ optimizeFor, runtime:
|
|
2549
|
+
{ optimizeFor, runtime: fallbackRuntime }
|
|
2487
2550
|
);
|
|
2488
2551
|
const summary = this.intelligentRecommender.generateRecommendationSummary(
|
|
2489
2552
|
recommendations,
|
|
@@ -2497,7 +2560,8 @@ class LLMChecker {
|
|
|
2497
2560
|
recommendations,
|
|
2498
2561
|
summary,
|
|
2499
2562
|
optimizeFor: summary.optimize_for || optimizeFor,
|
|
2500
|
-
runtime:
|
|
2563
|
+
runtime: fallbackRuntime,
|
|
2564
|
+
recommendationSource: 'ollama_catalog',
|
|
2501
2565
|
totalModelsAnalyzed: allModels.length,
|
|
2502
2566
|
generatedAt: new Date().toISOString()
|
|
2503
2567
|
};
|