llm-checker 3.6.1 → 3.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,632 @@
1
+ const ModelDatabase = require('./model-database');
2
+ const DeterministicModelSelector = require('../models/deterministic-selector');
3
+
4
// Pass a real array through unchanged (same reference); every other input,
// including null, strings and array-likes, becomes a fresh empty array.
function toArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value;
}
7
+
8
// Return the parameter count, in billions, taken from the first argument that
// yields one; null when none does.
//   - a finite positive number is returned as-is
//   - "NxMB" (Mixture-of-Experts, e.g. 8x7B) -> experts * per-expert size, so
//     an MoE model is never sized as if it were a single expert
//   - "<n>b" -> n, "<n>m" -> n / 1000, "<n>t" -> n * 1000
// Thousands separators (commas) are stripped before matching.
function parseParamsB(...values) {
    const moePattern = /(\d+)\s*x\s*(\d+(?:\.\d+)?)\s*b\b/i;
    const sizePattern = /(\d+(?:\.\d+)?)\s*([bmt])\b/i;

    for (const candidate of values) {
        const isPositiveNumber = typeof candidate === 'number'
            && Number.isFinite(candidate)
            && candidate > 0;
        if (isPositiveNumber) {
            return candidate;
        }

        const cleaned = String(candidate || '').replace(/,/g, '');

        const moeHit = cleaned.match(moePattern);
        if (moeHit) {
            const expertCount = Number(moeHit[1]);
            const expertSize = Number(moeHit[2]);
            if (expertCount > 0 && Number.isFinite(expertSize) && expertSize > 0) {
                return expertCount * expertSize;
            }
        }

        const sizeHit = cleaned.match(sizePattern);
        if (sizeHit) {
            const quantity = Number(sizeHit[1]);
            if (Number.isFinite(quantity) && quantity > 0) {
                switch (sizeHit[2].toLowerCase()) {
                    case 't':
                        return quantity * 1000;
                    case 'm':
                        return quantity / 1000;
                    default:
                        return quantity;
                }
            }
        }
    }
    return null;
}
35
+
36
// Read an active-parameter marker such as the "-A17B" in "Qwen3-397B-A17B".
// The "a" must start the string or follow one of - _ / or whitespace. Returns
// the first positive value found (in billions), or null when no name has one.
function parseActiveParamsFromName(...values) {
    const activePattern = /(?:^|[-_\s/])a(\d+(?:\.\d+)?)\s*b\b/i;
    for (const raw of values) {
        const hit = activePattern.exec(String(raw || ''));
        if (!hit) continue;
        const active = Number(hit[1]);
        if (Number.isFinite(active) && active > 0) {
            return active;
        }
    }
    return null;
}
47
+
48
// True when any of the given names looks like a Mixture-of-Experts model, so
// callers can size by total params (memory) and apply MoE speed assumptions.
// Recognised spellings: "8x7B", "397B-A17B", a standalone "moe" token, and
// anything containing "mixtral".
function isMoEName(...values) {
    const moeNamePattern = /(\d+\s*x\s*\d+(?:\.\d+)?\s*b\b)|(\d+(?:\.\d+)?\s*b[-_\s]*a\d)|\bmoe\b|mixtral/i;
    for (const raw of values) {
        if (moeNamePattern.test(String(raw || ''))) {
            return true;
        }
    }
    return false;
}
55
+
56
// Map a model identifier (repo id, tag, file name, ...) to a coarse family
// label. The identifier is lower-cased and tested against an ORDERED table;
// the first matching pattern wins and 'other' is returned when none match.
//
// Because the first match wins, a specific family must be listed before any
// generic family whose pattern it also satisfies:
//   - 'codellama' precedes the llama entries; otherwise "codellama-34b" is
//     captured by /llama-?3/ and mislabelled as llama3.
//   - the generic 'phi' pattern requires "phi" to start the string or follow a
//     non-letter, so "dolphin-*" fine-tunes are not mislabelled as Phi models.
function inferFamily(identifier = '') {
    const text = String(identifier || '').toLowerCase();
    const families = [
        ['deepseek-r1', /deepseek[-_ ]?r1/],
        ['deepseek-coder', /deepseek[-_ ]?coder/],
        ['deepseek', /deepseek/],
        ['qwen3', /qwen3/],
        ['qwen2.5', /qwen2\.5/],
        ['qwen2', /qwen2/],
        ['qwen', /qwen/],
        ['codellama', /codellama|code-?llama/],
        ['llama3.2', /llama3\.2|llama-?3\.2/],
        ['llama3.1', /llama3\.1|llama-?3\.1/],
        ['llama3', /llama3|llama-?3/],
        ['llama2', /llama2|llama-?2/],
        ['mistral', /mistral/],
        ['mixtral', /mixtral/],
        ['gemma3', /gemma3/],
        ['gemma2', /gemma2/],
        ['gemma', /gemma/],
        ['phi4', /phi-?4/],
        ['phi3', /phi-?3/],
        ['phi', /(?:^|[^a-z])phi/],
        ['starcoder', /starcoder/],
        ['llava', /llava/],
        ['nomic-embed', /nomic-embed/],
        ['bge', /\bbge\b/]
    ];

    const hit = families.find(([, pattern]) => pattern.test(text));
    return hit ? hit[0] : 'other';
}
90
+
91
// Pick the quantization label for an artifact row: an explicit
// `quantization` or `precision` value wins; otherwise full-weight checkpoint
// formats are reported as FP16 and everything else as Q4_K_M.
function normalizeQuantization(row = {}) {
    const declared = row.quantization || row.precision || '';
    if (declared) {
        return declared;
    }
    const fullWeightFormats = ['safetensors', 'pytorch', 'pytorch_bin'];
    return fullWeightFormats.includes(row.format) ? 'FP16' : 'Q4_K_M';
}
99
+
100
// True for file names ending in "-<index>-of-<count>.safetensors" or ".bin"
// (case-insensitive), where index and count each have at least five digits.
function isShardedWeightFile(filename = '') {
    const shardSuffix = /-\d{5,}-of-\d{5,}\.(safetensors|bin)$/i;
    const candidate = String(filename || '');
    return shardSuffix.test(candidate);
}
103
+
104
// Choose the runtime label for an artifact. Comparison is case-insensitive
// and the priority is:
//   ollama (source or supported) > mlx (format or supported)
//   > llama.cpp (gguf format or supported) > vllm > transformers
//   > first listed runtime > 'transformers'.
function choosePreferredRuntime(runtimeSupport = [], format = '', sourceId = '') {
    const runtimes = toArray(runtimeSupport).map((entry) => String(entry).toLowerCase());
    const supports = (name) => runtimes.includes(name);
    const fmt = String(format || '').toLowerCase();
    const origin = String(sourceId || '').toLowerCase();

    if (origin === 'ollama' || supports('ollama')) {
        return 'ollama';
    }
    if (fmt === 'mlx' || supports('mlx')) {
        return 'mlx';
    }
    if (fmt === 'gguf' || supports('llama.cpp')) {
        return 'llama.cpp';
    }

    const generic = ['vllm', 'transformers'].find((name) => supports(name));
    return generic || runtimes[0] || 'transformers';
}
116
+
117
// Convert one registry artifact row into the model shape handed to the
// deterministic selector. Returns null when the row has no usable identifier
// or no positive parameter count.
function artifactToSelectorModel(row) {
    // Shards of a Hugging Face checkpoint are identified by their repository,
    // not by the individual shard file.
    const isShard = row.source_id === 'huggingface'
        && isShardedWeightFile(row.filename || row.artifact_name);
    const repoLevelId = row.canonical_model_id || row.repo_id;
    const artifactLevelId = row.artifact_name || row.filename;
    const identifier = isShard ? repoLevelId : (artifactLevelId || repoLevelId);
    const displayName = row.canonical_model_id || row.repo_display_name || identifier;
    const quant = normalizeQuantization(row);

    // Memory is sized from the TOTAL parameter count: for MoE every expert is
    // resident. The total is re-derived from the names (MoE-aware) and the
    // larger of that and the stored column is used, so a stale or
    // under-reported stored value (one expert, or an active-param count)
    // cannot make a huge model look small.
    const nameStrings = [row.artifact_name, row.filename, row.canonical_model_id, row.repo_id];
    const declaredTotal = parseParamsB(row.parameter_count_b) || 0;
    const namedTotal = parseParamsB(...nameStrings) || 0;
    const totalParamsB = Math.max(declaredTotal, namedTotal) || null;
    const hasTotal = Number.isFinite(totalParamsB) && totalParamsB > 0;

    const declaredActive = parseParamsB(row.active_parameter_count_b);
    const activeParamsB = declaredActive || parseActiveParamsFromName(...nameStrings);
    const activeBelowTotal = Number.isFinite(activeParamsB)
        && Number.isFinite(totalParamsB)
        && activeParamsB < totalParamsB;
    const isMoE = isMoEName(...nameStrings) || activeBelowTotal;

    // Size by the total; the declared active count is only a last resort
    // when no total can be determined at all.
    const paramsB = hasTotal ? totalParamsB : declaredActive;
    if (!identifier || !Number.isFinite(paramsB) || paramsB <= 0) {
        return null;
    }

    // MoE rows carry the total parameter count under both spellings.
    // activeParamsB is intentionally NOT exported: it would switch memory
    // sizing to the active count and let e.g. a 397B-A17B model appear to fit
    // in ~11GB. Active params are meant to drive speed only.
    const moeFields = (isMoE && hasTotal)
        ? { isMoE: true, totalParamsB, total_params_b: totalParamsB }
        : {};

    const runtimeSupport = toArray(row.runtime_support);
    const preferredRuntime = choosePreferredRuntime(runtimeSupport, row.format, row.source_id);
    const tasks = toArray(row.tasks);
    const modalities = toArray(row.modalities);
    const tags = [row.source_id, row.format, quant, ...runtimeSupport, ...tasks]
        .filter(Boolean)
        .map((tag) => String(tag).toLowerCase());

    // A shard's size is only ONE shard, not the whole model, so it is never
    // used as the model size; size stays unset and memory is estimated from
    // the (total) parameter count instead.
    const rawSizeGB = Number(row.size_gb);
    const hasUsableSize = !isShard && Number.isFinite(rawSizeGB) && rawSizeGB > 0;

    const declaredCtx = Number(row.context_length);
    const registry = row.source_name || row.source_id;
    const version = isShard
        ? (row.repo_id || identifier)
        : (artifactLevelId || identifier);
    const license = row.license || 'unknown';
    const digest = row.sha256 || row.etag || 'unknown';
    const installCommand = (isShard && row.repo_id)
        ? `hf download ${row.repo_id}`
        : (row.install_command || '');
    const downloadUrl = isShard ? (row.repo_url || '') : (row.download_url || '');

    return {
        name: displayName,
        model_identifier: identifier,
        family: inferFamily(`${displayName} ${identifier}`),
        paramsB,
        ...moeFields,
        quant,
        availableQuantizations: [quant],
        sizeGB: hasUsableSize ? rawSizeGB : undefined,
        sizeByQuant: hasUsableSize ? { [quant]: rawSizeGB } : {},
        ctxMax: declaredCtx > 0 ? declaredCtx : 4096,
        tags,
        modalities: modalities.length > 0 ? modalities : ['text'],
        pulls: Number(row.downloads) || 0,
        source: row.source_id,
        registry,
        version,
        license,
        digest,
        installCommand,
        downloadUrl,
        preferredRuntime,
        artifact: row,
        provenance: {
            source: row.source_id,
            registry,
            version,
            license,
            digest,
            download_url: downloadUrl,
            install_command: installCommand,
            repo_url: row.repo_url || ''
        }
    };
}
221
+
222
// Drop duplicate pool entries. Two models are duplicates when source,
// repo (or name), identifier and preferred runtime all match; of a set of
// duplicates the one with the smallest known size is kept, and an entry with
// no known size never displaces one already stored.
function dedupeRecommendationPool(models) {
    const keyOf = (entry) => {
        const artifact = entry.artifact || {};
        return [
            entry.source,
            artifact.repo_id || entry.name,
            entry.model_identifier,
            entry.preferredRuntime
        ].join('|');
    };
    // Unknown sizes sort last so they lose against any real size.
    const sizeOf = (entry) => Number(
        entry.sizeGB || entry.artifact?.size_gb || Number.MAX_SAFE_INTEGER
    );

    const byKey = new Map();
    for (const entry of models) {
        const key = keyOf(entry);
        const incumbent = byKey.get(key);
        if (!incumbent || sizeOf(entry) < sizeOf(incumbent)) {
            byKey.set(key, entry);
        }
    }
    return Array.from(byKey.values());
}
247
+
248
// Largest gap (in score points) between the top score and a candidate from an
// unseen source for that candidate to still earn a guaranteed slot.
const SOURCE_DIVERSITY_MARGIN = 15;
// Absolute minimum score: below this, a model is never surfaced purely for
// the sake of source diversity.
const SOURCE_DIVERSITY_FLOOR = 55;
252
+
253
// Build a grouping key that ignores quantization / shard / tag, so variants
// of the SAME model collapse together (e.g. every `qwen2.5-coder:7b-*` quant).
// The key is the lower-cased name with any ":tag" removed and whitespace
// collapsed, plus the parameter count rounded to one decimal.
function modelDiversityKey(candidate) {
    const meta = candidate?.meta || {};
    const baseName = String(meta.name || meta.model_identifier || '')
        .toLowerCase()
        .replace(/:.*$/, '')
        .replace(/\s+/g, ' ')
        .trim();

    const params = Number(meta.paramsB);
    const paramsKnown = Number.isFinite(params) && params > 0;
    if (!paramsKnown) {
        // Unknown size: do not lump every same-named model together (that
        // would silently drop distinct models / sources). Keep them apart by
        // source and identifier instead.
        const source = String(meta.source || '').toLowerCase();
        const identifier = String(meta.model_identifier || meta.name || '').toLowerCase();
        return [baseName, 'na', source, identifier].join('|');
    }

    const rounded = Math.round(params * 10) / 10;
    return `${baseName}|${rounded}`;
}
274
+
275
// Reduce quant/shard/tag variants of one model to its single best-scoring
// candidate, so the top picks are DISTINCT models rather than many quants of
// one. On a score tie the earlier candidate is kept. The result is sorted by
// score, highest first; a non-array input yields an empty array.
function collapseToDistinctModels(candidates) {
    const scoreOf = (entry) => Number(entry.score) || 0;
    const pool = Array.isArray(candidates) ? candidates : [];
    const winners = new Map();

    pool.forEach((entry) => {
        if (!entry) return;
        const groupKey = modelDiversityKey(entry);
        const incumbent = winners.get(groupKey);
        if (incumbent && scoreOf(entry) <= scoreOf(incumbent)) return;
        winners.set(groupKey, entry);
    });

    return Array.from(winners.values())
        .sort((left, right) => scoreOf(right) - scoreOf(left));
}
287
+
288
// Trim a score-sorted list of distinct candidates to `limit` entries (default
// 10) while letting sources that score close to the leader show up, so that
// e.g. Hugging Face / GPT4All artifacts are visible next to Ollama. A clearly
// worse model is never promoted: an alternate-source pick must clear both the
// absolute floor and the margin below the top score.
function applySourceDiversity(distinctSorted, limit) {
    const ranked = Array.isArray(distinctSorted) ? distinctSorted : [];
    if (ranked.length === 0) {
        return [];
    }
    const cap = Number(limit) > 0 ? Number(limit) : 10;
    if (ranked.length <= cap) {
        return ranked.slice(0, cap);
    }

    const scoreOf = (entry) => Number(entry.score) || 0;
    const sourceOf = (entry) => (entry.meta && entry.meta.source) || 'unknown';
    const bestScore = scoreOf(ranked[0]);

    // About 60% of the slots always go to the genuine best-by-score, so
    // diversity cannot push out several real top picks; only the tail is
    // available to competitive alternate sources.
    const reserved = Math.max(1, Math.ceil(cap * 0.6));
    const picked = ranked.slice(0, reserved);
    const taken = new Set(picked);
    const seenSources = new Set(picked.map((entry) => sourceOf(entry)));

    const isCompetitiveNewSource = (entry) => {
        const score = scoreOf(entry);
        return !seenSources.has(sourceOf(entry))
            && score >= SOURCE_DIVERSITY_FLOOR
            && score >= bestScore - SOURCE_DIVERSITY_MARGIN;
    };

    while (picked.length < cap) {
        // First choice: the best unpicked candidate from a source not shown
        // yet that is still competitive. Otherwise: the next best overall.
        const next = ranked.find((entry) => !taken.has(entry) && isCompetitiveNewSource(entry))
            || ranked.find((entry) => !taken.has(entry));
        if (!next) {
            break;
        }
        picked.push(next);
        taken.add(next);
        seenSources.add(sourceOf(next));
    }

    return picked
        .sort((left, right) => scoreOf(right) - scoreOf(left))
        .slice(0, cap);
}
325
+
326
// Flatten a scored selector candidate into the snake_case recommendation
// record returned to callers. Install/download/size values come from the
// selector model first and fall back to the raw artifact row.
function candidateToRecommendation(candidate) {
    const { meta } = candidate;
    const artifact = meta.artifact || {};
    const {
        score,
        quant,
        requiredGB,
        estTPS,
        runtime,
        rationale,
        components,
        memory,
        speed
    } = candidate;

    return {
        model: meta.name,
        artifact: meta.model_identifier,
        source: meta.source,
        registry: meta.registry,
        score,
        params_b: meta.paramsB,
        quantization: quant,
        size_gb: meta.sizeGB || artifact.size_gb || null,
        required_gb: requiredGB,
        estimated_tps: estTPS,
        runtime,
        install_command: meta.installCommand || artifact.install_command || '',
        download_url: meta.downloadUrl || artifact.download_url || '',
        license: meta.license,
        gated: Boolean(artifact.gated),
        requires_auth: Boolean(artifact.requires_auth),
        tasks: toArray(artifact.tasks),
        modalities: toArray(artifact.modalities),
        rationale,
        components,
        memory,
        speed
    };
}
353
+
354
// Adapt a hardware detection result to the profile shape the selector reads.
//
// A profile that already has memory.totalGB, gpu and acceleration is returned
// untouched (same object). Otherwise a new profile is built from
// `hardware.summary`, `hardware.cpu` (or `hardware.backends.cpu.info`) and
// `hardware.primary`.
//
// Every numeric field is guaranteed to be a finite number: a candidate that
// does not convert to a finite positive number is skipped and the documented
// default is used (8 GB RAM, 0 GB VRAM, 4 cores, 1 GPU). Without this, a
// `cpu.cores` object lacking `logical`/`physical`, or a non-numeric string
// such as "unknown", would put NaN into the profile. A null `hardware`
// argument is treated like an empty object.
function normalizeHardwareForSelector(hardware = {}) {
    const hw = hardware ?? {};
    if (hw.memory?.totalGB && hw.gpu && hw.acceleration) {
        return hw;
    }

    // First candidate that converts to a finite number > 0, else `fallback`.
    const firstPositiveNumber = (candidates, fallback) => {
        for (const candidate of candidates) {
            const numeric = Number(candidate);
            if (Number.isFinite(numeric) && numeric > 0) {
                return numeric;
            }
        }
        return fallback;
    };

    const summary = hw.summary || {};
    const cpuInfo = hw.cpu || hw.backends?.cpu?.info || {};
    // `cores` may be a plain number or an object with logical/physical counts.
    const cpuCores = cpuInfo.cores || {};
    const bestBackend = summary.bestBackend || hw.primary?.type || 'cpu';
    const systemRAM = firstPositiveNumber([summary.systemRAM, summary.effectiveMemory], 8);
    const totalVRAM = firstPositiveNumber([summary.totalVRAM], 0);
    const gpuModel = summary.gpuModel || summary.gpuInventory || hw.primary?.name || '';
    const isMetal = bestBackend === 'metal';
    const isCuda = bestBackend === 'cuda';
    const isRocm = bestBackend === 'rocm';

    return {
        cpu: {
            architecture: cpuInfo.architecture || process.arch,
            cores: firstPositiveNumber([cpuCores.logical, cpuCores.physical, cpuInfo.cores], 4),
            model: cpuInfo.brand || summary.cpuModel || ''
        },
        gpu: {
            type: isMetal ? 'apple_silicon' : (isCuda ? 'nvidia' : (isRocm ? 'amd' : 'cpu_only')),
            model: gpuModel,
            vramGB: totalVRAM,
            totalVRAM,
            gpuCount: Math.max(1, firstPositiveNumber([summary.gpuCount], 1)),
            // Metal, or an integrated GPU with no dedicated one, is reported as unified.
            unified: Boolean(isMetal || (summary.hasIntegratedGPU && !summary.hasDedicatedGPU)),
            isMultiGPU: Boolean(summary.isMultiGPU)
        },
        memory: {
            totalGB: systemRAM,
            total: systemRAM
        },
        acceleration: {
            supports_metal: isMetal,
            supports_cuda: isCuda,
            supports_rocm: isRocm
        },
        usableMemGB: Number(summary.effectiveMemory) > 0 ? Number(summary.effectiveMemory) : undefined
    };
}
397
+
398
// Recommends models from the multi-source registry database by scoring
// registry artifacts with the deterministic selector.
class RegistryRecommender {
    // options.database / options.selector replace the default collaborators;
    // options.databaseOptions is forwarded to the default ModelDatabase.
    constructor(options = {}) {
        const { database, databaseOptions, selector } = options;
        this.database = database || new ModelDatabase(databaseOptions || {});
        this.selector = selector || new DeterministicModelSelector();
    }

    // Initialise the underlying database.
    async initialize() {
        await this.database.initialize();
    }

    // Run selectCategory() and flatten its result into a recommendation report.
    async recommend(options = {}) {
        const { category, runtime, rows, modelPool, result } = await this.selectCategory(options);
        return {
            category,
            runtime,
            optimizeFor: result.optimizeFor,
            total_artifacts: rows.length,
            total_candidates: modelPool.length,
            total_evaluated: result.total_evaluated,
            recommendations: result.candidates.map((candidate) => candidateToRecommendation(candidate)),
            registry: this.database.getRegistryStats(),
            generated_at: new Date().toISOString()
        };
    }

    // Search the registry, convert and de-duplicate the matching artifacts,
    // score them for `options.category`, then collapse variants and apply
    // source diversity. Returns { category, runtime, rows, modelPool, result }.
    async selectCategory(options = {}) {
        const positiveOr = (value, fallback) => {
            const numeric = Number(value);
            return numeric > 0 ? numeric : fallback;
        };

        const category = options.category || 'general';
        const requestedRuntime = options.runtime || 'auto';
        // 'auto', 'all' and '*' mean "do not filter by runtime".
        const isWildcard = ['auto', 'all', '*'].includes(String(requestedRuntime).toLowerCase());
        const runtimeFilter = isWildcard ? undefined : requestedRuntime;
        const limit = positiveOr(options.limit, 10);
        const poolLimit = positiveOr(options.poolLimit, 20000);
        const targetCtx = positiveOr(options.targetContext, undefined);

        const searchFilters = {
            source: options.source,
            format: options.format,
            runtime: runtimeFilter,
            quantization: options.quantization,
            maxSizeGB: options.maxSizeGB,
            minParamsB: options.minParamsB,
            maxParamsB: options.maxParamsB,
            localOnly: options.localOnly !== false,
            limit: poolLimit
        };
        const rows = this.database.searchModelArtifacts(options.query || '', searchFilters);
        const convertible = rows.map((row) => artifactToSelectorModel(row)).filter(Boolean);
        const modelPool = dedupeRecommendationPool(convertible);

        const selectorHardware = normalizeHardwareForSelector(options.hardware || {});
        const normalizedRuntime = runtimeFilter || 'auto';

        // Nothing in the registry matched: hand back an empty result instead
        // of calling the selector, which could silently substitute its
        // built-in catalog and mislabel non-registry models as registry rows.
        if (modelPool.length === 0) {
            const emptyResult = {
                category,
                optimizeFor: this.selector.normalizeOptimizationObjective(options.optimizeFor || 'balanced'),
                runtime: normalizedRuntime,
                candidates: [],
                total_evaluated: 0,
                timestamp: new Date().toISOString()
            };
            return {
                category,
                runtime: normalizedRuntime,
                rows,
                modelPool,
                result: emptyResult
            };
        }

        // Rank a wider window than requested so variants can be collapsed and
        // source diversity applied before trimming to the caller's limit.
        const rankWindow = Math.max(limit * 8, 200);
        let result;
        if (runtimeFilter) {
            result = await this.selector.selectModels(category, {
                topN: rankWindow,
                enableProbe: false,
                silent: true,
                optimizeFor: options.optimizeFor || 'balanced',
                runtime: runtimeFilter,
                targetCtx,
                hardware: selectorHardware,
                installedModels: [],
                modelPool
            });
        } else {
            result = this.scoreAutoRuntimePool({
                category,
                limit: rankWindow,
                targetCtx,
                optimizeFor: options.optimizeFor || 'balanced',
                hardware: selectorHardware,
                modelPool
            });
        }

        // Distinct models first, then source diversity, then the final trim.
        if (result && Array.isArray(result.candidates)) {
            const distinct = collapseToDistinctModels(result.candidates);
            result.candidates = applySourceDiversity(distinct, limit);
        }

        return {
            category,
            runtime: normalizedRuntime,
            rows,
            modelPool,
            result
        };
    }

    // Produce legacy-format best-model lists for each category (default: the
    // seven built-in categories, 3 models each). A failure in one category is
    // recorded on that category's entry and does not abort the others.
    async getBestModelsForHardware(hardware, options = {}) {
        const defaultCategories = ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
        const categories = options.categories || defaultCategories;
        const runtime = options.runtime || 'auto';
        const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
        const limit = Number(options.limit) > 0 ? Number(options.limit) : 3;
        const registryStats = this.database.getRegistryStats();
        const recommendations = {};
        // Unique artifact|source|runtime keys seen across all categories.
        const analyzedModels = new Set();

        for (const category of categories) {
            try {
                const selection = await this.selectCategory({
                    ...options,
                    category,
                    runtime,
                    optimizeFor,
                    limit,
                    hardware
                });

                for (const model of selection.modelPool) {
                    const artifact = model.artifact || {};
                    const artifactKey = artifact.artifact_id
                        || artifact.id
                        || artifact.filename
                        || model.model_identifier;
                    const uniqueKey = [artifactKey, model.source, model.preferredRuntime]
                        .filter(Boolean)
                        .join('|');
                    analyzedModels.add(uniqueKey);
                }

                const normalizedHardware = this.selector.normalizeHardwareProfile(
                    normalizeHardwareForSelector(hardware || {})
                );
                recommendations[category] = {
                    tier: this.selector.mapHardwareTier(normalizedHardware),
                    optimizeFor: selection.result.optimizeFor,
                    runtime: selection.runtime,
                    source: 'registry',
                    bestModels: selection.result.candidates.map(
                        (candidate) => this.selector.mapCandidateToLegacyFormat(candidate)
                    ),
                    totalEvaluated: selection.result.total_evaluated,
                    totalArtifacts: selection.rows.length,
                    totalCandidates: selection.modelPool.length,
                    category: this.selector.getCategoryInfo(category)
                };
            } catch (error) {
                recommendations[category] = {
                    tier: 'unknown',
                    optimizeFor,
                    runtime,
                    source: 'registry',
                    bestModels: [],
                    totalEvaluated: 0,
                    totalArtifacts: 0,
                    totalCandidates: 0,
                    error: error.message,
                    category: this.selector.getCategoryInfo(category)
                };
            }
        }

        return {
            recommendations,
            registryStats,
            totalModelsAnalyzed: analyzedModels.size
        };
    }

    // Score every pool model that passes the category filter, each on its own
    // preferred runtime, and return the candidates sorted by descending score.
    scoreAutoRuntimePool({ category, limit, targetCtx, optimizeFor, hardware, modelPool }) {
        const { selector } = this;
        const normalizedHardware = selector.normalizeHardwareProfile(hardware);
        const objective = selector.normalizeOptimizationObjective(optimizeFor);
        const ctx = targetCtx || selector.targetContexts[category] || selector.targetContexts.general;

        const memoryInfo = normalizedHardware?.memory;
        const gpuInfo = normalizedHardware?.gpu;
        const totalMem = memoryInfo?.totalGB ?? memoryInfo?.total ?? 8;
        // Without an explicit usable figure: the smaller of 80% of RAM and
        // RAM minus 2, but never below 1.
        let usableMem;
        if (typeof normalizedHardware.usableMemGB === 'number') {
            usableMem = normalizedHardware.usableMemGB;
        } else {
            usableMem = Math.max(1, Math.min(0.8 * totalMem, totalMem - 2));
        }
        const isUnified = Boolean(gpuInfo?.unified) || gpuInfo?.type === 'apple_silicon';
        const vram = gpuInfo?.vramGB ?? gpuInfo?.vram ?? 0;
        // Unified memory budgets against usable RAM; otherwise VRAM when there is any.
        const budget = isUnified ? usableMem : (vram || usableMem);

        const filtered = selector.filterByCategory(modelPool, category);
        const candidates = [];
        for (const model of filtered) {
            const runtime = model.preferredRuntime || choosePreferredRuntime(
                model.artifact?.runtime_support,
                model.artifact?.format,
                model.source
            );
            const scored = selector.evaluateModel(
                model,
                normalizedHardware,
                category,
                ctx,
                budget,
                objective,
                runtime
            );
            if (scored) {
                candidates.push(scored);
            }
        }
        candidates.sort((a, b) => b.score - a.score);

        return {
            category,
            optimizeFor: objective,
            runtime: 'auto',
            hardware: normalizedHardware,
            // Deliberately wide: selectCategory collapses variants and applies
            // source diversity before trimming to the caller's limit.
            candidates: candidates.slice(0, Math.max(limit, 2000)),
            total_evaluated: filtered.length,
            timestamp: new Date().toISOString()
        };
    }

    // Close the underlying database.
    close() {
        this.database.close();
    }
}
621
+
622
+ module.exports = {
623
+ RegistryRecommender,
624
+ collapseToDistinctModels,
625
+ applySourceDiversity,
626
+ modelDiversityKey,
627
+ artifactToSelectorModel,
628
+ candidateToRecommendation,
629
+ normalizeHardwareForSelector,
630
+ choosePreferredRuntime,
631
+ dedupeRecommendationPool
632
+ };
@@ -1,8 +1,16 @@
1
- This directory contains the packaged Ollama model database snapshot used on
2
- first run.
1
+ This directory contains the packaged model database snapshot used on first run.
3
2
 
4
3
  `models.db` is copied to `~/.llm-checker/models.db` only when the user does not
5
4
  already have a local database. After that, `llm-checker sync` updates the user's
6
- local copy.
5
+ local Ollama copy, and `llm-checker registry-sync` can refresh the multi-source
6
+ registry in the user's local copy.
7
+
8
+ The snapshot includes:
9
+
10
+ - the Ollama catalog used by classic recommendation/search commands
11
+ - a multi-source registry of exact installable/downloadable artifacts from
12
+ Hugging Face, Ollama, and GPT4All
13
+ - Hugging Face pages are fetched with cursor pagination; the default packaged
14
+ snapshot uses the top 3000 repositories by downloads
7
15
 
8
16
  Refresh cadence: weekly via `.github/workflows/update-model-db.yml`.
Binary file