llm-checker 3.5.15 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +28 -8
  2. package/analyzer/compatibility.js +5 -0
  3. package/analyzer/performance.js +5 -4
  4. package/bin/cli.js +5 -39
  5. package/bin/enhanced_cli.js +449 -24
  6. package/bin/mcp-server.mjs +266 -101
  7. package/package.json +13 -8
  8. package/src/ai/multi-objective-selector.js +118 -11
  9. package/src/calibration/calibration-manager.js +4 -1
  10. package/src/data/model-database.js +489 -5
  11. package/src/data/registry-ingestors.js +751 -0
  12. package/src/data/registry-recommender.js +514 -0
  13. package/src/data/seed/README.md +11 -3
  14. package/src/data/seed/models.db +0 -0
  15. package/src/data/sync-manager.js +32 -18
  16. package/src/hardware/backends/apple-silicon.js +5 -1
  17. package/src/hardware/backends/cuda-detector.js +47 -19
  18. package/src/hardware/backends/intel-detector.js +6 -2
  19. package/src/hardware/backends/rocm-detector.js +6 -2
  20. package/src/hardware/detector.js +57 -30
  21. package/src/hardware/unified-detector.js +129 -25
  22. package/src/index.js +68 -4
  23. package/src/models/ai-check-selector.js +36 -5
  24. package/src/models/deterministic-selector.js +179 -18
  25. package/src/models/expanded_database.js +9 -5
  26. package/src/models/intelligent-selector.js +87 -1
  27. package/src/models/moe-assumptions.js +11 -0
  28. package/src/models/requirements.js +16 -11
  29. package/src/models/scoring-core.js +341 -0
  30. package/src/models/scoring-engine.js +9 -2
  31. package/src/ollama/capacity-planner.js +15 -2
  32. package/src/ollama/client.js +70 -30
  33. package/src/ollama/enhanced-client.js +20 -2
  34. package/src/ollama/manager.js +14 -2
  35. package/src/policy/cli-policy.js +8 -2
  36. package/src/policy/policy-engine.js +2 -1
  37. package/src/provenance/model-provenance.js +4 -1
  38. package/src/ui/cli-theme.js +47 -7
  39. package/src/ui/interactive-panel.js +162 -24
@@ -0,0 +1,514 @@
1
+ const ModelDatabase = require('./model-database');
2
+ const DeterministicModelSelector = require('../models/deterministic-selector');
3
+
4
/**
 * Coerce an optional list field into an array.
 *
 * @param {*} value - Any value, typically a column that may be missing.
 * @returns {Array} `value` itself when it is an array, otherwise a new empty array.
 */
function toArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
7
+
8
/**
 * Extract a parameter count, in billions, from the first usable value.
 *
 * Each value is tried in order:
 *   - a positive finite number is returned unchanged;
 *   - otherwise its string form (commas removed) is searched for a
 *     Mixture-of-Experts "NxMB" size (e.g. "8x7B" -> 56), then for a plain
 *     "<number><unit>" size where the unit is b (billions), m (millions) or
 *     t (trillions).
 *
 * @param {...*} values - Numbers or strings (model names, size columns).
 * @returns {number|null} Size in billions of parameters, or null when no
 *   value yields a positive size.
 */
function parseParamsB(...values) {
  const moePattern = /(\d+)\s*x\s*(\d+(?:\.\d+)?)\s*b\b/i;
  const sizePattern = /(\d+(?:\.\d+)?)\s*([bmt])\b/i;

  // Resolve a single value to billions, or null when it carries no size.
  const readOne = (value) => {
    if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
      return value;
    }

    const text = String(value || '').replace(/,/g, '');

    // MoE names are sized as experts * per-expert size so the model is not
    // treated as if it were a single expert.
    const moeHit = moePattern.exec(text);
    if (moeHit) {
      const expertCount = Number(moeHit[1]);
      const expertSize = Number(moeHit[2]);
      if (expertCount > 0 && Number.isFinite(expertSize) && expertSize > 0) {
        return expertCount * expertSize;
      }
    }

    const sizeHit = sizePattern.exec(text);
    if (!sizeHit) {
      return null;
    }
    const amount = Number(sizeHit[1]);
    if (!Number.isFinite(amount) || amount <= 0) {
      return null;
    }

    switch (sizeHit[2].toLowerCase()) {
      case 't':
        return amount * 1000;
      case 'm':
        return amount / 1000;
      default:
        return amount;
    }
  };

  for (const value of values) {
    const billions = readOne(value);
    if (billions !== null) {
      return billions;
    }
  }
  return null;
}
35
+
36
/**
 * Read an active-parameter marker such as "-A17B" (as in "Qwen3-397B-A17B")
 * from the first value that contains one.
 *
 * The marker must sit at the start of the string or follow one of
 * `-`, `_`, whitespace or `/`.
 *
 * @param {...*} values - Candidate name strings; non-strings are stringified.
 * @returns {number|null} Active parameters in billions, or null if none found.
 */
function parseActiveParamsFromName(...values) {
  const activePattern = /(?:^|[-_\s/])a(\d+(?:\.\d+)?)\s*b\b/i;

  for (const value of values) {
    const hit = activePattern.exec(String(value || ''));
    if (!hit) {
      continue;
    }
    const billions = Number(hit[1]);
    if (Number.isFinite(billions) && billions > 0) {
      return billions;
    }
  }
  return null;
}
47
+
48
/**
 * Report whether any of the given names looks like a Mixture-of-Experts model.
 *
 * Recognised spellings: "8x7B"-style expert counts, "397B-A17B"-style
 * total/active pairs, the standalone word "moe", and anything containing
 * "mixtral" (all case-insensitive).
 *
 * @param {...*} values - Candidate name strings; non-strings are stringified.
 * @returns {boolean} True when at least one value matches.
 */
function isMoEName(...values) {
  const moeNamePattern = /(\d+\s*x\s*\d+(?:\.\d+)?\s*b\b)|(\d+(?:\.\d+)?\s*b[-_\s]*a\d)|\bmoe\b|mixtral/i;

  for (const value of values) {
    if (moeNamePattern.test(String(value || ''))) {
      return true;
    }
  }
  return false;
}
55
+
56
// Ordered family table: the first matching pattern wins, so more specific
// families must precede the generic ones they overlap with.
const FAMILY_PATTERNS = Object.freeze([
  ['deepseek-r1', /deepseek[-_ ]?r1/],
  ['deepseek-coder', /deepseek[-_ ]?coder/],
  ['deepseek', /deepseek/],
  ['qwen3', /qwen3/],
  ['qwen2.5', /qwen2\.5/],
  ['qwen2', /qwen2/],
  ['qwen', /qwen/],
  // Must come before the llama* entries: "codellama-34b" contains "llama-3"
  // and would otherwise be labelled llama3.
  ['codellama', /codellama|code-?llama/],
  ['llama3.2', /llama3\.2|llama-?3\.2/],
  ['llama3.1', /llama3\.1|llama-?3\.1/],
  // (?!\d) keeps size suffixes such as "llama-30b" / "llama-20b" from being
  // read as a Llama 3 / Llama 2 version number.
  ['llama3', /llama-?3(?!\d)/],
  ['llama2', /llama-?2(?!\d)/],
  ['mistral', /mistral/],
  ['mixtral', /mixtral/],
  ['gemma3', /gemma3/],
  ['gemma2', /gemma2/],
  ['gemma', /gemma/],
  ['phi4', /phi-?4/],
  ['phi3', /phi-?3/],
  // Require a non-letter (or string start) before "phi" so that words like
  // "dolphin" are not classified as the Phi family.
  ['phi', /(?:^|[^a-z])phi/],
  ['starcoder', /starcoder/],
  ['llava', /llava/],
  ['nomic-embed', /nomic-embed/],
  ['bge', /\bbge\b/]
]);

/**
 * Infer a model family label from a model name or identifier.
 *
 * The identifier is lowercased and tested against an ordered table of
 * patterns; the label of the first match is returned.
 *
 * @param {string} [identifier=''] - Model name, repo id, or a combination.
 * @returns {string} A family label such as 'llama3.1' or 'deepseek-coder',
 *   or 'other' when nothing matches.
 */
function inferFamily(identifier = '') {
  const text = String(identifier || '').toLowerCase();
  const hit = FAMILY_PATTERNS.find(([, pattern]) => pattern.test(text));
  return hit ? hit[0] : 'other';
}
90
+
91
/**
 * Pick the quantization label for an artifact row.
 *
 * An explicit `quantization` (or, failing that, `precision`) value is
 * returned as-is. Otherwise the label is derived from `format`:
 * safetensors / pytorch / pytorch_bin map to 'FP16', everything else to
 * 'Q4_K_M'. The format is compared case-insensitively and with surrounding
 * whitespace ignored, matching how `choosePreferredRuntime` treats formats.
 *
 * @param {object} [row={}] - Artifact row with optional `quantization`,
 *   `precision` and `format` fields.
 * @returns {*} The declared quantization/precision, or 'FP16' / 'Q4_K_M'.
 */
function normalizeQuantization(row = {}) {
  const declared = row.quantization || row.precision || '';
  if (declared) {
    return declared;
  }

  const format = String(row.format || '').trim().toLowerCase();
  const fullPrecisionFormats = ['safetensors', 'pytorch', 'pytorch_bin'];
  return fullPrecisionFormats.includes(format) ? 'FP16' : 'Q4_K_M';
}
99
+
100
/**
 * Tell whether a filename is one shard of a multi-file weight set, i.e. it
 * ends in "-NNNNN-of-NNNNN.safetensors" or ".bin" (five or more digits each,
 * case-insensitive).
 *
 * @param {string} [filename=''] - File name to inspect.
 * @returns {boolean} True when the name has a shard suffix.
 */
function isShardedWeightFile(filename = '') {
  const shardSuffix = /-\d{5,}-of-\d{5,}\.(safetensors|bin)$/i;
  const name = String(filename || '');
  return shardSuffix.test(name);
}
103
+
104
/**
 * Choose the single runtime an artifact should be evaluated under.
 *
 * Precedence (first rule that applies wins): ollama, mlx, llama.cpp, vllm,
 * transformers. The source id forces ollama, and the mlx / gguf formats force
 * mlx / llama.cpp, even when the runtime list does not mention them. If no
 * rule applies, the first listed runtime is used, else 'transformers'.
 * All comparisons are case-insensitive.
 *
 * @param {Array} [runtimeSupport=[]] - Runtimes the artifact supports.
 * @param {string} [format=''] - Artifact format (e.g. 'gguf', 'mlx').
 * @param {string} [sourceId=''] - Registry/source identifier.
 * @returns {string} Lowercased runtime name.
 */
function choosePreferredRuntime(runtimeSupport = [], format = '', sourceId = '') {
  const supported = toArray(runtimeSupport).map((entry) => String(entry).toLowerCase());
  const fmt = String(format || '').toLowerCase();
  const origin = String(sourceId || '').toLowerCase();
  const supports = (name) => supported.includes(name);

  // [runtime, applies] pairs in precedence order.
  const rules = [
    ['ollama', origin === 'ollama' || supports('ollama')],
    ['mlx', fmt === 'mlx' || supports('mlx')],
    ['llama.cpp', fmt === 'gguf' || supports('llama.cpp')],
    ['vllm', supports('vllm')],
    ['transformers', supports('transformers')]
  ];

  const winner = rules.find(([, applies]) => applies);
  if (winner) {
    return winner[0];
  }
  return supported[0] || 'transformers';
}
116
+
117
/**
 * Convert a registry artifact row into the model shape consumed by the
 * selector, or return null when the row cannot be sized or identified.
 *
 * Sharded Hugging Face weight files are collapsed to their repository: the
 * identifier, version, install command and download URL then refer to the
 * repo rather than to the individual shard.
 *
 * @param {object} row - Artifact row from the model database.
 * @returns {object|null} Selector model (with `artifact` and `provenance`
 *   attached), or null if there is no identifier or no positive param count.
 */
function artifactToSelectorModel(row) {
  const isShardedHfFile = row.source_id === 'huggingface'
    && isShardedWeightFile(row.filename || row.artifact_name);
  const fileLevelName = row.artifact_name || row.filename;
  const repoLevelName = row.canonical_model_id || row.repo_id;
  const identifier = isShardedHfFile ? repoLevelName : (fileLevelName || repoLevelName);
  const displayName = row.canonical_model_id || row.repo_display_name || identifier;
  const quant = normalizeQuantization(row);

  // Memory is sized from the TOTAL parameter count (every expert of an MoE is
  // resident). The total is re-derived from the names and max'ed with the
  // stored column so that an under-reported stored value (one expert, or an
  // active-param count) cannot make a large model appear to fit small hardware.
  const nameStrings = [row.artifact_name, row.filename, row.canonical_model_id, row.repo_id];
  const storedTotalB = parseParamsB(row.parameter_count_b);
  const nameTotalB = parseParamsB(...nameStrings);
  const totalParamsB = Math.max(storedTotalB || 0, nameTotalB || 0) || null;
  const hasTotal = Number.isFinite(totalParamsB) && totalParamsB > 0;

  const storedActiveB = parseParamsB(row.active_parameter_count_b);
  const activeParamsB = storedActiveB || parseActiveParamsFromName(...nameStrings);
  const activeBelowTotal = Number.isFinite(activeParamsB)
    && Number.isFinite(totalParamsB)
    && activeParamsB < totalParamsB;
  const isMoE = isMoEName(...nameStrings) || activeBelowTotal;

  // Size by the total; the stored active count is only a last resort when no
  // total could be determined at all.
  const paramsB = hasTotal ? totalParamsB : storedActiveB;
  if (!identifier || !Number.isFinite(paramsB) || paramsB <= 0) {
    return null;
  }

  // MoE rows are flagged and carry the total so memory covers the full weight
  // set. activeParamsB is intentionally NOT emitted: exposing it would switch
  // memory sizing to the active count and let e.g. a 397B-A17B model appear
  // to fit in roughly 11GB. Active params are meant to affect speed only.
  const moeFields = isMoE && hasTotal
    ? { isMoE: true, totalParamsB, total_params_b: totalParamsB }
    : {};

  const runtimeSupport = toArray(row.runtime_support);
  const preferredRuntime = choosePreferredRuntime(runtimeSupport, row.format, row.source_id);
  const tasks = toArray(row.tasks);
  const modalities = toArray(row.modalities);
  const tags = [row.source_id, row.format, quant, ...runtimeSupport, ...tasks]
    .filter(Boolean)
    .map((tag) => String(tag).toLowerCase());

  const sizeGB = Number(row.size_gb);
  const hasSize = Number.isFinite(sizeGB) && sizeGB > 0;
  const contextLength = Number(row.context_length);

  // Values shared between the top-level fields and the provenance record.
  const registry = row.source_name || row.source_id;
  const version = isShardedHfFile ? (row.repo_id || identifier) : (fileLevelName || identifier);
  const license = row.license || 'unknown';
  const digest = row.sha256 || row.etag || 'unknown';
  const installCommand = isShardedHfFile && row.repo_id
    ? `hf download ${row.repo_id}`
    : (row.install_command || '');
  const downloadUrl = isShardedHfFile ? (row.repo_url || '') : (row.download_url || '');

  return {
    name: displayName,
    model_identifier: identifier,
    family: inferFamily(`${displayName} ${identifier}`),
    paramsB,
    ...moeFields,
    quant,
    availableQuantizations: [quant],
    sizeGB: hasSize ? sizeGB : undefined,
    sizeByQuant: hasSize ? { [quant]: sizeGB } : {},
    ctxMax: contextLength > 0 ? contextLength : 4096,
    tags,
    modalities: modalities.length > 0 ? modalities : ['text'],
    pulls: Number(row.downloads) || 0,
    source: row.source_id,
    registry,
    version,
    license,
    digest,
    installCommand,
    downloadUrl,
    preferredRuntime,
    artifact: row,
    provenance: {
      source: row.source_id,
      registry,
      version,
      license,
      digest,
      download_url: downloadUrl,
      install_command: installCommand,
      repo_url: row.repo_url || ''
    }
  };
}
216
+
217
/**
 * Collapse selector models that share source, repo (or name), identifier and
 * preferred runtime, keeping the smallest one by size.
 *
 * A model with no usable size counts as maximally large, and ties keep the
 * model seen first. Output order follows the first appearance of each key.
 *
 * @param {Iterable<object>} models - Selector models to deduplicate.
 * @returns {object[]} One model per distinct key.
 */
function dedupeRecommendationPool(models) {
  const keyOf = (model) => {
    const repoOrName = (model.artifact || {}).repo_id || model.name;
    return [model.source, repoOrName, model.model_identifier, model.preferredRuntime].join('|');
  };
  const sizeOf = (model) =>
    Number(model.sizeGB || model.artifact?.size_gb || Number.MAX_SAFE_INTEGER);

  const bestByKey = new Map();
  for (const model of models) {
    const key = keyOf(model);
    const current = bestByKey.get(key);
    const isBetter = !current || sizeOf(model) < sizeOf(current);
    if (isBetter) {
      bestByKey.set(key, model);
    }
  }
  return Array.from(bestByKey.values());
}
242
+
243
/**
 * Flatten a scored selector candidate into the snake_case recommendation
 * record returned by `RegistryRecommender.recommend`.
 *
 * Size, install command and download URL prefer the values on the candidate's
 * model metadata and fall back to the raw artifact row.
 *
 * @param {object} candidate - Scored candidate; must have a `meta` object.
 * @returns {object} Plain recommendation record.
 */
function candidateToRecommendation(candidate) {
  const { meta } = candidate;
  const artifact = meta.artifact || {};

  return {
    model: meta.name,
    artifact: meta.model_identifier,
    source: meta.source,
    registry: meta.registry,
    score: candidate.score,
    params_b: meta.paramsB,
    quantization: candidate.quant,
    size_gb: meta.sizeGB || artifact.size_gb || null,
    required_gb: candidate.requiredGB,
    estimated_tps: candidate.estTPS,
    runtime: candidate.runtime,
    install_command: meta.installCommand || artifact.install_command || '',
    download_url: meta.downloadUrl || artifact.download_url || '',
    license: meta.license,
    gated: Boolean(artifact.gated),
    requires_auth: Boolean(artifact.requires_auth),
    tasks: toArray(artifact.tasks),
    modalities: toArray(artifact.modalities),
    rationale: candidate.rationale,
    components: candidate.components,
    memory: candidate.memory,
    speed: candidate.speed
  };
}
270
+
271
/**
 * Adapt a hardware report to the profile shape the selector expects.
 *
 * A report that already has `memory.totalGB`, `gpu` and `acceleration` is
 * returned untouched. Otherwise the profile is built from `summary`, `cpu`
 * (or `backends.cpu.info`) and `primary`, using these defaults when a value
 * is missing: backend 'cpu', 8 GB RAM, 0 GB VRAM, 4 cores, 1 GPU.
 *
 * Numeric fields are validated: a value that does not coerce to a positive
 * finite number (for example a `cores` object without logical/physical
 * counts, or a non-numeric `gpuCount`) falls back to its default instead of
 * leaking NaN into the profile.
 *
 * @param {object} [hardware={}] - Hardware report from a detector.
 * @returns {object} Profile with `cpu`, `gpu`, `memory`, `acceleration` and
 *   `usableMemGB` (undefined unless `summary.effectiveMemory` is positive).
 */
function normalizeHardwareForSelector(hardware = {}) {
  if (hardware.memory?.totalGB && hardware.gpu && hardware.acceleration) {
    return hardware;
  }

  // Coerce to a positive finite number, or use the fallback.
  const positiveOr = (value, fallback) => {
    const numeric = Number(value);
    return Number.isFinite(numeric) && numeric > 0 ? numeric : fallback;
  };

  const summary = hardware.summary || {};
  const cpuInfo = hardware.cpu || hardware.backends?.cpu?.info || {};
  // `cores` may be an object ({ logical, physical }) or a plain count.
  const cpuCores = cpuInfo.cores || {};
  const bestBackend = summary.bestBackend || hardware.primary?.type || 'cpu';
  const systemRAM = positiveOr(summary.systemRAM || summary.effectiveMemory, 8);
  const totalVRAM = positiveOr(summary.totalVRAM, 0);
  const gpuModel = summary.gpuModel || summary.gpuInventory || hardware.primary?.name || '';
  const isMetal = bestBackend === 'metal';
  const isCuda = bestBackend === 'cuda';
  const isRocm = bestBackend === 'rocm';

  return {
    cpu: {
      architecture: cpuInfo.architecture || process.arch,
      cores: positiveOr(cpuCores.logical || cpuCores.physical || cpuInfo.cores, 4),
      model: cpuInfo.brand || summary.cpuModel || ''
    },
    gpu: {
      type: isMetal ? 'apple_silicon' : (isCuda ? 'nvidia' : (isRocm ? 'amd' : 'cpu_only')),
      model: gpuModel,
      vramGB: totalVRAM,
      totalVRAM,
      gpuCount: Math.max(1, positiveOr(summary.gpuCount, 1)),
      // Metal is always treated as unified; otherwise only an integrated GPU
      // with no dedicated GPU counts.
      unified: Boolean(isMetal || (summary.hasIntegratedGPU && !summary.hasDedicatedGPU)),
      isMultiGPU: Boolean(summary.isMultiGPU)
    },
    memory: {
      totalGB: systemRAM,
      total: systemRAM
    },
    acceleration: {
      supports_metal: isMetal,
      supports_cuda: isCuda,
      supports_rocm: isRocm
    },
    usableMemGB: Number(summary.effectiveMemory) > 0 ? Number(summary.effectiveMemory) : undefined
  };
}
314
+
315
/**
 * Recommends installable artifacts from the multi-source registry by feeding
 * database rows through the deterministic model selector.
 */
class RegistryRecommender {
  /**
   * @param {object} [options={}]
   * @param {object} [options.database] - Database instance; a ModelDatabase
   *   is created from `options.databaseOptions` when omitted.
   * @param {object} [options.selector] - Selector instance; a
   *   DeterministicModelSelector is created when omitted.
   */
  constructor(options = {}) {
    const { database, databaseOptions, selector } = options;
    this.database = database || new ModelDatabase(databaseOptions || {});
    this.selector = selector || new DeterministicModelSelector();
  }

  /** Initialize the underlying database. */
  async initialize() {
    await this.database.initialize();
  }

  /**
   * Produce a recommendation report for one category.
   *
   * @param {object} [options={}] - Same options as `selectCategory`.
   * @returns {Promise<object>} Report with counts, flattened recommendations,
   *   registry stats and a generation timestamp.
   */
  async recommend(options = {}) {
    const { category, runtime, rows, modelPool, result } = await this.selectCategory(options);
    return {
      category,
      runtime,
      optimizeFor: result.optimizeFor,
      total_artifacts: rows.length,
      total_candidates: modelPool.length,
      total_evaluated: result.total_evaluated,
      recommendations: result.candidates.map(candidateToRecommendation),
      registry: this.database.getRegistryStats(),
      generated_at: new Date().toISOString()
    };
  }

  /**
   * Query the registry, build a deduplicated model pool and score it.
   *
   * With an explicit runtime the selector's own `selectModels` is used; with
   * 'auto', 'all' or '*' each model is scored under its preferred runtime.
   *
   * @param {object} [options={}] - Category, runtime, limits, search filters,
   *   `optimizeFor`, `targetContext` and `hardware`.
   * @returns {Promise<object>} `{ category, runtime, rows, modelPool, result }`.
   */
  async selectCategory(options = {}) {
    // Use the option only when it coerces to a positive number.
    const positiveOr = (value, fallback) => (Number(value) > 0 ? Number(value) : fallback);

    const category = options.category || 'general';
    const runtime = options.runtime || 'auto';
    const wantsAnyRuntime = ['auto', 'all', '*'].includes(String(runtime).toLowerCase());
    const runtimeFilter = wantsAnyRuntime ? undefined : runtime;
    const limit = positiveOr(options.limit, 10);
    const poolLimit = positiveOr(options.poolLimit, 20000);
    const targetCtx = positiveOr(options.targetContext, undefined);
    const optimizeFor = options.optimizeFor || 'balanced';

    const searchFilters = {
      source: options.source,
      format: options.format,
      runtime: runtimeFilter,
      quantization: options.quantization,
      maxSizeGB: options.maxSizeGB,
      minParamsB: options.minParamsB,
      maxParamsB: options.maxParamsB,
      // Local-only unless the caller explicitly passes false.
      localOnly: options.localOnly !== false,
      limit: poolLimit
    };
    const rows = this.database.searchModelArtifacts(options.query || '', searchFilters);
    const convertible = rows.map(artifactToSelectorModel).filter(Boolean);
    const modelPool = dedupeRecommendationPool(convertible);

    const selectorHardware = normalizeHardwareForSelector(options.hardware || {});

    let result;
    if (runtimeFilter) {
      result = await this.selector.selectModels(category, {
        topN: limit,
        enableProbe: false,
        silent: true,
        optimizeFor,
        runtime: runtimeFilter,
        targetCtx,
        hardware: selectorHardware,
        installedModels: [],
        modelPool
      });
    } else {
      result = this.scoreAutoRuntimePool({
        category,
        limit,
        targetCtx,
        optimizeFor,
        hardware: selectorHardware,
        modelPool
      });
    }

    return {
      category,
      runtime: runtimeFilter || 'auto',
      rows,
      modelPool,
      result
    };
  }

  /**
   * Run `selectCategory` for several categories and return the results in the
   * legacy per-category format.
   *
   * A failure in one category is recorded on that category's entry (`error`,
   * empty `bestModels`, tier 'unknown') and does not abort the others.
   *
   * @param {object} hardware - Hardware report for the target machine.
   * @param {object} [options={}] - `categories`, `runtime`, `optimizeFor`
   *   (or `optimize`), `limit`, plus any `selectCategory` filters.
   * @returns {Promise<object>} `{ recommendations, registryStats, totalModelsAnalyzed }`.
   */
  async getBestModelsForHardware(hardware, options = {}) {
    const defaultCategories = ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
    const categories = options.categories || defaultCategories;
    const runtime = options.runtime || 'auto';
    const optimizeFor = options.optimizeFor || options.optimize || 'balanced';
    const limit = Number(options.limit) > 0 ? Number(options.limit) : 3;
    const registryStats = this.database.getRegistryStats();

    const recommendations = {};
    // Distinct artifact/source/runtime keys seen across all categories.
    const seenModelKeys = new Set();

    for (const category of categories) {
      try {
        const selection = await this.selectCategory({
          ...options,
          category,
          runtime,
          optimizeFor,
          limit,
          hardware
        });

        for (const model of selection.modelPool) {
          const artifact = model.artifact || {};
          const artifactKey = artifact.artifact_id
            || artifact.id
            || artifact.filename
            || model.model_identifier;
          const keyParts = [artifactKey, model.source, model.preferredRuntime].filter(Boolean);
          seenModelKeys.add(keyParts.join('|'));
        }

        const hardwareProfile = this.selector.normalizeHardwareProfile(
          normalizeHardwareForSelector(hardware || {})
        );
        const { result } = selection;
        recommendations[category] = {
          tier: this.selector.mapHardwareTier(hardwareProfile),
          optimizeFor: result.optimizeFor,
          runtime: selection.runtime,
          source: 'registry',
          bestModels: result.candidates.map((candidate) => this.selector.mapCandidateToLegacyFormat(candidate)),
          totalEvaluated: result.total_evaluated,
          totalArtifacts: selection.rows.length,
          totalCandidates: selection.modelPool.length,
          category: this.selector.getCategoryInfo(category)
        };
      } catch (error) {
        recommendations[category] = {
          tier: 'unknown',
          optimizeFor,
          runtime,
          source: 'registry',
          bestModels: [],
          totalEvaluated: 0,
          totalArtifacts: 0,
          totalCandidates: 0,
          error: error.message,
          category: this.selector.getCategoryInfo(category)
        };
      }
    }

    return {
      recommendations,
      registryStats,
      totalModelsAnalyzed: seenModelKeys.size
    };
  }

  /**
   * Score a model pool when no runtime was requested: every model is
   * evaluated under its own preferred runtime, then ranked by score.
   *
   * The memory budget is usable system memory on unified-memory hardware,
   * otherwise VRAM when non-zero, otherwise usable system memory. Usable
   * memory defaults to min(80% of total, total - 2), floored at 1.
   *
   * @param {object} params
   * @param {string} params.category - Category to filter and score for.
   * @param {number} params.limit - Maximum candidates to return.
   * @param {number} [params.targetCtx] - Context length override.
   * @param {string} params.optimizeFor - Optimization objective.
   * @param {object} params.hardware - Selector-shaped hardware profile.
   * @param {object[]} params.modelPool - Models to evaluate.
   * @returns {object} Result with ranked `candidates` and `total_evaluated`.
   */
  scoreAutoRuntimePool({ category, limit, targetCtx, optimizeFor, hardware, modelPool }) {
    const profile = this.selector.normalizeHardwareProfile(hardware);
    const objective = this.selector.normalizeOptimizationObjective(optimizeFor);
    const contexts = this.selector.targetContexts;
    const ctx = targetCtx || contexts[category] || contexts.general;

    const totalMem = profile?.memory?.totalGB ?? profile?.memory?.total ?? 8;
    let usableMem;
    if (typeof profile.usableMemGB === 'number') {
      usableMem = profile.usableMemGB;
    } else {
      usableMem = Math.max(1, Math.min(0.8 * totalMem, totalMem - 2));
    }
    const isUnified = Boolean(profile?.gpu?.unified) || profile?.gpu?.type === 'apple_silicon';
    const vram = profile?.gpu?.vramGB ?? profile?.gpu?.vram ?? 0;
    const budget = isUnified ? usableMem : (vram || usableMem);

    const eligible = this.selector.filterByCategory(modelPool, category);
    const candidates = eligible
      .map((model) => {
        const runtime = model.preferredRuntime || choosePreferredRuntime(
          model.artifact?.runtime_support,
          model.artifact?.format,
          model.source
        );
        return this.selector.evaluateModel(model, profile, category, ctx, budget, objective, runtime);
      })
      .filter(Boolean);

    candidates.sort((left, right) => right.score - left.score);

    return {
      category,
      optimizeFor: objective,
      runtime: 'auto',
      hardware: profile,
      candidates: candidates.slice(0, limit),
      total_evaluated: eligible.length,
      timestamp: new Date().toISOString()
    };
  }

  /** Close the underlying database. */
  close() {
    this.database.close();
  }
}
506
+
507
+ module.exports = {
508
+ RegistryRecommender,
509
+ artifactToSelectorModel,
510
+ candidateToRecommendation,
511
+ normalizeHardwareForSelector,
512
+ choosePreferredRuntime,
513
+ dedupeRecommendationPool
514
+ };
@@ -1,8 +1,16 @@
1
- This directory contains the packaged Ollama model database snapshot used on
2
- first run.
1
+ This directory contains the packaged model database snapshot used on first run.
3
2
 
4
3
  `models.db` is copied to `~/.llm-checker/models.db` only when the user does not
5
4
  already have a local database. After that, `llm-checker sync` updates the user's
6
- local copy.
5
+ local Ollama copy, and `llm-checker registry-sync` can refresh the multi-source
6
+ registry in the user's local copy.
7
+
8
+ The snapshot includes:
9
+
10
+ - the Ollama catalog used by classic recommendation/search commands
11
+ - a multi-source registry of exact installable/downloadable artifacts from
12
+ Hugging Face, Ollama, and GPT4All
13
+ - Hugging Face pages are fetched with cursor pagination; the default packaged
14
+ snapshot uses the top 3000 repositories by downloads
7
15
 
8
16
  Refresh cadence: weekly via `.github/workflows/update-model-db.yml`.
Binary file
@@ -47,22 +47,27 @@ class SyncManager {
47
47
 
48
48
  this.onProgress({ phase: 'start', message: 'Starting full sync...' });
49
49
 
50
- // Clear existing data
51
- this.db.clear();
52
-
53
- // Scrape all models
54
- const result = await this.scraper.scrapeAll((model, variants) => {
55
- // Save model as we go
56
- this.db.upsertModel(model);
57
-
58
- // Save variants
59
- for (const variant of variants) {
60
- this.db.upsertVariant(variant);
61
- }
62
- });
50
+ // Batch all writes into a single atomic DB file write at the end. Saving on
51
+ // every upsert re-exported and rewrote the whole sql.js DB thousands of
52
+ // times, turning the sync into O(n^2) disk I/O.
53
+ this.db.beginBatch();
54
+ try {
55
+ // Clear existing data
56
+ this.db.clear();
57
+
58
+ // Scrape all models
59
+ await this.scraper.scrapeAll((model, variants) => {
60
+ this.db.upsertModel(model);
61
+ for (const variant of variants) {
62
+ this.db.upsertVariant(variant);
63
+ }
64
+ });
63
65
 
64
- // Update sync timestamp
65
- this.db.setLastSync(new Date().toISOString());
66
+ // Update sync timestamp
67
+ this.db.setLastSync(new Date().toISOString());
68
+ } finally {
69
+ this.db.endBatch();
70
+ }
66
71
 
67
72
  const stats = this.db.getStats();
68
73
 
@@ -110,6 +115,9 @@ class SyncManager {
110
115
  let updated = 0;
111
116
  let added = 0;
112
117
 
118
+ // Batch all upserts into a single atomic DB write at the end (see fullSync).
119
+ this.db.beginBatch();
120
+ try {
113
121
  // Process new models
114
122
  for (const { id } of newModels) {
115
123
  try {
@@ -157,12 +165,18 @@ class SyncManager {
157
165
 
158
166
  await this.sleep(100);
159
167
  } catch (error) {
160
- // Ignore errors during incremental update
168
+ // Log instead of silently swallowing: a systematic failure here
169
+ // (network down, schema mismatch) would otherwise report success
170
+ // with updated: 0 and leave the catalog quietly stale.
171
+ this.onError(`Error updating ${id}: ${error.message}`);
161
172
  }
162
173
  }
163
174
 
164
- // Update sync timestamp
165
- this.db.setLastSync(new Date().toISOString());
175
+ // Update sync timestamp
176
+ this.db.setLastSync(new Date().toISOString());
177
+ } finally {
178
+ this.db.endBatch();
179
+ }
166
180
 
167
181
  const stats = this.db.getStats();
168
182
 
@@ -283,7 +283,11 @@ class AppleSiliconDetector {
283
283
  const info = this.detect();
284
284
  if (!info) return null;
285
285
 
286
- return `apple-${info.chip.toLowerCase().replace(/\s+/g, '-')}-${info.memory.unified}gb`;
286
+ // info.chip stays null when the sysctl brand-string read fails (sandboxed
287
+ // env, missing binary); fall back so this can't throw on null.toLowerCase().
288
+ const chip = info.chip || 'apple-silicon';
289
+ const unified = info.memory?.unified || 0;
290
+ return `apple-${chip.toLowerCase().replace(/\s+/g, '-')}-${unified}gb`;
287
291
  }
288
292
 
289
293
  /**