llm-checker 3.5.11 → 3.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,19 +33,20 @@ class AIModelSelector {
33
33
  }
34
34
  }
35
35
 
36
- async selectBestModel(candidateModels, systemSpecs = null, userPreference = 'general') {
36
+ async selectBestModel(candidateModels, systemSpecs = null, userPreference = 'general', options = {}) {
37
+ const log = options.silent ? () => {} : console.log;
38
+ const warn = options.silent ? () => {} : console.warn;
39
+
37
40
  try {
38
41
  // Para ai-run: usar TODOS los modelos de la base de datos para encontrar el mejor
39
42
  // y luego verificar si estรก instalado localmente
40
- console.log('๐Ÿ” Using comprehensive model database for selection...');
43
+ log('๐Ÿ” Using comprehensive model database for selection...');
41
44
 
42
45
  // Obtener todos los modelos de la base de datos de Ollama
43
- const { OllamaNativeScraper } = require('../ollama/native-scraper');
44
- const scraper = new OllamaNativeScraper();
45
- const allModelData = await scraper.scrapeAllModels(false);
46
+ const allModelData = await this.loadModelDatabase();
46
47
  const allAvailableModels = allModelData.models || [];
47
48
 
48
- console.log(`Evaluating against ${allAvailableModels.length} models from database`);
49
+ log(`Evaluating against ${allAvailableModels.length} models from database`);
49
50
 
50
51
  // Usar el selector inteligente con TODOS los modelos disponibles
51
52
  const result = this.intelligentSelector.selectBestModels(
@@ -68,7 +69,7 @@ class AIModelSelector {
68
69
  let reason = result.best_model.reasoning;
69
70
 
70
71
  if (!isLocallyInstalled) {
71
- console.log(`Best model ${recommendedId} not installed locally`);
72
+ log(`Best model ${recommendedId} not installed locally`);
72
73
 
73
74
  // Buscar el mejor modelo entre los instalados localmente
74
75
  const localResult = this.intelligentSelector.selectBestModels(
@@ -83,7 +84,7 @@ class AIModelSelector {
83
84
  confidence = localResult.best_model.confidence * 0.9; // Reducir confianza
84
85
  reason = `${localResult.best_model.reasoning} (Locally installed alternative to recommended ${recommendedId})`;
85
86
 
86
- console.log(`๐Ÿ”„ Using best local alternative: ${finalModel}`);
87
+ log(`๐Ÿ”„ Using best local alternative: ${finalModel}`);
87
88
  }
88
89
  }
89
90
 
@@ -111,7 +112,7 @@ class AIModelSelector {
111
112
  };
112
113
  }
113
114
  } catch (error) {
114
- console.warn(`Comprehensive database selection failed: ${error.message}`);
115
+ warn(`Comprehensive database selection failed: ${error.message}`);
115
116
 
116
117
  // Fallback al mรฉtodo anterior con solo modelos locales
117
118
  try {
@@ -139,7 +140,7 @@ class AIModelSelector {
139
140
  };
140
141
  }
141
142
  } catch (localError) {
142
- console.warn(`Local intelligent selection also failed: ${localError.message}`);
143
+ warn(`Local intelligent selection also failed: ${localError.message}`);
143
144
  }
144
145
  }
145
146
 
@@ -161,15 +162,45 @@ class AIModelSelector {
161
162
  };
162
163
 
163
164
  } catch (error) {
164
- console.warn(`ONNX AI selection failed: ${error.message}`);
165
+ warn(`ONNX AI selection failed: ${error.message}`);
165
166
  }
166
167
  }
167
168
 
168
169
  // Final fallback to simple heuristic
169
- return this.fallbackSelection(candidateModels, systemSpecs);
170
+ return this.fallbackSelection(candidateModels, systemSpecs, options);
171
+ }
172
+
173
+ async loadModelDatabase() {
174
+ try {
175
+ const ModelDatabase = require('../data/model-database');
176
+ const database = new ModelDatabase();
177
+ await database.initialize();
178
+
179
+ try {
180
+ const models = database.getAllModelsWithVariants();
181
+ if (models.length > 0) {
182
+ return {
183
+ models,
184
+ total_count: models.length,
185
+ source: 'ollama_sqlite_database'
186
+ };
187
+ }
188
+ } finally {
189
+ database.close();
190
+ }
191
+ } catch {
192
+ // Fall through to scraper cache.
193
+ }
194
+
195
+ const { OllamaNativeScraper } = require('../ollama/native-scraper');
196
+ const scraper = new OllamaNativeScraper();
197
+ return scraper.scrapeAllModels(false);
170
198
  }
171
199
 
172
- fallbackSelection(candidateModels, systemSpecs = null) {
200
+ fallbackSelection(candidateModels, systemSpecs = null, options = {}) {
201
+ const log = options.silent ? () => {} : console.log;
202
+ const warn = options.silent ? () => {} : console.warn;
203
+
173
204
  if (!systemSpecs) {
174
205
  systemSpecs = {
175
206
  total_ram_gb: 8,
@@ -179,7 +210,7 @@ class AIModelSelector {
179
210
  };
180
211
  }
181
212
 
182
- console.log('๐Ÿ”„ Using fallback heuristic selection...');
213
+ log('๐Ÿ”„ Using fallback heuristic selection...');
183
214
 
184
215
  // Use intelligent selector with basic heuristic mode
185
216
  try {
@@ -202,7 +233,7 @@ class AIModelSelector {
202
233
  };
203
234
  }
204
235
  } catch (error) {
205
- console.warn(`Intelligent fallback failed: ${error.message}`);
236
+ warn(`Intelligent fallback failed: ${error.message}`);
206
237
  }
207
238
 
208
239
  // Ultimate fallback: simple memory-based selection
@@ -309,4 +340,4 @@ class AIModelSelector {
309
340
  }
310
341
  }
311
342
 
312
- module.exports = AIModelSelector;
343
+ module.exports = AIModelSelector;
@@ -111,12 +111,47 @@ class MultiObjectiveSelector {
111
111
  return false; // Model too large for this tier regardless of RAM
112
112
  }
113
113
 
114
- // Memory check with tier-appropriate safety margin
115
- const availableMemory = hardware.memory.total * limits.availableMemoryRatio;
114
+ // Memory check with tier-appropriate safety margin. Dedicated GPUs can
115
+ // run quantized models primarily from VRAM with limited RAM offload, so
116
+ // using only system RAM underestimates mid-range cards such as RTX 5060.
117
+ const availableMemory = this.getAvailableModelMemoryGB(hardware, limits.availableMemoryRatio);
116
118
 
117
119
  return totalMemoryNeeded <= availableMemory;
118
120
  }
119
121
 
122
+ getAvailableModelMemoryGB(hardware, fallbackRatio = 0.7) {
123
+ const ramGB = Number(hardware?.memory?.total ?? hardware?.memory?.totalGB ?? 0) || 0;
124
+ const vramGB = Number(
125
+ hardware?.gpu?.vram ??
126
+ hardware?.gpu?.vramGB ??
127
+ hardware?.summary?.totalVRAM ??
128
+ 0
129
+ ) || 0;
130
+ const hasIntegratedGPU = typeof hardware?.summary?.hasIntegratedGPU === 'boolean'
131
+ ? hardware.summary.hasIntegratedGPU
132
+ : false;
133
+ const hasDedicatedGPU = typeof hardware?.summary?.hasDedicatedGPU === 'boolean'
134
+ ? hardware.summary.hasDedicatedGPU
135
+ : Boolean(hardware?.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
136
+
137
+ if (hasDedicatedGPU && vramGB > 0) {
138
+ const pcSpecs = this.getPCGPUSpecs(hardware, vramGB, ramGB);
139
+ const vramBudget = vramGB * (pcSpecs.memoryEfficiency || 0.85);
140
+ const offloadBudget = Math.min(
141
+ pcSpecs.offloadCapacity || 0,
142
+ Math.max(0, ramGB * 0.5)
143
+ );
144
+ return Math.max(vramBudget, vramBudget + offloadBudget);
145
+ }
146
+
147
+ const sharedMemory = Number(hardware?.summary?.integratedSharedMemory || hardware?.gpu?.sharedMemory || 0);
148
+ if (sharedMemory > 0 && !hasDedicatedGPU) {
149
+ return sharedMemory * Math.max(fallbackRatio, 0.85);
150
+ }
151
+
152
+ return ramGB * fallbackRatio;
153
+ }
154
+
120
155
  /**
121
156
  * Step 2: Multi-objective scoring
122
157
  */
@@ -274,8 +309,10 @@ class MultiObjectiveSelector {
274
309
  return num / (1024 ** 3); // Convert bytes to GB
275
310
  } else if (num >= 0.1 && num <= 100) {
276
311
  // Small numbers (0.1-100) are likely billion parameters - convert to file size
277
- // Rough estimate: 1B params โ‰ˆ 2GB in Q4 quantization
278
- return Math.max(0.5, num * 2);
312
+ // Static catalog `B` values are parameter counts. Default check
313
+ // recommendations target quantized local inference, where Q4
314
+ // artifacts are roughly 0.6-0.7GB per billion parameters.
315
+ return Math.max(0.5, Math.round(num * 0.65 * 10) / 10);
279
316
  } else {
280
317
  // Fallback for edge cases
281
318
  return Math.max(0.5, num);
@@ -329,7 +366,7 @@ class MultiObjectiveSelector {
329
366
  const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
330
367
 
331
368
  const ramGB = hardware.memory.total || 0;
332
- const vramGB = hardware.gpu?.vram || 0;
369
+ const vramGB = hardware.gpu?.vram || hardware.gpu?.vramGB || hardware.summary?.totalVRAM || 0;
333
370
  const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
334
371
  const gpuModel = hardware.gpu?.model || '';
335
372
  const architecture = hardware.cpu?.architecture || hardware.cpu?.brand || '';
@@ -406,6 +443,7 @@ class MultiObjectiveSelector {
406
443
  else if (gpu.includes('rtx 4090')) memBandwidthGBs = 1008;
407
444
  else if (gpu.includes('rtx 4080')) memBandwidthGBs = 716;
408
445
  else if (gpu.includes('rtx 4070')) memBandwidthGBs = 448;
446
+ else if (gpu.includes('rtx 5060')) memBandwidthGBs = 336;
409
447
  else if (gpu.includes('iris xe')) memBandwidthGBs = 68;
410
448
 
411
449
  const mem_bw = clamp(memBandwidthGBs / 500); // Match main algorithm
@@ -419,6 +457,7 @@ class MultiObjectiveSelector {
419
457
  else if (gpu.includes('m4')) compute = clamp(15 / 80);
420
458
  else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
421
459
  else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
460
+ else if (gpu.includes('rtx 5060')) compute = clamp(38 / 80);
422
461
  else if (gpu.includes('iris xe')) compute = 0.02;
423
462
  else {
424
463
  // CPU fallback
@@ -464,7 +503,7 @@ class MultiObjectiveSelector {
464
503
  }
465
504
 
466
505
  // Special flagship GPU detection by model name
467
- if (gpuModel.toLowerCase().includes('rtx 50') ||
506
+ if (gpuModel.toLowerCase().includes('rtx 5090') ||
468
507
  gpuModel.toLowerCase().includes('gb10') ||
469
508
  gpuModel.toLowerCase().includes('grace blackwell') ||
470
509
  gpuModel.toLowerCase().includes('dgx spark') ||
@@ -472,6 +511,10 @@ class MultiObjectiveSelector {
472
511
  gpuModel.toLowerCase().includes('h100') ||
473
512
  gpuModel.toLowerCase().includes('a100')) {
474
513
  tier = 'flagship';
514
+ } else if (gpuModel.toLowerCase().includes('rtx 5080')) {
515
+ tier = bumpTier(tier, tier === 'ultra_high' || tier === 'flagship' ? 0 : +1);
516
+ } else if (gpuModel.toLowerCase().includes('rtx 5070') && !gpuModel.toLowerCase().includes('rtx 5070 ti')) {
517
+ tier = bumpTier(tier, tier === 'high' || tier === 'ultra_high' || tier === 'flagship' ? 0 : +1);
475
518
  }
476
519
 
477
520
  return tier;
@@ -624,10 +667,13 @@ class MultiObjectiveSelector {
624
667
  specs.offloadCapacity = Math.min(ramGB * 0.6, 32);
625
668
  specs.memoryEfficiency = 0.96;
626
669
  specs.backendOptimization = 1.25;
627
- } else if (gpu.includes('rtx 50')) {
628
- // RTX 50xx series - flagship tier with massive VRAM + excellent offload
670
+ } else if (gpu.includes('rtx 5090') || gpu.includes('rtx 5080') || gpu.includes('rtx 5070')) {
671
+ // Upper RTX 50xx cards have excellent offload behavior.
629
672
  specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
630
673
  specs.memoryEfficiency = 0.95;
674
+ } else if (gpu.includes('rtx 5060')) {
675
+ specs.offloadCapacity = Math.min(ramGB * 0.35, 12);
676
+ specs.memoryEfficiency = 0.90;
631
677
  } else if (gpu.includes('rtx 40')) {
632
678
  specs.offloadCapacity = Math.min(ramGB * 0.35, 16);
633
679
  specs.memoryEfficiency = 0.90;
@@ -751,7 +797,7 @@ class MultiObjectiveSelector {
751
797
  const gpuModel = hardware.gpu?.model || '';
752
798
  const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
753
799
  const baseSpeed = hardware.cpu?.speed || 2.0;
754
- const vramGB = hardware.gpu?.vram || 0;
800
+ const vramGB = hardware.gpu?.vram || hardware.gpu?.vramGB || hardware.summary?.totalVRAM || 0;
755
801
  const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
756
802
  ? hardware.summary.hasIntegratedGPU
757
803
  : false;
@@ -10,10 +10,23 @@ const fs = require('fs');
10
10
  class ModelDatabase {
11
11
  constructor(options = {}) {
12
12
  this.dbPath = options.dbPath || path.join(os.homedir(), '.llm-checker', 'models.db');
13
+ this.seedDbPath = options.seedDbPath || path.join(__dirname, 'seed', 'models.db');
13
14
  this.db = null;
14
15
  this.initialized = false;
15
16
  }
16
17
 
18
+ /**
19
+ * Seed a first-run user database from the packaged npm snapshot.
20
+ */
21
+ seedDatabaseIfNeeded() {
22
+ if (fs.existsSync(this.dbPath) || !fs.existsSync(this.seedDbPath)) {
23
+ return false;
24
+ }
25
+
26
+ fs.copyFileSync(this.seedDbPath, this.dbPath);
27
+ return true;
28
+ }
29
+
17
30
  /**
18
31
  * Initialize database with schema
19
32
  */
@@ -25,6 +38,7 @@ class ModelDatabase {
25
38
  if (!fs.existsSync(dbDir)) {
26
39
  fs.mkdirSync(dbDir, { recursive: true });
27
40
  }
41
+ this.seedDatabaseIfNeeded();
28
42
 
29
43
  // Use sql.js (optional dependency)
30
44
  let initSqlJs;
@@ -229,18 +243,20 @@ class ModelDatabase {
229
243
  { pattern: /llama3\.1/, family: 'llama3.1' },
230
244
  { pattern: /llama3/, family: 'llama3' },
231
245
  { pattern: /llama2/, family: 'llama2' },
246
+ { pattern: /qwen3/, family: 'qwen3' },
232
247
  { pattern: /qwen2\.5/, family: 'qwen2.5' },
233
248
  { pattern: /qwen2/, family: 'qwen2' },
234
249
  { pattern: /qwen/, family: 'qwen' },
235
250
  { pattern: /mistral/, family: 'mistral' },
236
251
  { pattern: /mixtral/, family: 'mixtral' },
252
+ { pattern: /gemma3/, family: 'gemma3' },
237
253
  { pattern: /gemma2/, family: 'gemma2' },
238
254
  { pattern: /gemma/, family: 'gemma' },
239
255
  { pattern: /phi-?3/, family: 'phi3' },
240
256
  { pattern: /phi-?4/, family: 'phi4' },
241
257
  { pattern: /phi/, family: 'phi' },
242
- { pattern: /deepseek-?coder/, family: 'deepseek-coder' },
243
258
  { pattern: /deepseek-?r1/, family: 'deepseek-r1' },
259
+ { pattern: /deepseek-?coder/, family: 'deepseek-coder' },
244
260
  { pattern: /deepseek/, family: 'deepseek' },
245
261
  { pattern: /codellama/, family: 'codellama' },
246
262
  { pattern: /starcoder/, family: 'starcoder' },
@@ -251,6 +267,7 @@ class ModelDatabase {
251
267
  { pattern: /neural-chat/, family: 'neural-chat' },
252
268
  { pattern: /orca/, family: 'orca' },
253
269
  { pattern: /vicuna/, family: 'vicuna' },
270
+ { pattern: /yi-?coder/, family: 'yi-coder' },
254
271
  { pattern: /yi/, family: 'yi' },
255
272
  { pattern: /solar/, family: 'solar' },
256
273
  { pattern: /command-r/, family: 'command-r' },
@@ -540,6 +557,80 @@ class ModelDatabase {
540
557
  return this.all(sql, params);
541
558
  }
542
559
 
560
+ /**
561
+ * Export the synced SQLite catalog in the shape expected by recommendation engines.
562
+ */
563
+ getAllModelsWithVariants() {
564
+ const models = this.all(`SELECT * FROM models ORDER BY pulls DESC, id ASC`);
565
+ const variants = this.all(`SELECT * FROM variants ORDER BY model_id ASC, params_b DESC, size_gb ASC`);
566
+ const variantsByModel = new Map();
567
+
568
+ const parseJson = (value, fallback) => {
569
+ if (!value) return fallback;
570
+ try {
571
+ const parsed = JSON.parse(value);
572
+ return parsed;
573
+ } catch {
574
+ return fallback;
575
+ }
576
+ };
577
+
578
+ for (const variant of variants) {
579
+ const list = variantsByModel.get(variant.model_id) || [];
580
+ const inputTypes = parseJson(variant.input_types, ['text']);
581
+ list.push({
582
+ model_id: variant.model_id,
583
+ tag: variant.tag,
584
+ params_b: variant.params_b,
585
+ quant: variant.quant,
586
+ quantization: variant.quant,
587
+ size_gb: variant.size_gb,
588
+ real_size_gb: variant.size_gb,
589
+ estimated_size_gb: variant.size_gb,
590
+ context_length: variant.context_length,
591
+ input_types: Array.isArray(inputTypes) ? inputTypes : ['text'],
592
+ is_moe: Boolean(variant.is_moe),
593
+ expert_count: variant.expert_count
594
+ });
595
+ variantsByModel.set(variant.model_id, list);
596
+ }
597
+
598
+ return models.map((model) => {
599
+ const capabilities = parseJson(model.capabilities, []);
600
+ const capabilityList = Array.isArray(capabilities) ? capabilities : [];
601
+ const primaryCategory =
602
+ capabilityList.find((cap) => ['coding', 'reasoning', 'multimodal', 'embeddings', 'creative', 'chat'].includes(cap)) ||
603
+ (capabilityList.includes('multimodal') ? 'multimodal' : 'general');
604
+
605
+ return {
606
+ id: model.id,
607
+ model_identifier: model.id,
608
+ model_name: model.name || model.id,
609
+ family: model.family || this.inferFamily(model.id),
610
+ model_type: model.type || 'official',
611
+ type: model.type || 'official',
612
+ description: model.description || '',
613
+ capabilities: capabilityList,
614
+ categories: capabilityList,
615
+ primary_category: primaryCategory,
616
+ use_cases: capabilityList,
617
+ pulls: model.pulls || 0,
618
+ actual_pulls: model.pulls || 0,
619
+ tags_count: model.tags_count || 0,
620
+ namespace: model.namespace || '',
621
+ url: model.url || `https://ollama.com/library/${model.id}`,
622
+ last_updated: model.last_updated || '',
623
+ updated_at: model.updated_at || '',
624
+ variants: variantsByModel.get(model.id) || [],
625
+ source: 'ollama_sqlite_database',
626
+ registry: 'ollama.com',
627
+ version: model.updated_at || model.last_updated || 'unknown',
628
+ license: 'unknown',
629
+ digest: 'unknown'
630
+ };
631
+ });
632
+ }
633
+
543
634
  /**
544
635
  * Get benchmarks for a variant on specific hardware
545
636
  */
@@ -0,0 +1,8 @@
1
+ This directory contains the packaged Ollama model database snapshot used on
2
+ first run.
3
+
4
+ `models.db` is copied to `~/.llm-checker/models.db` only when the user does not
5
+ already have a local database. After that, `llm-checker sync` updates the user's
6
+ local copy.
7
+
8
+ Refresh cadence: weekly via `.github/workflows/update-model-db.yml`.
Binary file