llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,1145 @@
1
+ /**
2
+ * LLM-Checker: Deterministic Model Selection Algorithm (Spec v1.0)
3
+ *
4
+ * A two-phase selector that picks the best Ollama model + quantization
5
+ * for a given machine and task category.
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const { spawn } = require('child_process');
11
+ const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
12
+
13
+ class DeterministicModelSelector {
14
+ constructor() {
15
+ this.catalogPath = path.join(__dirname, 'catalog.json');
16
+ this.benchCachePath = path.join(require('os').homedir(), '.llm-checker', 'bench.json');
17
+
18
+ // Quality priors table
19
+ this.baseQualityByParams = {
20
+ 0.5: 45, 1: 45, 1.5: 45,
21
+ 2: 60, 3: 60, 4: 60,
22
+ 7: 75, 8: 75, 9: 75,
23
+ 13: 82, 14: 82, 15: 82,
24
+ 30: 89, 32: 89, 34: 89,
25
+ 70: 95, 72: 95
26
+ };
27
+
28
+ // Family quality bumps
29
+ this.familyBumps = {
30
+ 'qwen2.5': 2,
31
+ 'deepseek': 3,
32
+ 'mistral': 1,
33
+ 'llama3.1': 1,
34
+ 'llama3.2': 2,
35
+ 'gemma2': 1,
36
+ 'phi-3': 0,
37
+ 'granite': 0,
38
+ 'solar': 0,
39
+ 'starcoder': 1,
40
+ 'minicpm': 0,
41
+ 'llava': 0
42
+ };
43
+
44
+ // Quantization penalties
45
+ this.quantPenalties = {
46
+ 'Q8_0': 0,
47
+ 'Q6_K': -1,
48
+ 'Q5_K_M': -2,
49
+ 'Q4_K_M': -5,
50
+ 'Q3_K': -8,
51
+ 'Q2_K': -12
52
+ };
53
+
54
+ // Quantization hierarchy (best to worst)
55
+ this.quantHierarchy = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q4_K_M', 'Q3_K', 'Q2_K'];
56
+
57
+ // Quantization speed multipliers
58
+ this.quantSpeedMultipliers = {
59
+ 'Q8_0': 0.8,
60
+ 'Q6_K': 0.95,
61
+ 'Q5_K_M': 1.00,
62
+ 'Q4_K_M': 1.15,
63
+ 'Q3_K': 1.25,
64
+ 'Q2_K': 1.35
65
+ };
66
+
67
+ // Backend speed constants (K)
68
+ this.backendK = {
69
+ 'metal': 160, // Apple Metal
70
+ 'cuda': 220, // NVIDIA CUDA
71
+ 'cpu_x86': 70, // CPU x86_64
72
+ 'cpu_arm': 90 // CPU ARM64
73
+ };
74
+
75
+ // Category target speeds (tokens/sec)
76
+ this.targetSpeeds = {
77
+ 'general': 40,
78
+ 'coding': 40,
79
+ 'reasoning': 25,
80
+ 'summarization': 60,
81
+ 'reading': 60,
82
+ 'multimodal': 40,
83
+ 'embeddings': 200
84
+ };
85
+
86
+ // Category target contexts
87
+ this.targetContexts = {
88
+ 'general': 4096,
89
+ 'coding': 8192,
90
+ 'reasoning': 8192,
91
+ 'summarization': 8192,
92
+ 'reading': 8192,
93
+ 'multimodal': 4096,
94
+ 'embeddings': 512
95
+ };
96
+
97
+ // Category scoring weights [Q, S, F, C] from centralized config
98
+ this.categoryWeights = DETERMINISTIC_WEIGHTS;
99
+ }
100
+
101
+ // ============================================================================
102
+ // PHASE 0: DATA SOURCES
103
+ // ============================================================================
104
+
105
+ /**
106
+ * Hardware Profiler - Detect CPU, GPU, RAM, and acceleration support
107
+ */
108
+ async getHardware() {
109
+ const hardware = {
110
+ cpu: await this.getCPUInfo(),
111
+ gpu: await this.getGPUInfo(),
112
+ memory: await this.getMemoryInfo(),
113
+ os: await this.getOSInfo(),
114
+ acceleration: await this.getAccelerationSupport()
115
+ };
116
+
117
+ // Calculate usable memory: min(0.8 * total_ram, total_ram - 2GB)
118
+ hardware.usableMemGB = Math.min(
119
+ 0.8 * hardware.memory.totalGB,
120
+ hardware.memory.totalGB - 2
121
+ );
122
+
123
+ return hardware;
124
+ }
125
+
126
+ async getCPUInfo() {
127
+ const os = require('os');
128
+ return {
129
+ architecture: os.arch(),
130
+ cores: os.cpus().length,
131
+ threads: os.cpus().length, // Simplified
132
+ platform: os.platform()
133
+ };
134
+ }
135
+
136
+ async getGPUInfo() {
137
+ const cpu = await this.getCPUInfo();
138
+
139
+ // Simplified GPU detection
140
+ if (cpu.platform === 'darwin' && cpu.architecture === 'arm64') {
141
+ return {
142
+ type: 'apple_silicon',
143
+ vramGB: 0, // Unified memory
144
+ unified: true
145
+ };
146
+ }
147
+
148
+ // TODO: Add NVIDIA/AMD detection for other platforms
149
+ return {
150
+ type: 'cpu_only',
151
+ vramGB: 0,
152
+ unified: false
153
+ };
154
+ }
155
+
156
+ async getMemoryInfo() {
157
+ const os = require('os');
158
+ const totalBytes = os.totalmem();
159
+ return {
160
+ totalGB: Math.round((totalBytes / (1024**3)) * 10) / 10
161
+ };
162
+ }
163
+
164
+ async getOSInfo() {
165
+ const os = require('os');
166
+ return {
167
+ platform: os.platform(),
168
+ arch: os.arch(),
169
+ release: os.release()
170
+ };
171
+ }
172
+
173
+ async getAccelerationSupport() {
174
+ const cpu = await this.getCPUInfo();
175
+ const gpu = await this.getGPUInfo();
176
+
177
+ return {
178
+ supports_metal: gpu.type === 'apple_silicon',
179
+ supports_cuda: gpu.type === 'nvidia',
180
+ supports_rocm: gpu.type === 'amd'
181
+ };
182
+ }
183
+
184
+ /**
185
+ * Local Ollama Inventory - Get installed models from `ollama list`
186
+ */
187
+ async getInstalledModels() {
188
+ try {
189
+ const models = await this.runOllamaCommand(['list']);
190
+ const parsed = [];
191
+
192
+ for (const line of models.split('\n').slice(1)) { // Skip header
193
+ if (!line.trim()) continue;
194
+
195
+ const parts = line.trim().split(/\s+/);
196
+ if (parts.length < 3) continue;
197
+
198
+ const modelName = parts[0];
199
+ const modelId = parts[1];
200
+ const size = parts.length >= 4 ? `${parts[2]} ${parts[3]}` : parts[2];
201
+
202
+ // Get detailed info for each model
203
+ try {
204
+ const details = await this.getModelDetails(modelName);
205
+ parsed.push({
206
+ ...details,
207
+ installed: true,
208
+ installedSize: size
209
+ });
210
+ } catch (error) {
211
+ console.warn(`Failed to get details for ${modelName}:`, error.message);
212
+ }
213
+ }
214
+
215
+ return parsed;
216
+ } catch (error) {
217
+ // Silently fail when Ollama is not available - this is expected
218
+ return [];
219
+ }
220
+ }
221
+
222
+ async getModelDetails(modelName) {
223
+ try {
224
+ const details = await this.runOllamaCommand(['show', modelName]);
225
+
226
+ // Parse model details from ollama show output
227
+ const meta = {
228
+ name: modelName,
229
+ family: this.extractFamily(modelName),
230
+ paramsB: this.extractParams(details),
231
+ ctxMax: this.extractContextLength(details),
232
+ quant: this.extractQuantization(details),
233
+ sizeGB: this.extractSizeGB(details),
234
+ modalities: this.extractModalities(details),
235
+ tags: this.extractTags(details),
236
+ model_identifier: modelName
237
+ };
238
+
239
+ return meta;
240
+ } catch (error) {
241
+ // If Ollama is not available or model details can't be fetched, return minimal info
242
+ return {
243
+ name: modelName,
244
+ family: 'unknown',
245
+ paramsB: 0,
246
+ ctxMax: 2048,
247
+ quant: 'unknown',
248
+ sizeGB: 0,
249
+ modalities: ['text'],
250
+ tags: [],
251
+ model_identifier: modelName,
252
+ error: error.message
253
+ };
254
+ }
255
+ }
256
+
257
+ /**
258
+ * Curated Catalog - Load known models from catalog.json
259
+ */
260
+ async loadCatalog() {
261
+ try {
262
+ if (!fs.existsSync(this.catalogPath)) {
263
+ console.warn('Catalog not found, creating default...');
264
+ await this.createDefaultCatalog();
265
+ }
266
+
267
+ const catalogData = fs.readFileSync(this.catalogPath, 'utf8');
268
+ const catalog = JSON.parse(catalogData);
269
+
270
+ return catalog.models.map(model => ({
271
+ ...model,
272
+ installed: false
273
+ }));
274
+ } catch (error) {
275
+ console.warn('Failed to load catalog:', error.message);
276
+ return [];
277
+ }
278
+ }
279
+
280
+ async createDefaultCatalog() {
281
+ const defaultCatalog = {
282
+ version: "1.0",
283
+ updated: new Date().toISOString(),
284
+ models: [
285
+ {
286
+ name: "qwen2.5-coder:0.5b",
287
+ family: "qwen2.5",
288
+ paramsB: 0.5,
289
+ ctxMax: 32768,
290
+ quant: "Q4_K_M",
291
+ sizeGB: 0.4,
292
+ modalities: ["text"],
293
+ tags: ["coder", "instruct"],
294
+ model_identifier: "qwen2.5-coder:0.5b"
295
+ },
296
+ {
297
+ name: "qwen2.5-coder:1.5b",
298
+ family: "qwen2.5",
299
+ paramsB: 1.5,
300
+ ctxMax: 32768,
301
+ quant: "Q4_K_M",
302
+ sizeGB: 1.1,
303
+ modalities: ["text"],
304
+ tags: ["coder", "instruct"],
305
+ model_identifier: "qwen2.5-coder:1.5b"
306
+ },
307
+ {
308
+ name: "qwen2.5-coder:7b",
309
+ family: "qwen2.5",
310
+ paramsB: 7,
311
+ ctxMax: 32768,
312
+ quant: "Q4_K_M",
313
+ sizeGB: 4.4,
314
+ modalities: ["text"],
315
+ tags: ["coder", "instruct"],
316
+ model_identifier: "qwen2.5-coder:7b"
317
+ },
318
+ {
319
+ name: "llama3.2:3b",
320
+ family: "llama3.2",
321
+ paramsB: 3,
322
+ ctxMax: 131072,
323
+ quant: "Q4_K_M",
324
+ sizeGB: 2.0,
325
+ modalities: ["text"],
326
+ tags: ["instruct", "chat"],
327
+ model_identifier: "llama3.2:3b"
328
+ },
329
+ {
330
+ name: "llava:7b",
331
+ family: "llava",
332
+ paramsB: 7,
333
+ ctxMax: 4096,
334
+ quant: "Q4_K_M",
335
+ sizeGB: 4.7,
336
+ modalities: ["text", "vision"],
337
+ tags: ["multimodal", "vision"],
338
+ model_identifier: "llava:7b"
339
+ }
340
+ ]
341
+ };
342
+
343
+ // Ensure directory exists
344
+ const dir = path.dirname(this.catalogPath);
345
+ if (!fs.existsSync(dir)) {
346
+ fs.mkdirSync(dir, { recursive: true });
347
+ }
348
+
349
+ fs.writeFileSync(this.catalogPath, JSON.stringify(defaultCatalog, null, 2));
350
+ }
351
+
352
+ // ============================================================================
353
+ // HELPER METHODS FOR PARSING OLLAMA OUTPUT
354
+ // ============================================================================
355
+
356
+ extractFamily(modelName) {
357
+ const name = modelName.toLowerCase();
358
+ if (name.includes('qwen2.5')) return 'qwen2.5';
359
+ if (name.includes('qwen3')) return 'qwen2.5';
360
+ if (name.includes('qwen')) return 'qwen2.5';
361
+ if (name.includes('deepseek')) return 'deepseek';
362
+ if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
363
+ if (name.includes('llama3.1')) return 'llama3.1';
364
+ if (name.includes('llama')) return 'llama';
365
+ if (name.includes('mistral')) return 'mistral';
366
+ if (name.includes('gemma')) return 'gemma2';
367
+ if (name.includes('phi')) return 'phi-3';
368
+ if (name.includes('llava')) return 'llava';
369
+ if (name.includes('granite')) return 'granite';
370
+ if (name.includes('solar')) return 'solar';
371
+ if (name.includes('starcoder')) return 'starcoder';
372
+ if (name.includes('minicpm')) return 'minicpm';
373
+ return 'unknown';
374
+ }
375
+
376
+ extractParams(details) {
377
+ // Look for parameter info in ollama show output
378
+ const match = details.match(/parameters\s+(\d+\.?\d*)[BM]/i);
379
+ if (match) {
380
+ const num = parseFloat(match[1]);
381
+ return match[0].toUpperCase().includes('B') ? num : num / 1000;
382
+ }
383
+ return 7; // Default fallback
384
+ }
385
+
386
+ extractContextLength(details) {
387
+ const match = details.match(/context_length\s+(\d+)/i);
388
+ return match ? parseInt(match[1]) : 4096;
389
+ }
390
+
391
+ extractQuantization(details) {
392
+ const match = details.match(/quantization\s+(Q\d+_[A-Z0-9_]+)/i);
393
+ return match ? match[1] : 'Q4_K_M';
394
+ }
395
+
396
+ extractSizeGB(details) {
397
+ const match = details.match(/size\s+(\d+\.?\d*)\s*GB/i);
398
+ return match ? parseFloat(match[1]) : 4.0;
399
+ }
400
+
401
+ extractModalities(details) {
402
+ const modalities = ['text'];
403
+ if (details.toLowerCase().includes('vision') || details.toLowerCase().includes('image')) {
404
+ modalities.push('vision');
405
+ }
406
+ return modalities;
407
+ }
408
+
409
+ extractTags(details) {
410
+ const tags = [];
411
+ const lowerDetails = details.toLowerCase();
412
+
413
+ if (lowerDetails.includes('instruct')) tags.push('instruct');
414
+ if (lowerDetails.includes('chat')) tags.push('chat');
415
+ if (lowerDetails.includes('code')) tags.push('coder');
416
+ if (lowerDetails.includes('vision')) tags.push('vision');
417
+ // Only mark as embedding if it's explicitly an embedding model
418
+ if (lowerDetails.includes('embed-text') ||
419
+ lowerDetails.includes('nomic-embed') ||
420
+ lowerDetails.includes('bge-') ||
421
+ lowerDetails.includes('all-minilm')) tags.push('embedding');
422
+
423
+ return tags;
424
+ }
425
+
426
+ async runOllamaCommand(args) {
427
+ return new Promise((resolve, reject) => {
428
+ try {
429
+ const proc = spawn('ollama', args, { stdio: 'pipe' });
430
+ let output = '';
431
+ let error = '';
432
+
433
+ proc.stdout.on('data', (data) => output += data);
434
+ proc.stderr.on('data', (data) => error += data);
435
+
436
+ proc.on('close', (code) => {
437
+ if (code === 0) {
438
+ resolve(output);
439
+ } else {
440
+ reject(new Error(`Ollama command failed: ${error}`));
441
+ }
442
+ });
443
+
444
+ proc.on('error', (err) => {
445
+ // Handle ENOENT and other spawn errors gracefully
446
+ if (err.code === 'ENOENT') {
447
+ reject(new Error('Ollama not found. Please install Ollama from https://ollama.ai'));
448
+ } else {
449
+ reject(new Error(`Ollama spawn error: ${err.message}`));
450
+ }
451
+ });
452
+ } catch (spawnError) {
453
+ // Handle synchronous spawn errors
454
+ reject(new Error(`Failed to start Ollama: ${spawnError.message}`));
455
+ }
456
+ });
457
+ }
458
+
459
+ // ============================================================================
460
+ // PHASE 1: ESTIMATION FILTER
461
+ // ============================================================================
462
+
463
+ /**
464
+ * Main model selection function
465
+ */
466
+ async selectModels(category = 'general', options = {}) {
467
+ const {
468
+ targetCtx = this.targetContexts[category],
469
+ topN = 5,
470
+ enableProbe = false,
471
+ silent = false
472
+ } = options;
473
+
474
+ if (!silent) {
475
+ console.log(`🔍 Selecting models for category: ${category}`);
476
+ }
477
+
478
+ // Phase 0: Gather data
479
+ const hardware = await this.getHardware();
480
+ const installed = await this.getInstalledModels();
481
+ const catalog = await this.loadCatalog();
482
+
483
+ if (!silent) {
484
+ console.log(`Found ${installed.length} installed, ${catalog.length} catalog models`);
485
+ console.log(`Hardware: ${hardware.cpu.cores} cores, ${hardware.memory.totalGB}GB RAM, ${hardware.gpu.type}`);
486
+ }
487
+
488
+ // Combine and dedupe models (prefer installed versions)
489
+ const pool = this.combineModels(installed, catalog);
490
+ const filtered = this.filterByCategory(pool, category);
491
+
492
+ if (!silent) {
493
+ console.log(`Evaluating ${filtered.length} models for ${category} category`);
494
+ }
495
+
496
+ // Phase 1: Estimation filter
497
+ const candidates = [];
498
+ const budget = hardware.gpu.unified ? hardware.usableMemGB :
499
+ (hardware.gpu.vramGB || hardware.usableMemGB);
500
+
501
+ for (const model of filtered) {
502
+ const result = this.evaluateModel(model, hardware, category, targetCtx, budget);
503
+ if (result) {
504
+ candidates.push(result);
505
+ }
506
+ }
507
+
508
+ // Sort by score
509
+ candidates.sort((a, b) => b.score - a.score);
510
+ const topCandidates = candidates.slice(0, topN);
511
+
512
+ if (!silent) {
513
+ console.log(`✨ Selected ${topCandidates.length} top candidates`);
514
+ }
515
+
516
+ // Phase 2: Quick probe (optional)
517
+ if (enableProbe && topCandidates.length > 0) {
518
+ if (!silent) {
519
+ console.log(`🔬 Running quick probes...`);
520
+ }
521
+ await this.runQuickProbes(topCandidates, hardware, category);
522
+ // Re-sort after probing
523
+ topCandidates.sort((a, b) => b.score - a.score);
524
+ }
525
+
526
+ return {
527
+ category,
528
+ hardware,
529
+ candidates: topCandidates,
530
+ total_evaluated: filtered.length,
531
+ timestamp: new Date().toISOString()
532
+ };
533
+ }
534
+
535
+ combineModels(installed, catalog) {
536
+ const combined = [...installed];
537
+ const installedNames = new Set(installed.map(m => m.model_identifier));
538
+
539
+ // Add catalog models that aren't installed
540
+ for (const model of catalog) {
541
+ if (!installedNames.has(model.model_identifier)) {
542
+ combined.push(model);
543
+ }
544
+ }
545
+
546
+ return combined;
547
+ }
548
+
549
+ filterByCategory(models, category) {
550
+ return models.filter(model => {
551
+ switch (category) {
552
+ case 'coding':
553
+ return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
554
+ model.name.toLowerCase().includes('code');
555
+
556
+ case 'multimodal':
557
+ return model.modalities.includes('vision') ||
558
+ model.tags.includes('vision');
559
+
560
+ case 'embeddings':
561
+ return model.tags.includes('embedding') ||
562
+ model.tags.includes('embeddings') ||
563
+ model.name.toLowerCase().includes('embed') ||
564
+ model.name.toLowerCase().includes('bge-') ||
565
+ model.name.toLowerCase().includes('nomic-embed') ||
566
+ model.name.toLowerCase().includes('all-minilm') ||
567
+ model.specialization === 'embeddings';
568
+
569
+ case 'reasoning':
570
+ return model.tags.includes('instruct') ||
571
+ model.paramsB >= 7; // Prefer larger models for reasoning
572
+
573
+ default: // general, reading, summarization
574
+ return true; // Most models can handle these
575
+ }
576
+ });
577
+ }
578
+
579
+ evaluateModel(model, hardware, category, targetCtx, budget) {
580
+ // 1. Select best fitting quantization
581
+ const bestQuant = this.selectBestQuantization(model, budget, targetCtx);
582
+ if (!bestQuant) return null;
583
+
584
+ // 2. Calculate required memory
585
+ const requiredGB = this.estimateRequiredGB(model, bestQuant.quant, targetCtx);
586
+ if (requiredGB > budget) return null;
587
+
588
+ // 3. Calculate component scores
589
+ const Q = this.calculateQualityPrior(model, bestQuant.quant, category);
590
+ const S = this.estimateSpeed(hardware, model, bestQuant.quant, category);
591
+ const F = this.calculateFitScore(requiredGB, budget);
592
+ const C = this.calculateContextScore(model, targetCtx);
593
+
594
+ // 4. Calculate final weighted score
595
+ const weights = this.categoryWeights[category];
596
+ const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
597
+
598
+ // 5. Build rationale
599
+ const rationale = this.buildRationale(hardware, model, bestQuant.quant, requiredGB, budget, category, Q, S);
600
+
601
+ return {
602
+ meta: model,
603
+ quant: bestQuant.quant,
604
+ requiredGB: Math.round(requiredGB * 10) / 10,
605
+ estTPS: S,
606
+ score,
607
+ rationale,
608
+ components: { Q, S, F, C }
609
+ };
610
+ }
611
+
612
+ selectBestQuantization(model, budget, targetCtx) {
613
+ // Try quantizations from best to worst quality
614
+ for (const quant of this.quantHierarchy) {
615
+ const requiredGB = this.estimateRequiredGB(model, quant, targetCtx);
616
+ if (requiredGB <= budget) {
617
+ return { quant, sizeGB: requiredGB };
618
+ }
619
+ }
620
+
621
+ // If nothing fits at target context, try halving context once
622
+ const halfCtx = Math.floor(targetCtx / 2);
623
+ if (halfCtx >= 1024) {
624
+ for (const quant of this.quantHierarchy) {
625
+ const requiredGB = this.estimateRequiredGB(model, quant, halfCtx);
626
+ if (requiredGB <= budget) {
627
+ return { quant, sizeGB: requiredGB };
628
+ }
629
+ }
630
+ }
631
+
632
+ return null; // Model doesn't fit
633
+ }
634
+
635
+ estimateRequiredGB(model, quant, ctx) {
636
+ // Bytes per parameter by quantization level (calibrated to real Ollama sizes)
637
+ // 7B Q4_K_M=~4.5GB, 14B Q4_K_M=~9GB, 32B Q4_K_M=~19GB
638
+ const bytesPerParam = {
639
+ 'Q8_0': 1.05,
640
+ 'Q6_K': 0.80,
641
+ 'Q5_K_M': 0.68,
642
+ 'Q4_K_M': 0.58,
643
+ 'Q3_K': 0.48,
644
+ 'Q2_K': 0.37
645
+ };
646
+ const bpp = bytesPerParam[quant] || 0.63;
647
+ const modelMemGB = model.paramsB * bpp;
648
+
649
+ // KV cache: ~2 * numLayers * hiddenDim * 2bytes * ctx / 1e9
650
+ // Simplified: ~0.000008 GB per billion params per context token
651
+ const kvCacheGB = 0.000008 * model.paramsB * ctx;
652
+
653
+ // Runtime overhead (Metal/CUDA context, buffers)
654
+ const runtimeOverhead = 0.5;
655
+
656
+ return modelMemGB + kvCacheGB + runtimeOverhead;
657
+ }
658
+
659
+ calculateQualityPrior(model, quant, category) {
660
+ // Base quality by parameter count
661
+ let Q = this.getBaseQuality(model.paramsB);
662
+
663
+ // Family bump
664
+ const familyBump = this.familyBumps[model.family] || 0;
665
+ Q += familyBump;
666
+
667
+ // Quantization penalty
668
+ const quantPenalty = this.quantPenalties[quant] || -5;
669
+ Q += quantPenalty;
670
+
671
+ // Task alignment bump
672
+ const taskBump = this.getTaskAlignmentBump(model, category);
673
+ Q += taskBump;
674
+
675
+ // Reasoning bonus for larger models
676
+ if (category === 'reasoning' && model.paramsB >= 13) {
677
+ Q += 5;
678
+ }
679
+
680
+ // Coding penalty for non-instruct models
681
+ if (category === 'coding' && !model.tags.some(tag => ['coder', 'instruct'].includes(tag))) {
682
+ Q -= 15;
683
+ }
684
+
685
+ return Math.max(0, Math.min(100, Q));
686
+ }
687
+
688
+ getBaseQuality(paramsB) {
689
+ // Find closest parameter count in our table
690
+ const keys = Object.keys(this.baseQualityByParams).map(Number).sort((a, b) => a - b);
691
+
692
+ for (let i = 0; i < keys.length; i++) {
693
+ if (paramsB <= keys[i]) {
694
+ return this.baseQualityByParams[keys[i]];
695
+ }
696
+ }
697
+
698
+ // If larger than our table, return the largest
699
+ return this.baseQualityByParams[keys[keys.length - 1]];
700
+ }
701
+
702
+ getTaskAlignmentBump(model, category) {
703
+ const name = model.name.toLowerCase();
704
+ const tags = model.tags;
705
+
706
+ switch (category) {
707
+ case 'coding':
708
+ if (tags.includes('coder') || name.includes('code')) return 6;
709
+ if (tags.includes('instruct')) return 2;
710
+ return 0;
711
+
712
+ case 'multimodal':
713
+ if (model.modalities.includes('vision')) return 6;
714
+ return 0;
715
+
716
+ case 'general':
717
+ if (tags.includes('chat') || tags.includes('instruct')) return 4;
718
+ if (name.includes('code')) return 2;
719
+ return 0;
720
+
721
+ default:
722
+ return 0;
723
+ }
724
+ }
725
+
726
+ estimateSpeed(hardware, model, quant, category) {
727
+ // Determine backend
728
+ let backend = 'cpu_x86';
729
+ if (hardware.acceleration.supports_metal) backend = 'metal';
730
+ else if (hardware.acceleration.supports_cuda) backend = 'cuda';
731
+ else if (hardware.cpu.architecture === 'arm64') backend = 'cpu_arm';
732
+
733
+ // Base speed calculation
734
+ const K = this.backendK[backend];
735
+ let base = K / model.paramsB;
736
+
737
+ // Quantization multiplier
738
+ const quantMultiplier = this.quantSpeedMultipliers[quant] || 1.0;
739
+ base *= quantMultiplier;
740
+
741
+ // Threading multiplier
742
+ if (hardware.cpu.cores >= 8) base *= 1.1;
743
+ if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
744
+
745
+ // Normalize to 0-100 score
746
+ const target = this.targetSpeeds[category];
747
+ return Math.min(100, Math.round((100 * base / target) * 10) / 10);
748
+ }
749
+
750
+ calculateFitScore(requiredGB, budgetGB) {
751
+ const ratio = requiredGB / budgetGB;
752
+ if (ratio <= 0.9) return 100;
753
+ if (ratio <= 1.0) return 70;
754
+ return 0; // Should be filtered out earlier
755
+ }
756
+
757
+ calculateContextScore(model, targetCtx) {
758
+ if (model.ctxMax >= targetCtx) return 100;
759
+ if (model.ctxMax >= targetCtx * 0.5) return 70;
760
+ return 0; // Should be filtered out earlier
761
+ }
762
+
763
+ buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S) {
764
+ const parts = [];
765
+
766
+ // Memory fit
767
+ parts.push(`fits in ${requiredGB}/${budget}GB`);
768
+
769
+ // Quantization
770
+ parts.push(quant);
771
+
772
+ // Special attributes
773
+ if (model.tags.includes('coder')) parts.push('coder-tuned');
774
+ if (model.modalities.includes('vision')) parts.push('vision-capable');
775
+
776
+ // Size sweet spot
777
+ if (model.paramsB >= 7 && model.paramsB <= 13) {
778
+ parts.push(`${model.paramsB}B is sweet spot`);
779
+ }
780
+
781
+ // Backend
782
+ if (hardware.acceleration.supports_metal) parts.push('Metal backend');
783
+ else if (hardware.acceleration.supports_cuda) parts.push('CUDA backend');
784
+
785
+ return parts.join(', ');
786
+ }
787
+
788
+ // ============================================================================
789
+ // PHASE 2: QUICK PROBE (Optional)
790
+ // ============================================================================
791
+
792
+ async runQuickProbes(candidates, hardware, category) {
793
+ // Load cached results
794
+ const cache = this.loadBenchCache();
795
+ const hardwareFingerprint = this.getHardwareFingerprint(hardware);
796
+
797
+ for (const candidate of candidates) {
798
+ const cacheKey = `${hardwareFingerprint}_${candidate.meta.model_identifier}@${candidate.quant}`;
799
+
800
+ // Check cache first
801
+ if (cache[cacheKey] && this.isCacheValid(cache[cacheKey])) {
802
+ const cachedTPS = cache[cacheKey].tps;
803
+ this.updateCandidateWithMeasuredSpeed(candidate, cachedTPS, category);
804
+ candidate.rationale += ` | measured ${cachedTPS.toFixed(1)} t/s (cached)`;
805
+ continue;
806
+ }
807
+
808
+ // Run probe
809
+ try {
810
+ const measuredTPS = await this.runSingleProbe(candidate.meta.model_identifier, category);
811
+ this.updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category);
812
+ candidate.rationale += ` | measured ${measuredTPS.toFixed(1)} t/s`;
813
+
814
+ // Cache result
815
+ cache[cacheKey] = {
816
+ tps: measuredTPS,
817
+ timestamp: Date.now(),
818
+ category
819
+ };
820
+ this.saveBenchCache(cache);
821
+
822
+ } catch (error) {
823
+ console.warn(`Probe failed for ${candidate.meta.name}: ${error.message}`);
824
+ }
825
+ }
826
+ }
827
+
828
+ async runSingleProbe(modelId, category) {
829
+ const prompts = {
830
+ 'coding': 'Write 3 bullet points about the benefits of unit tests.',
831
+ 'general': 'Explain the benefits of regular exercise in 3 sentences.',
832
+ 'reasoning': 'What are the steps to solve a quadratic equation?',
833
+ 'multimodal': 'Describe what you see in this image.', // Text-only fallback
834
+ 'summarization': 'Summarize the key points of effective communication.',
835
+ 'reading': 'What are the main themes in classic literature?'
836
+ };
837
+
838
+ const prompt = prompts[category] || prompts['general'];
839
+ const targetTokens = 128;
840
+
841
+ const startTime = Date.now();
842
+
843
+ // Make HTTP request to Ollama API
844
+ const response = await fetch('http://localhost:11434/api/generate', {
845
+ method: 'POST',
846
+ headers: { 'Content-Type': 'application/json' },
847
+ body: JSON.stringify({
848
+ model: modelId,
849
+ prompt: prompt,
850
+ stream: false,
851
+ options: {
852
+ num_predict: targetTokens
853
+ }
854
+ })
855
+ });
856
+
857
+ if (!response.ok) {
858
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
859
+ }
860
+
861
+ const result = await response.json();
862
+ const elapsedSeconds = (Date.now() - startTime) / 1000;
863
+
864
+ // Estimate tokens generated (simplified)
865
+ const tokensGenerated = result.response ? result.response.split(' ').length * 1.3 : targetTokens;
866
+
867
+ return tokensGenerated / elapsedSeconds;
868
+ }
869
+
870
+ updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
871
+ const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
872
+
873
+ // Recalculate final score with measured speed
874
+ const weights = this.categoryWeights[category];
875
+ const { Q, F, C } = candidate.components;
876
+
877
+ candidate.estTPS = measuredTPS;
878
+ candidate.components.S = normalizedS;
879
+ candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
880
+ }
881
+
882
+ normalizeTPSToScore(tps, category) {
883
+ const target = this.targetSpeeds[category];
884
+ return Math.min(100, Math.round((100 * tps / target) * 10) / 10);
885
+ }
886
+
887
+ loadBenchCache() {
888
+ try {
889
+ if (fs.existsSync(this.benchCachePath)) {
890
+ return JSON.parse(fs.readFileSync(this.benchCachePath, 'utf8'));
891
+ }
892
+ } catch (error) {
893
+ console.warn('Failed to load benchmark cache:', error.message);
894
+ }
895
+ return {};
896
+ }
897
+
898
+ saveBenchCache(cache) {
899
+ try {
900
+ const dir = path.dirname(this.benchCachePath);
901
+ if (!fs.existsSync(dir)) {
902
+ fs.mkdirSync(dir, { recursive: true });
903
+ }
904
+ fs.writeFileSync(this.benchCachePath, JSON.stringify(cache, null, 2));
905
+ } catch (error) {
906
+ console.warn('Failed to save benchmark cache:', error.message);
907
+ }
908
+ }
909
+
910
+ isCacheValid(cacheEntry) {
911
+ const maxAge = 7 * 24 * 60 * 60 * 1000; // 7 days
912
+ return (Date.now() - cacheEntry.timestamp) < maxAge;
913
+ }
914
+
915
+ getHardwareFingerprint(hardware) {
916
+ return `${hardware.cpu.architecture}_${hardware.cpu.cores}c_${hardware.memory.totalGB}gb_${hardware.gpu.type}`;
917
+ }
918
+
919
+ // ============================================================================
920
+ // FORMAT HELPERS (migrated from enhanced-selector.js)
921
+ // ============================================================================
922
+
923
+ /**
924
+ * Map a candidate to the legacy format expected by callers
925
+ */
926
+ mapCandidateToLegacyFormat(candidate) {
927
+ return {
928
+ model_name: candidate.meta.name,
929
+ model_identifier: candidate.meta.model_identifier,
930
+ categoryScore: candidate.score,
931
+ hardwareScore: candidate.components ? candidate.components.F : 90,
932
+ specializationScore: candidate.components ? candidate.components.Q : 85,
933
+ popularityScore: candidate.components ? Math.min(100, (candidate.meta.pulls || 0) / 100000 * 100) : 10,
934
+ efficiencyScore: candidate.components ? candidate.components.S : 80,
935
+ pulls: candidate.meta.pulls || 0,
936
+ size: candidate.meta.paramsB,
937
+ family: candidate.meta.family,
938
+ category: this.inferCategoryFromModel(candidate.meta),
939
+ tags: candidate.meta.tags || [],
940
+ quantization: candidate.quant,
941
+ estimatedRAM: candidate.requiredGB,
942
+ reasoning: candidate.rationale
943
+ };
944
+ }
945
+
946
+ mapHardwareTier(hardware) {
947
+ let ram, cores;
948
+
949
+ if (hardware.memory && hardware.memory.totalGB) {
950
+ ram = hardware.memory.totalGB;
951
+ } else if (hardware.memory && hardware.memory.total) {
952
+ ram = hardware.memory.total;
953
+ } else if (hardware.total_ram_gb) {
954
+ ram = hardware.total_ram_gb;
955
+ } else {
956
+ ram = 8;
957
+ }
958
+
959
+ if (hardware.cpu && hardware.cpu.cores) {
960
+ cores = hardware.cpu.cores;
961
+ } else if (hardware.cpu_cores) {
962
+ cores = hardware.cpu_cores;
963
+ } else {
964
+ cores = 4;
965
+ }
966
+
967
+ if (ram >= 64 && cores >= 16) return 'extreme';
968
+ if (ram >= 32 && cores >= 12) return 'very_high';
969
+ if (ram >= 16 && cores >= 8) return 'high';
970
+ if (ram >= 8 && cores >= 4) return 'medium';
971
+ return 'low';
972
+ }
973
+
974
+ getCategoryInfo(category) {
975
+ const categoryData = {
976
+ coding: { weight: 1.0, keywords: ['code', 'programming', 'coder'] },
977
+ reasoning: { weight: 1.2, keywords: ['reasoning', 'logic', 'math'] },
978
+ multimodal: { weight: 1.1, keywords: ['vision', 'image', 'multimodal'] },
979
+ creative: { weight: 0.9, keywords: ['creative', 'writing', 'story'] },
980
+ talking: { weight: 1.0, keywords: ['chat', 'conversation', 'assistant'] },
981
+ reading: { weight: 1.0, keywords: ['reading', 'comprehension', 'text'] },
982
+ general: { weight: 1.0, keywords: ['general', 'assistant', 'helper'] }
983
+ };
984
+ return categoryData[category] || categoryData.general;
985
+ }
986
+
987
+ inferCategoryFromModel(model) {
988
+ const name = model.name.toLowerCase();
989
+ const tags = model.tags || [];
990
+
991
+ if (tags.includes('coder') || name.includes('code')) return 'coding';
992
+ if (tags.includes('vision') || (model.modalities && model.modalities.includes('vision'))) return 'multimodal';
993
+ if (tags.includes('embed')) return 'embeddings';
994
+ if (name.includes('creative') || name.includes('wizard')) return 'creative';
995
+
996
+ return 'general';
997
+ }
998
+
999
+ formatModelSize(model) {
1000
+ if (model.paramsB) return `${model.paramsB}B`;
1001
+ if (model.size) return `${model.size}B`;
1002
+ return 'Unknown';
1003
+ }
1004
+
1005
+ /**
1006
+ * Generate recommendations by category (main API, replaces EnhancedModelSelector)
1007
+ */
1008
+ async getBestModelsForHardware(hardware, allModels) {
1009
+ const categories = ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
1010
+ const recommendations = {};
1011
+
1012
+ for (const category of categories) {
1013
+ try {
1014
+ const result = await this.selectModels(category, {
1015
+ topN: 3,
1016
+ enableProbe: false,
1017
+ silent: true
1018
+ });
1019
+
1020
+ recommendations[category] = {
1021
+ tier: this.mapHardwareTier(hardware),
1022
+ bestModels: result.candidates.map(candidate => this.mapCandidateToLegacyFormat(candidate)),
1023
+ totalEvaluated: result.total_evaluated,
1024
+ category: this.getCategoryInfo(category)
1025
+ };
1026
+ } catch (error) {
1027
+ recommendations[category] = {
1028
+ tier: this.mapHardwareTier(hardware),
1029
+ bestModels: [],
1030
+ totalEvaluated: 0,
1031
+ category: this.getCategoryInfo(category)
1032
+ };
1033
+ }
1034
+ }
1035
+
1036
+ return recommendations;
1037
+ }
1038
+
1039
+ /**
1040
+ * Generate recommendation summary
1041
+ */
1042
+ generateRecommendationSummary(recommendations, hardware) {
1043
+ const summary = {
1044
+ hardware_tier: this.mapHardwareTier(hardware),
1045
+ total_categories: Object.keys(recommendations).length,
1046
+ best_overall: null,
1047
+ by_category: {},
1048
+ quick_commands: []
1049
+ };
1050
+
1051
+ let bestOverallScore = 0;
1052
+ let bestOverallModel = null;
1053
+ let bestOverallCategory = null;
1054
+
1055
+ Object.entries(recommendations).forEach(([category, data]) => {
1056
+ const bestModel = data.bestModels[0];
1057
+ if (bestModel) {
1058
+ summary.by_category[category] = {
1059
+ name: bestModel.model_name || bestModel.name,
1060
+ identifier: bestModel.model_identifier,
1061
+ score: Math.round(bestModel.categoryScore || bestModel.score),
1062
+ command: `ollama pull ${bestModel.model_identifier}`,
1063
+ size: this.formatModelSize(bestModel),
1064
+ pulls: bestModel.pulls || 0
1065
+ };
1066
+
1067
+ summary.quick_commands.push(`ollama pull ${bestModel.model_identifier}`);
1068
+
1069
+ const isGeneralCategory = ['general', 'coding', 'talking', 'reading'].includes(category);
1070
+ const score = bestModel.categoryScore || bestModel.score || 0;
1071
+
1072
+ if (isGeneralCategory && (score > bestOverallScore || !bestOverallModel)) {
1073
+ bestOverallScore = score;
1074
+ bestOverallModel = bestModel;
1075
+ bestOverallCategory = category;
1076
+ }
1077
+ }
1078
+ });
1079
+
1080
+ if (bestOverallModel) {
1081
+ summary.best_overall = {
1082
+ name: bestOverallModel.model_name || bestOverallModel.name,
1083
+ identifier: bestOverallModel.model_identifier,
1084
+ category: bestOverallCategory,
1085
+ score: Math.round(bestOverallScore),
1086
+ command: `ollama pull ${bestOverallModel.model_identifier}`
1087
+ };
1088
+ }
1089
+
1090
+ return summary;
1091
+ }
1092
+
1093
+ // ============================================================================
1094
+ // PUBLIC API
1095
+ // ============================================================================
1096
+
1097
+ async recommend(category = 'general', options = {}) {
1098
+ const result = await this.selectModels(category, options);
1099
+ return this.formatRecommendations(result);
1100
+ }
1101
+
1102
+ formatRecommendations(result) {
1103
+ const { category, hardware, candidates, total_evaluated } = result;
1104
+
1105
+ console.log(`\n${category.toUpperCase()} RECOMMENDATIONS`);
1106
+ console.log(`Hardware: ${hardware.cpu.cores} cores, ${hardware.memory.totalGB}GB RAM, ${hardware.gpu.type}`);
1107
+ console.log(`Evaluated ${total_evaluated} models\n`);
1108
+
1109
+ if (candidates.length === 0) {
1110
+ console.log('❌ No suitable models found for your hardware');
1111
+ return result;
1112
+ }
1113
+
1114
+ // Table header
1115
+ console.log('┌─────────────────────────────┬────────┬───────┬─────────┬──────────┬───────┬─────────────────────────────┐');
1116
+ console.log('│ Model │ Params │ Quant │ Est t/s │ Mem GB │ Score │ Why │');
1117
+ console.log('├─────────────────────────────┼────────┼───────┼─────────┼──────────┼───────┼─────────────────────────────┤');
1118
+
1119
+ candidates.forEach((candidate, index) => {
1120
+ const isInstalled = candidate.meta.installed ? 'INSTALLED' : 'CLOUD';
1121
+ const name = candidate.meta.name.padEnd(26);
1122
+ const params = `${candidate.meta.paramsB}B`.padEnd(5);
1123
+ const quant = candidate.quant.padEnd(6);
1124
+ const tps = candidate.estTPS.toFixed(1).padStart(7);
1125
+ const mem = `${candidate.requiredGB}/${hardware.usableMemGB}`.padEnd(9);
1126
+ const score = candidate.score.toFixed(1).padStart(5);
1127
+ const why = candidate.rationale.substring(0, 29);
1128
+
1129
+ console.log(`│ ${isInstalled}${name} │ ${params} │ ${quant} │ ${tps} │ ${mem} │ ${score} │ ${why} │`);
1130
+ });
1131
+
1132
+ console.log('└─────────────────────────────┴────────┴───────┴─────────┴──────────┴───────┴─────────────────────────────┘');
1133
+
1134
+ // Best pick
1135
+ const best = candidates[0];
1136
+ console.log(`\nBEST PICK: ${best.meta.name}`);
1137
+ console.log(`Command: ollama pull ${best.meta.model_identifier}`);
1138
+ console.log(`Why: ${best.rationale}`);
1139
+ console.log(`Score: ${best.score} (Q:${best.components.Q} S:${best.components.S} F:${best.components.F} C:${best.components.C})`);
1140
+
1141
+ return result;
1142
+ }
1143
+ }
1144
+
1145
+ module.exports = DeterministicModelSelector;