llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,715 @@
1
+ /**
2
+ * Scoring Engine - Multi-dimensional model scoring system
3
+ *
4
+ * Calculates scores based on:
5
+ * - Q (Quality): Model quality based on params, family, quantization
6
+ * - S (Speed): Estimated inference speed on target hardware
7
+ * - F (Fit): How well the model fits in available memory
8
+ * - C (Context): Context length capability
9
+ *
10
+ * FinalScore = Q × wQ + S × wS + F × wF + C × wC
11
+ */
12
+
13
+ const { SCORING_ENGINE_WEIGHTS } = require('./scoring-config');
14
+
15
/**
 * Multi-dimensional scorer for model variants.
 *
 * Each variant receives four component scores in the 0-100 range (quality,
 * speed, fit, context) which are combined using the weight preset selected
 * by `options.useCase`. Variants may use either snake_case or camelCase
 * field names (`params_b`/`paramsB`, `size_gb`/`sizeGB`, `model_id`/`modelId`,
 * `context_length`/`contextLength`, `is_moe`/`isMoE`).
 */
class ScoringEngine {
    /**
     * @param {Object} [options] - Stored verbatim on `this.options`.
     */
    constructor(options = {}) {
        // Weight presets from centralized config
        this.weightPresets = SCORING_ENGINE_WEIGHTS;

        // Model family quality rankings (0-100 base score).
        // NOTE: insertion order matters - partial matching in getFamilyScore()
        // returns the first entry that matches.
        this.familyQuality = {
            // Frontier models
            'qwen2.5': 95,
            'qwen2': 90,
            'llama3.3': 95,
            'llama3.2': 92,
            'llama3.1': 90,
            'llama3': 88,
            'deepseek-v3': 96,
            'deepseek-v2.5': 94,
            'deepseek-coder-v2': 92,
            'deepseek-r1': 96,
            'gemma2': 90,
            'gemma': 82,
            'phi-4': 92,
            'phi-3.5': 88,
            'phi-3': 85,
            'phi-2': 75,
            'mistral-large': 94,
            'mistral': 85,
            'mixtral': 88,
            'command-r': 90,
            'command-r-plus': 93,

            // Coding specialists
            'qwen2.5-coder': 96,
            'codellama': 82,
            'starcoder2': 85,
            'deepseek-coder': 88,
            'codegemma': 80,
            'granite-code': 78,

            // Chat/instruct
            'yi': 85,
            'yi-coder': 88,
            'openchat': 78,
            'neural-chat': 75,
            'zephyr': 80,
            'openhermes': 82,
            'nous-hermes': 82,
            'dolphin': 80,
            'orca': 78,

            // Vision models
            'llava': 82,
            'llava-llama3': 85,
            'llava-phi3': 80,
            'bakllava': 78,
            'moondream': 75,

            // Embeddings
            'nomic-embed-text': 85,
            'mxbai-embed-large': 88,
            'all-minilm': 80,
            'snowflake-arctic-embed': 85,

            // Other notable models
            'solar': 82,
            'falcon': 75,
            'vicuna': 72,
            'wizardlm': 78,
            'aya': 85,
            'smollm': 70,
            'tinyllama': 65
        };

        // Quantization quality penalties (subtracted from base score)
        this.quantPenalties = {
            'FP16': 0,
            'F16': 0,
            'Q8_0': 2,
            'Q6_K': 4,
            'Q5_K_M': 6,
            'Q5_K_S': 7,
            'Q5_0': 8,
            'Q4_K_M': 10,
            'Q4_K_S': 11,
            'Q4_0': 12,
            'Q3_K_M': 16,
            'Q3_K_S': 18,
            'Q3_K_L': 15,
            'IQ4_XS': 11,
            'IQ4_NL': 10,
            'IQ3_XXS': 20,
            'IQ3_XS': 18,
            'IQ3_S': 17,
            'IQ2_XS': 25,
            'IQ2_XXS': 28,
            'Q2_K': 22,
            'Q2_K_S': 24
        };

        // Task-specific bonuses for model families
        this.taskBonuses = {
            coding: {
                'qwen2.5-coder': 15,
                'deepseek-coder': 12,
                'deepseek-coder-v2': 15,
                'codellama': 10,
                'starcoder2': 12,
                'codegemma': 8,
                'yi-coder': 10,
                'granite-code': 8
            },
            reasoning: {
                'deepseek-r1': 15,
                'qwen2.5': 10,
                'llama3.3': 10,
                'phi-4': 12,
                'command-r-plus': 10,
                'mistral-large': 10
            },
            chat: {
                'llama3.2': 10,
                'mistral': 8,
                'gemma2': 8,
                'openchat': 10,
                'neural-chat': 8,
                'dolphin': 8
            },
            vision: {
                'llava': 15,
                'llava-llama3': 18,
                'llava-phi3': 15,
                'bakllava': 12,
                'moondream': 10
            },
            embeddings: {
                'nomic-embed-text': 15,
                'mxbai-embed-large': 18,
                'all-minilm': 12,
                'snowflake-arctic-embed': 15
            },
            creative: {
                'mistral': 8,
                'mixtral': 10,
                'openhermes': 8,
                'dolphin': 10
            },
            multilingual: {
                'aya': 15,
                'qwen2.5': 10,
                'command-r': 12
            }
        };

        // Speed coefficients by backend (tokens/sec for 7B Q4_K_M as baseline)
        this.backendSpeed = {
            // NVIDIA
            'cuda_h100': 120,       // ~100-140 TPS for 7B Q4
            'cuda_a100': 90,        // ~80-100 TPS for 7B Q4
            'cuda_4090': 70,        // ~60-80 TPS for 7B Q4
            'cuda_4080': 55,        // ~50-60 TPS for 7B Q4
            'cuda_3090': 50,        // ~45-55 TPS for 7B Q4
            'cuda_3080': 40,        // ~35-45 TPS for 7B Q4
            'cuda_3070': 32,        // ~28-35 TPS for 7B Q4
            'cuda_3060': 25,        // ~20-28 TPS for 7B Q4
            'cuda_2080': 28,        // ~25-30 TPS for 7B Q4
            'cuda_default': 30,

            // AMD - slightly lower than equivalent NVIDIA
            'rocm_mi300': 100,
            'rocm_mi250': 70,
            'rocm_7900xtx': 55,
            'rocm_7900xt': 45,
            'rocm_7800xt': 38,
            'rocm_6900xt': 35,
            'rocm_default': 30,

            // Apple Silicon
            'metal_m4_ultra': 75,   // ~70-80 TPS for 7B Q4
            'metal_m4_max': 60,     // ~55-65 TPS for 7B Q4
            'metal_m4_pro': 45,     // ~40-50 TPS for 7B Q4
            'metal_m4': 35,         // ~30-40 TPS for 7B Q4
            'metal_m3_ultra': 65,
            'metal_m3_max': 50,
            'metal_m3_pro': 40,
            'metal_m3': 30,
            'metal_m2_ultra': 55,
            'metal_m2_max': 45,
            'metal_m2_pro': 35,
            'metal_m2': 28,
            'metal_m1_ultra': 45,
            'metal_m1_max': 38,
            'metal_m1_pro': 30,
            'metal_m1': 22,
            'metal_default': 30,

            // Intel Arc - limited real-world data
            'intel_arc_a770': 30,
            'intel_arc_a750': 25,
            'intel_arc_default': 20,

            // CPU - very conservative
            'cpu_avx512_amx': 12,   // Best case server CPU
            'cpu_avx512': 8,        // Good desktop CPU
            'cpu_avx2': 5,          // Most modern CPUs
            'cpu_neon': 4,          // Apple Silicon fallback
            'cpu_avx': 3,           // Older CPUs
            'cpu_default': 2        // Very old CPUs
        };

        // Quantization speed multipliers (relative to Q4_K_M baseline)
        this.quantSpeedMult = {
            'FP16': 0.5,            // Half speed of Q4 (2x memory bandwidth)
            'F16': 0.5,
            'Q8_0': 0.7,            // ~70% of Q4 speed
            'Q6_K': 0.85,           // ~85% of Q4 speed
            'Q5_K_M': 0.92,
            'Q5_K_S': 0.92,
            'Q5_0': 0.92,
            'Q4_K_M': 1.0,          // Baseline
            'Q4_K_S': 1.0,
            'Q4_0': 1.05,
            'Q3_K_M': 1.15,         // Faster but quality loss
            'Q3_K_S': 1.15,
            'Q3_K_L': 1.1,
            'IQ4_XS': 1.02,
            'IQ4_NL': 1.0,
            'IQ3_XXS': 1.2,
            'IQ3_XS': 1.18,
            'IQ3_S': 1.15,
            'IQ2_XS': 1.25,
            'IQ2_XXS': 1.28,
            'Q2_K': 1.22,
            'Q2_K_S': 1.25
        };

        this.options = options;
    }

    /**
     * Estimate model size from parameter count and quantization.
     *
     * @param {Object} variant - Reads `params_b`/`paramsB` and `quant`
     *     (missing quant is treated as Q4_K_M).
     * @returns {number|null} Estimated size in GB, or null when the
     *     parameter count is missing.
     */
    estimateSizeFromParams(variant) {
        const params = variant.params_b || variant.paramsB;
        if (!params) return null;

        const quant = (variant.quant || 'Q4_K_M').toUpperCase();

        // Ordered rules: [substrings, GB per 1B params]. First match wins.
        // IQn names contain "Qn", so they share the Qn rule.
        const gbPerBillion = [
            [['FP16', 'F16'], 2],
            [['Q8'], 1],
            [['Q6'], 0.75],
            [['Q5'], 0.6],
            [['Q4'], 0.5],
            [['Q3'], 0.4],
            [['Q2'], 0.3]
        ];

        for (const [needles, factor] of gbPerBillion) {
            if (needles.some(needle => quant.includes(needle))) {
                return params * factor;
            }
        }

        return params * 0.5; // Unrecognized quantization: use the Q4 estimate
    }

    /**
     * Get model size: the declared size when positive, otherwise an estimate.
     *
     * @param {Object} variant - Reads `size_gb`/`sizeGB`, then falls back to
     *     estimateSizeFromParams().
     * @returns {number|null} Size in GB, or null when it cannot be determined.
     */
    getModelSize(variant) {
        const size = variant.size_gb || variant.sizeGB;
        if (size && size > 0) return size;
        return this.estimateSizeFromParams(variant);
    }

    /**
     * Calculate overall score for a model variant.
     *
     * @param {Object} variant - Model variant data
     * @param {Object} hardware - Hardware info; reads `summary.*` and CPU capabilities
     * @param {Object} [options] - Scoring options
     * @param {string} [options.useCase='general'] - Weight preset / task bonus key
     * @param {number} [options.targetContext=8192] - Desired context length
     * @param {number} [options.targetTPS=20] - Desired tokens per second
     * @param {number} [options.headroom=2] - GB of memory reserved for the system
     * @returns {Object} `{ final, components, weights, meta }`; `final` is
     *     clamped to 0-100 and components are rounded.
     */
    score(variant, hardware, options = {}) {
        const useCase = options.useCase || 'general';
        const targetContext = options.targetContext || 8192;
        const targetTPS = options.targetTPS || 20;
        // Same headroom that filterAndScore() uses, so filtering and the fit
        // score agree. `??` keeps an explicit 0.
        const headroom = options.headroom ?? 2;

        const weights = this.weightPresets[useCase] || this.weightPresets.general;

        // Calculate individual scores
        const Q = this.calculateQualityScore(variant, useCase);
        const S = this.calculateSpeedScore(variant, hardware, targetTPS);
        const F = this.calculateFitScore(variant, hardware, headroom);
        const C = this.calculateContextScore(variant, targetContext);

        // Calculate weighted final score
        const finalScore = Math.round(
            Q * weights.Q +
            S * weights.S +
            F * weights.F +
            C * weights.C
        );

        return {
            final: Math.min(100, Math.max(0, finalScore)),
            components: {
                quality: Math.round(Q),
                speed: Math.round(S),
                fit: Math.round(F),
                context: Math.round(C)
            },
            weights,
            meta: {
                useCase,
                family: this.extractFamily(variant.model_id || variant.modelId),
                params: variant.params_b || variant.paramsB,
                quant: variant.quant,
                estimatedTPS: this.estimateTPS(variant, hardware),
                // Report the same size the fit score used (declared or
                // estimated); stays undefined when nothing is known.
                estimatedSize: this.getModelSize(variant) ?? undefined
            }
        };
    }

    /**
     * Calculate Quality score (Q) from model family, parameter count,
     * quantization, task bonus and MoE flag.
     *
     * @param {Object} variant - Model variant data
     * @param {string} useCase - Key into `taskBonuses`
     * @returns {number} Score clamped to 0-100
     */
    calculateQualityScore(variant, useCase) {
        const family = this.extractFamily(variant.model_id || variant.modelId);
        const params = variant.params_b || variant.paramsB || 7;
        const quant = (variant.quant || 'Q4_K_M').toUpperCase();

        // Base family score
        const baseScore = this.getFamilyScore(family);

        // Parameter size bonus (larger models generally better, with diminishing returns)
        let paramBonus = 0;
        if (params >= 70) paramBonus = 15;
        else if (params >= 32) paramBonus = 12;
        else if (params >= 14) paramBonus = 8;
        else if (params >= 7) paramBonus = 5;
        else if (params >= 3) paramBonus = 2;

        // Quantization penalty. `??` rather than `||`: FP16/F16 have a
        // legitimate penalty of 0 that must not fall through to the default.
        const quantPenalty = this.quantPenalties[quant] ?? 10;

        // Task-specific bonus
        const taskBonus = this.getTaskBonus(family, useCase);

        // MoE bonus (mixture of experts models are often better quality/speed ratio)
        const moeBonus = (variant.is_moe || variant.isMoE) ? 5 : 0;

        const score = baseScore + paramBonus - quantPenalty + taskBonus + moeBonus;

        return Math.min(100, Math.max(0, score));
    }

    /**
     * Calculate Speed score (S) from estimated tokens per second vs target.
     *
     * @param {Object} variant - Model variant data
     * @param {Object} hardware - Hardware info
     * @param {number} targetTPS - Desired tokens per second
     * @returns {number} Score in 0-100 (not rounded)
     */
    calculateSpeedScore(variant, hardware, targetTPS) {
        const estimatedTPS = this.estimateTPS(variant, hardware);

        if (estimatedTPS >= targetTPS * 2) {
            return 100; // 2x target = perfect score
        } else if (estimatedTPS >= targetTPS) {
            // Linear scaling from 80-100 for 1x-2x target
            return 80 + (estimatedTPS - targetTPS) / targetTPS * 20;
        } else if (estimatedTPS >= targetTPS * 0.5) {
            // Linear scaling from 65-80 for 0.5x-1x target
            return 50 + (estimatedTPS / targetTPS) * 30;
        } else {
            // Below 50% target, steep penalty (0 up to just under 25)
            return Math.max(0, (estimatedTPS / targetTPS) * 50);
        }
    }

    /**
     * Calculate Fit score (F): how well the model fits in available memory.
     *
     * @param {Object} variant - Model variant data
     * @param {Object} hardware - Reads `summary.effectiveMemory` (GB, default 8)
     * @param {number} [headroom=2] - GB reserved for the system
     * @returns {number} Score in 0-100 (not rounded); 70 when the model size
     *     is unknown, 0 when no memory remains after headroom.
     */
    calculateFitScore(variant, hardware, headroom = 2) {
        const modelSize = this.getModelSize(variant);

        // No size info available - give moderate score
        if (!modelSize) return 70;

        const availableMemory = hardware?.summary?.effectiveMemory || 8;
        const effectiveAvailable = availableMemory - headroom;

        // Nothing left once headroom is reserved. Without this guard a
        // negative denominator yields a negative usage ratio, which would be
        // classified as "plenty of room".
        if (effectiveAvailable <= 0) return 0;

        const usage = modelSize / effectiveAvailable;

        if (usage <= 0.7) {
            return 100; // Plenty of room
        } else if (usage <= 0.85) {
            // Comfortable fit: 100 down to 90
            return 90 + (0.85 - usage) / 0.15 * 10;
        } else if (usage <= 1.0) {
            // Tight fit: 90 down to 70
            return 70 + (1.0 - usage) / 0.15 * 20;
        } else if (usage <= 1.2) {
            // May work with swapping (especially on Mac): 50 down to 30
            return 50 - (usage - 1.0) * 100;
        } else {
            // Won't fit
            return 0;
        }
    }

    /**
     * Calculate Context score (C) from context length capability vs target.
     *
     * @param {Object} variant - Reads `context_length`/`contextLength` (default 4096)
     * @param {number} targetContext - Desired context length
     * @returns {number} Score in 0-100 (not rounded)
     */
    calculateContextScore(variant, targetContext) {
        const contextLength = variant.context_length || variant.contextLength || 4096;

        if (contextLength >= targetContext * 2) {
            return 100; // Much more than needed
        } else if (contextLength >= targetContext) {
            // Meets requirement: 85 up to 100
            return 85 + (contextLength - targetContext) / targetContext * 15;
        } else if (contextLength >= targetContext * 0.5) {
            // Partially meets requirement: 67.5 up to 85
            return 50 + (contextLength / targetContext) * 35;
        } else {
            // Inadequate: 0 up to just under 25
            return (contextLength / targetContext) * 50;
        }
    }

    /**
     * Estimate tokens per second.
     *
     * Combines the backend baseline (TPS for a 7B Q4_K_M model), a
     * size multiplier with diminishing returns, a quantization multiplier
     * and an MoE speedup.
     *
     * @param {Object} variant - Reads params (default 7), quant (default
     *     Q4_K_M) and the MoE flag
     * @param {Object} hardware - Hardware info passed to getBackendKey()
     * @returns {number} Rounded TPS estimate, never below 1
     */
    estimateTPS(variant, hardware) {
        const params = variant.params_b || variant.paramsB || 7;
        const quant = (variant.quant || 'Q4_K_M').toUpperCase();
        const isMoE = variant.is_moe || variant.isMoE || false;

        // Get backend speed coefficient (TPS for 7B Q4_K_M)
        const backendKey = this.getBackendKey(hardware);
        const baseSpeed = this.backendSpeed[backendKey] || this.backendSpeed.cpu_default;

        // Get quantization multiplier (relative to Q4_K_M = 1.0)
        const quantMult = this.quantSpeedMult[quant] || 1.0;

        // Size relative to the 7B baseline
        const sizeRatio = 7 / params;

        let sizeMult;
        if (params < 3) {
            // Very small models: limited by overhead, max ~2x baseline
            sizeMult = Math.min(2.0, 1 + (sizeRatio - 1) * 0.35);
        } else if (params < 7) {
            // Small models: some speedup but not linear
            sizeMult = 1 + (sizeRatio - 1) * 0.35;
        } else if (params <= 14) {
            // Medium models: close to linear
            sizeMult = sizeRatio;
        } else if (params <= 32) {
            // Large models: slight efficiency boost
            sizeMult = sizeRatio * 1.1;
        } else {
            // Very large models: memory bandwidth limited, slower than linear
            sizeMult = sizeRatio * 0.85;
        }

        let tps = baseSpeed * sizeMult * quantMult;

        // MoE models only activate part of their params per token;
        // routing overhead limits the speedup to ~1.8x.
        if (isMoE) {
            tps *= 1.8;
        }

        // Apply minimum floor (can't go below 1 TPS)
        return Math.max(1, Math.round(tps));
    }

    /**
     * Get the `backendSpeed` key that best describes the hardware.
     *
     * @param {Object} hardware - Reads `summary.bestBackend`,
     *     `summary.gpuModel` and, for CPU, `cpu.capabilities` or
     *     `backends.cpu.info.capabilities`
     * @returns {string} A key of `this.backendSpeed`
     */
    getBackendKey(hardware) {
        if (!hardware?.summary) return 'cpu_default';

        const backend = hardware.summary.bestBackend;
        const gpuModel = (hardware.summary.gpuModel || '').toLowerCase();

        // Return the key of the first rule whose substring occurs in the GPU
        // model name. Rule order matters: more specific names come first.
        const pick = (rules, fallback) => {
            const hit = rules.find(([needle]) => gpuModel.includes(needle));
            return hit ? hit[1] : fallback;
        };

        if (backend === 'cuda') {
            return pick([
                ['h100', 'cuda_h100'],
                ['a100', 'cuda_a100'],
                ['4090', 'cuda_4090'],
                ['4080', 'cuda_4080'],
                ['3090', 'cuda_3090'],
                ['3080', 'cuda_3080'],
                ['3070', 'cuda_3070'],
                ['3060', 'cuda_3060'],
                ['2080', 'cuda_2080']
            ], 'cuda_default');
        }

        if (backend === 'rocm') {
            return pick([
                ['mi300', 'rocm_mi300'],
                ['mi250', 'rocm_mi250'],
                ['7900 xtx', 'rocm_7900xtx'],
                ['7900 xt', 'rocm_7900xt'],
                ['7800', 'rocm_7800xt'],
                ['6900', 'rocm_6900xt']
            ], 'rocm_default');
        }

        if (backend === 'metal') {
            return pick([
                ['m4 ultra', 'metal_m4_ultra'],
                ['m4 max', 'metal_m4_max'],
                ['m4 pro', 'metal_m4_pro'],
                ['m4', 'metal_m4'],
                ['m3 ultra', 'metal_m3_ultra'],
                ['m3 max', 'metal_m3_max'],
                ['m3 pro', 'metal_m3_pro'],
                ['m3', 'metal_m3'],
                ['m2 ultra', 'metal_m2_ultra'],
                ['m2 max', 'metal_m2_max'],
                ['m2 pro', 'metal_m2_pro'],
                ['m2', 'metal_m2'],
                ['m1 ultra', 'metal_m1_ultra'],
                ['m1 max', 'metal_m1_max'],
                ['m1 pro', 'metal_m1_pro'],
                ['m1', 'metal_m1']
            ], 'metal_default');
        }

        if (backend === 'intel') {
            return pick([
                ['a770', 'intel_arc_a770'],
                ['a750', 'intel_arc_a750']
            ], 'intel_arc_default');
        }

        // CPU backend: pick the most capable instruction set reported
        const cpu = hardware.cpu || hardware.backends?.cpu?.info;
        if (cpu?.capabilities) {
            if (cpu.capabilities.amx) return 'cpu_avx512_amx';
            if (cpu.capabilities.avx512) return 'cpu_avx512';
            if (cpu.capabilities.avx2) return 'cpu_avx2';
            if (cpu.capabilities.neon) return 'cpu_neon';
            if (cpu.capabilities.avx) return 'cpu_avx';
        }

        return 'cpu_default';
    }

    /**
     * Extract model family from model ID.
     *
     * Strips any namespace ("library/qwen2.5" -> "qwen2.5") and tag
     * ("qwen2.5:7b-q4" -> "qwen2.5"), then returns the longest known family
     * contained in the remaining name.
     *
     * @param {string} modelId - Model identifier
     * @returns {string} Known family key, the bare model name when no family
     *     matches, or 'unknown' when there is no usable name.
     */
    extractFamily(modelId) {
        if (!modelId) return 'unknown';

        const id = String(modelId).toLowerCase();

        // Remove namespace if present
        const name = id.includes('/') ? id.split('/').pop() : id;

        // Remove tag if present
        const base = name.split(':')[0];

        // Longest family first so "qwen2.5-coder" wins over "qwen2.5"
        const families = Object.keys(this.familyQuality).sort((a, b) => b.length - a.length);
        for (const family of families) {
            if (base.includes(family)) {
                return family;
            }
        }

        // An empty name (e.g. ":7b") would match every family in the partial
        // lookups below, so report it as unknown instead.
        return base || 'unknown';
    }

    /**
     * Get family quality score.
     *
     * @param {string} family - Family name as returned by extractFamily()
     * @returns {number} Base quality score; 70 for unknown families
     */
    getFamilyScore(family) {
        // Direct match (type check avoids hitting inherited object properties)
        const direct = this.familyQuality[family];
        if (typeof direct === 'number' && direct) {
            return direct;
        }

        // Partial match: first entry (in insertion order) that contains, or
        // is contained in, the family name
        for (const [key, score] of Object.entries(this.familyQuality)) {
            if (family.includes(key) || key.includes(family)) {
                return score;
            }
        }

        // Default for unknown families
        return 70;
    }

    /**
     * Get task-specific bonus.
     *
     * @param {string} family - Family name as returned by extractFamily()
     * @param {string} useCase - Key into `taskBonuses`
     * @returns {number} Bonus points; 0 when the use case or family has none
     */
    getTaskBonus(family, useCase) {
        const bonuses = this.taskBonuses[useCase] || {};

        // Direct match (type check avoids hitting inherited object properties)
        const direct = bonuses[family];
        if (typeof direct === 'number' && direct) {
            return direct;
        }

        // Partial match: first entry that contains, or is contained in, the family name
        for (const [key, bonus] of Object.entries(bonuses)) {
            if (family.includes(key) || key.includes(family)) {
                return bonus;
            }
        }

        return 0;
    }

    /**
     * Score multiple variants and return them sorted by final score.
     *
     * @param {Object[]} variants - Variants to score (null/undefined is treated as empty)
     * @param {Object} hardware - Hardware info
     * @param {Object} [options] - Forwarded to score()
     * @returns {{variant: Object, score: Object}[]} Sorted descending by `score.final`
     */
    scoreAll(variants, hardware, options = {}) {
        const scored = (variants ?? []).map(variant => ({
            variant,
            score: this.score(variant, hardware, options)
        }));

        // Sort by final score (descending)
        scored.sort((a, b) => b.score.final - a.score.final);

        return scored;
    }

    /**
     * Drop variants that cannot fit in memory, then score the rest.
     *
     * @param {Object[]} variants - Variants to consider (null/undefined is treated as empty)
     * @param {Object} hardware - Reads `summary.effectiveMemory` (GB, default 8)
     * @param {Object} [options] - Forwarded to score(); `options.headroom`
     *     (GB, default 2) is reserved for the system
     * @returns {{variant: Object, score: Object}[]} Sorted descending by `score.final`
     */
    filterAndScore(variants, hardware, options = {}) {
        const maxSize = hardware?.summary?.effectiveMemory || 8;
        // `??` so an explicit headroom of 0 is honored
        const headroom = options.headroom ?? 2;
        const effectiveMax = maxSize - headroom;

        const fitting = (variants ?? []).filter(v => {
            const size = this.getModelSize(v);

            // No size info - include but will get moderate fit score
            if (!size) return true;

            return size <= effectiveMax * 1.1; // Allow 10% overflow
        });

        return this.scoreAll(fitting, hardware, options);
    }

    /**
     * Group scored variants into recommendation categories by final score.
     *
     * @param {{score: {final: number}}[]} scoredVariants - Output of scoreAll()
     * @returns {Object} `{ excellent, recommended, acceptable, marginal, notRecommended }`
     */
    categorizeScores(scoredVariants) {
        const categories = {
            excellent: [],      // 85+
            recommended: [],    // 70-84
            acceptable: [],     // 55-69
            marginal: [],       // 40-54
            notRecommended: []  // <40
        };

        for (const item of scoredVariants) {
            const score = item.score.final;

            if (score >= 85) {
                categories.excellent.push(item);
            } else if (score >= 70) {
                categories.recommended.push(item);
            } else if (score >= 55) {
                categories.acceptable.push(item);
            } else if (score >= 40) {
                categories.marginal.push(item);
            } else {
                categories.notRecommended.push(item);
            }
        }

        return categories;
    }
}
714
+
715
+ module.exports = ScoringEngine;