llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,820 @@
1
+ /**
2
+ * Multi-Objective Model Selector
3
+ *
4
+ * Implements the improved algorithm with:
5
+ * 1. Hard filters (memory, compatibility)
6
+ * 2. Multi-objective scoring (quality, speed, hardware-match, context, headroom)
7
+ * 3. Hardware-appropriate model size recommendations
8
+ * 4. Benchmark-based quality scoring
9
+ */
10
+
11
+ const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
12
+
13
class MultiObjectiveSelector {
    /**
     * Sets up the scoring tables used by every other method:
     * - `categoryWeights`: per-category component weights from the shared scoring config
     * - `optimalSizes`: preferred model size range per hardware tier
     * - `qualityPriors`: rough per-family quality estimates
     */
    constructor() {
        // Performance weights from centralized config
        this.categoryWeights = MULTI_OBJECTIVE_WEIGHTS;

        // Optimal model sizes by hardware tier (in billions of parameters)
        this.optimalSizes = {
            'flagship': { min: 30, max: 175, sweet: 70 },    // RTX 5090 tier: 30B-175B models
            'ultra_high': { min: 20, max: 105, sweet: 35 },  // RTX 4090 tier: 20B-105B models
            'high': { min: 8, max: 50, sweet: 20 },          // RTX 4080, Apple 32GB: 8B-50B models
            'medium': { min: 3, max: 20, sweet: 8 },         // RTX 4070, Apple 16GB: 3B-20B models
            'low': { min: 1, max: 8, sweet: 3 },             // Budget systems: 1B-8B models
            'ultra_low': { min: 0.1, max: 3, sweet: 1 }      // Very limited: <3B models
        };

        // Quality priors (normalized 0-1), keyed by the family names that
        // guessModelFamily() returns
        this.qualityPriors = {
            'llama': { base: 0.75, coding: 0.80, reasoning: 0.78 },
            'qwen': { base: 0.73, coding: 0.85, reasoning: 0.76 },
            'mistral': { base: 0.72, coding: 0.75, reasoning: 0.80 },
            'gemma': { base: 0.68, coding: 0.70, reasoning: 0.72 },
            'phi': { base: 0.65, coding: 0.78, reasoning: 0.70 },
            'tinyllama': { base: 0.45, coding: 0.50, reasoning: 0.48 },
            'default': { base: 0.60, coding: 0.60, reasoning: 0.60 }
        };
    }

    /**
     * Select best models using multi-objective ranking.
     *
     * Pipeline: hard filters -> per-model weighted score -> sort descending -> classify.
     * When no model passes the hard filters, every input model is returned as
     * `incompatible`. Otherwise models rejected by the hard filters are simply
     * absent from the result.
     *
     * @param {object} hardware - Detected hardware description (memory, cpu, gpu).
     * @param {object[]} models - Candidate model records.
     * @param {string} [category='general'] - Key into the category weights.
     * @param {number} [topK=10] - Maximum entries in `compatible` and `marginal`.
     * @returns {Promise<{compatible: object[], marginal: object[], incompatible: object[]}>}
     */
    async selectBestModels(hardware, models, category = 'general', topK = 10) {
        // Step 1: Hard filters - remove incompatible models
        const compatibleModels = this.applyHardFilters(hardware, models);

        if (compatibleModels.length === 0) {
            return { compatible: [], marginal: [], incompatible: models };
        }

        // Step 2: Multi-objective scoring
        const scoredModels = compatibleModels
            .map((model) => this.calculateMultiObjectiveScore(hardware, model, category))
            .filter(Boolean);

        // Step 3: Sort (best first) and classify
        scoredModels.sort((a, b) => b.totalScore - a.totalScore);

        return this.classifyResults(scoredModels, topK);
    }

    /**
     * Step 1: Hard filters - memory, architecture, basic requirements.
     *
     * @param {object} hardware
     * @param {object[]} models
     * @returns {object[]} Models that pass all three checks.
     */
    applyHardFilters(hardware, models) {
        return models.filter((model) =>
            this.checkMemoryCompatibility(hardware, model) &&
            this.checkArchitectureCompatibility(hardware, model) &&
            this.checkBasicRequirements(hardware, model)
        );
    }

    /**
     * Checks whether a model fits the hardware, using both a per-tier hard size
     * cap and a memory budget (a tier-specific fraction of total system memory).
     *
     * @param {object} hardware
     * @param {object} model - Uses `model.size` and `model.context` (default 4096).
     * @returns {boolean}
     */
    checkMemoryCompatibility(hardware, model) {
        const modelSizeGB = this.parseModelSize(model.size);
        const contextLength = model.context || 4096;

        const hardwareTier = this.getHardwareTier(hardware);

        // Estimate KV cache (rough approximation)
        const kvCacheGB = this.estimateKVCache(model, contextLength);
        const totalMemoryNeeded = modelSizeGB + kvCacheGB;

        // Tier-based limits: maxModelSize is compared against the parsed size in GB
        const tierLimits = {
            'flagship': { maxModelSize: 180, availableMemoryRatio: 0.85 },
            'ultra_high': { maxModelSize: 105, availableMemoryRatio: 0.8 },
            'high': { maxModelSize: 50, availableMemoryRatio: 0.75 },
            'medium': { maxModelSize: 20, availableMemoryRatio: 0.7 },
            'low': { maxModelSize: 8, availableMemoryRatio: 0.6 },
            'ultra_low': { maxModelSize: 3, availableMemoryRatio: 0.5 }
        };

        const limits = tierLimits[hardwareTier] || tierLimits['ultra_low'];

        // Hard size limit: too large for this tier regardless of RAM
        if (modelSizeGB > limits.maxModelSize) {
            return false;
        }

        // Missing memory info is treated as 0 GB (nothing fits) instead of throwing
        const totalRamGB = hardware.memory?.total || 0;
        const availableMemory = totalRamGB * limits.availableMemoryRatio;

        return totalMemoryNeeded <= availableMemory;
    }

    /**
     * Step 2: Multi-objective scoring.
     *
     * Computes five component scores (each 0-1), combines them with the weights
     * for `category` (falling back to the 'general' weights), and scales to 0-100.
     *
     * @param {object} hardware
     * @param {object} model
     * @param {string} category
     * @returns {object} A copy of `model` extended with `totalScore`, `components` and `reasoning`.
     */
    calculateMultiObjectiveScore(hardware, model, category) {
        const weights = this.categoryWeights[category] || this.categoryWeights['general'];

        // Individual component scores (0-1)
        const qualityScore = this.calculateQualityScore(model, category);
        const speedScore = this.calculateSpeedScore(hardware, model);
        const ttfbScore = this.calculateTTFBScore(hardware, model);
        const contextScore = this.calculateContextScore(model, category);
        const hardwareMatchScore = this.calculateHardwareMatchScore(hardware, model);

        // Weighted total
        const totalScore = (
            weights.quality * qualityScore +
            weights.speed * speedScore +
            weights.ttfb * ttfbScore +
            weights.context * contextScore +
            weights.hardwareMatch * hardwareMatchScore
        ) * 100; // Scale to 0-100

        return {
            ...model,
            totalScore: Math.round(totalScore * 100) / 100, // two decimals
            components: {
                quality: qualityScore,
                speed: speedScore,
                ttfb: ttfbScore,
                context: contextScore,
                hardwareMatch: hardwareMatchScore
            },
            reasoning: this.generateReasoning(model, hardware, qualityScore, hardwareMatchScore)
        };
    }

    /**
     * Quality score: family prior (category-specific for 'coding' and 'reasoning')
     * plus a size bonus of up to 0.15, capped at 1.0.
     *
     * @param {object} model
     * @param {string} category
     * @returns {number} Score in [0, 1].
     */
    calculateQualityScore(model, category) {
        const family = this.guessModelFamily(model.name);
        const priors = this.qualityPriors[family] || this.qualityPriors['default'];

        let baseQuality = priors.base;
        if (category === 'coding' && priors.coding) {
            baseQuality = priors.coding;
        } else if (category === 'reasoning' && priors.reasoning) {
            baseQuality = priors.reasoning;
        }

        // Scale adjustment: logarithmic in parameter count, no bonus below 1B
        const modelSizeB = this.estimateModelParams(model);
        const scaleBonus = Math.min(0.15, Math.log2(Math.max(1, modelSizeB)) * 0.03);

        return Math.min(1.0, baseQuality + scaleBonus);
    }

    /**
     * Hardware-size matching score: rewards models whose estimated parameter
     * count sits inside the tier's optimal range, with the best score near the
     * tier's sweet spot.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {number} One of 0.1, 0.3, 0.4, 0.5, 0.6, 0.8, 1.0.
     */
    calculateHardwareMatchScore(hardware, model) {
        const hardwareTier = this.getHardwareTier(hardware);
        const modelSizeB = this.estimateModelParams(model);
        const optimal = this.optimalSizes[hardwareTier];

        if (!optimal) return 0.5; // Fallback for an unknown tier

        if (modelSizeB < optimal.min) {
            // Underutilization penalty, graded by how far below the minimum we are
            const underutilization = optimal.min / modelSizeB;
            if (underutilization >= 10) return 0.1; // Severe penalty
            if (underutilization >= 5) return 0.3;  // Moderate penalty
            return 0.6;                             // Small penalty
        }

        if (modelSizeB > optimal.max) {
            // Too large, but let memory filter handle this
            return 0.4;
        }

        // In range: full score within 30% of the sweet spot
        const distanceFromSweet = Math.abs(modelSizeB - optimal.sweet) / optimal.sweet;
        return distanceFromSweet <= 0.3 ? 1.0 : 0.8;
    }

    /**
     * Speed score: estimated tokens/second normalized so that 50 tok/s = 1.0.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {number} Score in [0, 1].
     */
    calculateSpeedScore(hardware, model) {
        const estimatedTokps = this.estimateTokensPerSecond(hardware, model);
        return Math.min(1.0, estimatedTokps / 50);
    }

    /**
     * Time-to-first-byte score: lower is better; anything at or below 1000 ms scores 1.0.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {number} Score in (0, 1].
     */
    calculateTTFBScore(hardware, model) {
        const estimatedTTFB = this.estimateTTFB(hardware, model);
        return Math.min(1.0, 1000 / Math.max(100, estimatedTTFB));
    }

    /**
     * Context score: the model's context window relative to a target
     * (32768 for the 'longctx' category, 4096 otherwise), capped at 1.0.
     *
     * @param {object} model - Uses `model.context` (default 4096).
     * @param {string} category
     * @returns {number} Score in (0, 1].
     */
    calculateContextScore(model, category) {
        const contextLength = model.context || 4096;
        const targetContext = category === 'longctx' ? 32768 : 4096;
        return Math.min(1.0, contextLength / targetContext);
    }

    /**
     * Buckets scored models by total score: >= 65 compatible, 45-65 marginal,
     * < 45 incompatible. Input order is preserved inside each bucket.
     *
     * @param {object[]} scoredModels - Models carrying a `totalScore`.
     * @param {number} topK - Cap for `compatible` and `marginal` (incompatible is capped at 5).
     * @returns {{compatible: object[], marginal: object[], incompatible: object[]}}
     */
    classifyResults(scoredModels, topK) {
        const compatible = scoredModels.filter((m) => m.totalScore >= 65);
        const marginal = scoredModels.filter((m) => m.totalScore >= 45 && m.totalScore < 65);
        const incompatible = scoredModels.filter((m) => m.totalScore < 45);

        return {
            compatible: compatible.slice(0, topK),
            marginal: marginal.slice(0, topK),
            incompatible: incompatible.slice(0, 5) // Limit incompatible list
        };
    }

    // Helper methods

    /**
     * Parses a size string into an approximate file size in GB.
     *
     * Accepts a trailing `GB`, `MB` or `B` unit (case-insensitive). A bare `B` is
     * ambiguous and resolved by magnitude: >= 1,000,000 is read as bytes,
     * 0.1-100 as billions of parameters (x2 GB each, minimum 0.5 GB), anything
     * else is returned as-is with a 0.5 floor.
     *
     * @param {string|number} sizeString
     * @returns {number} Size in GB; 4 when the input is empty or unparseable.
     */
    parseModelSize(sizeString) {
        if (!sizeString) return 4; // Default 4GB

        // Trim so stray surrounding whitespace does not defeat the end-anchored match
        const cleanSize = String(sizeString).trim().toUpperCase();

        const match = cleanSize.match(/([0-9.]+)\s*(GB|MB|B)$/);
        if (!match) return 4;

        const num = parseFloat(match[1]);
        const unit = match[2];

        if (unit === 'MB') return num / 1024; // Convert MB to GB
        if (unit === 'GB') return num;        // Already in GB

        // unit === 'B': bytes or billions of parameters, decided by magnitude
        if (num >= 1000000) {
            return num / (1024 ** 3); // Convert bytes to GB
        }
        if (num >= 0.1 && num <= 100) {
            // Rough estimate: 1B params ≈ 2GB in Q4 quantization
            return Math.max(0.5, num * 2);
        }
        return Math.max(0.5, num); // Fallback for edge cases
    }

    /**
     * Estimates the parameter count in billions.
     *
     * Prefers an "<n>b" token in the model name (accepted when 0.1 <= n <= 100);
     * otherwise derives it from `installedSize` or `size` at 2 GB per billion
     * parameters, defaulting to a 4 GB file.
     *
     * @param {object} model - Uses `name`, `installedSize`, `size`.
     * @returns {number} Estimated parameters, in billions.
     */
    estimateModelParams(model) {
        // A missing name falls through to the size-based estimate instead of throwing
        const name = String(model.name ?? '');
        const nameMatch = name.match(/(\d+\.?\d*)[bB]\b/);
        if (nameMatch) {
            const paramCount = parseFloat(nameMatch[1]);
            // Sanity check: parameter counts should be reasonable (0.1B to 100B)
            if (paramCount >= 0.1 && paramCount <= 100) {
                return paramCount;
            }
        }

        let sizeGB;
        if (model.installedSize) {
            sizeGB = this.parseModelSize(model.installedSize);
        } else if (model.size) {
            sizeGB = this.parseModelSize(model.size);
        } else {
            sizeGB = 4; // Default fallback file size
        }

        // Convert file size to parameter count: roughly 1B params ≈ 2GB in Q4 quantization
        return sizeGB / 2;
    }

    /**
     * Maps a model name to one of the keys of `qualityPriors`.
     *
     * 'tinyllama' must be tested before 'llama' because the former contains the
     * latter. 'phi' is only accepted when it is not preceded by a letter, so
     * names such as "dolphin" are not mistaken for the phi family.
     *
     * @param {string} name
     * @returns {string} Family key, or 'default' when nothing matches.
     */
    guessModelFamily(name) {
        const n = String(name ?? '').toLowerCase();
        if (n.includes('tinyllama')) return 'tinyllama';
        if (n.includes('llama')) return 'llama';
        if (n.includes('qwen')) return 'qwen';
        if (n.includes('mistral')) return 'mistral';
        if (n.includes('gemma')) return 'gemma';
        if (/(?:^|[^a-z])phi/.test(n)) return 'phi';
        return 'default';
    }

    /**
     * Classifies the hardware into one of six tiers:
     * 'ultra_low' | 'low' | 'medium' | 'high' | 'ultra_high' | 'flagship'.
     *
     * A 0-100 score is built from effective memory (45%), memory bandwidth (20%),
     * compute (20%), system RAM (10%) and storage (5%, assumed maximal), mapped
     * to a tier, then adjusted by VRAM size, iGPU detection and GPU model name.
     *
     * @param {object} hardware - Reads `memory.total`, `gpu.vram`, `gpu.model`,
     *   `cpu.brand|model|architecture|physicalCores|cores|speed`.
     * @returns {string} Tier name.
     */
    getHardwareTier(hardware) {
        const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));

        const ramGB = hardware.memory?.total || 0;
        const vramGB = hardware.gpu?.vram || 0;
        const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
        const gpuModel = hardware.gpu?.model || '';
        const architecture = hardware.cpu?.architecture || hardware.cpu?.brand || '';
        const cpuCoresPhys = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
        const cpuGHzBase = hardware.cpu?.speed || 2.0;

        const arch = architecture.toLowerCase();
        const gpu = gpuModel.toLowerCase();

        // Apple Silicon detection from architecture, CPU or GPU strings
        const isAppleSilicon =
            ['apple', 'm1', 'm2', 'm3', 'm4'].some((token) => arch.includes(token)) ||
            cpuModel.toLowerCase().includes('apple') ||
            gpu.includes('apple');
        const unified = isAppleSilicon;

        // PC platform (Windows/Linux) gets its own memory heuristics
        const isPC = !isAppleSilicon && (process.platform === 'win32' || process.platform === 'linux');

        // 1) Effective memory for model weights (45%)
        let effMem;
        if (unified) {
            // Apple Silicon unified memory
            const appleSiliconInfo = this.getAppleSiliconSpecs(cpuModel, gpuModel, ramGB);
            effMem = appleSiliconInfo.effectiveMemoryRatio * ramGB;
            if (ramGB >= 32) {
                effMem += appleSiliconInfo.largeMemoryBonus;
            }
        } else if (vramGB > 0) {
            // Dedicated GPU: VRAM plus some RAM offload
            if (isPC) {
                const pcSpecs = this.getPCGPUSpecs(hardware, vramGB, ramGB);
                effMem = vramGB + pcSpecs.offloadCapacity;
            } else {
                effMem = vramGB + Math.min(0.25 * ramGB, 8);
            }
        } else if (isPC) {
            // PC CPU-only with potential iGPU assist
            const pcSpecs = this.getPCCPUSpecs(hardware, ramGB);
            effMem = pcSpecs.effectiveMemoryRatio * ramGB;
        } else {
            // Generic CPU-only calculation
            effMem = 0.6 * ramGB;
        }

        const mem_cap = clamp(effMem / 32);

        // 2) Memory bandwidth (20%) - first matching GPU substring wins
        const bandwidthTable = [
            ['m4 pro', 273],
            ['m4', 120],
            ['rtx 4090', 1008],
            ['rtx 4080', 716],
            ['rtx 4070', 448],
            ['iris xe', 68]
        ];
        const bandwidthEntry = bandwidthTable.find(([key]) => gpu.includes(key));
        const memBandwidthGBs = bandwidthEntry ? bandwidthEntry[1] : 50; // 50 = fallback

        const mem_bw = clamp(memBandwidthGBs / 500);

        // 3) Compute (20%) - GPUs not listed here (including rtx 4070) use the CPU estimate
        let compute;
        if (gpu.includes('m4 pro')) compute = clamp(28 / 80);
        else if (gpu.includes('m4')) compute = clamp(15 / 80);
        else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
        else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
        else if (gpu.includes('iris xe')) compute = 0.02;
        else compute = clamp((cpuCoresPhys * cpuGHzBase) / 60);

        // 4) System RAM for KV-cache (10%)
        const sys_ram = clamp(ramGB / 64);

        // 5) Storage (5%) - assume NVMe
        const storage = 1.0;

        const score = 100 * (0.45 * mem_cap + 0.20 * mem_bw + 0.20 * compute + 0.10 * sys_ram + 0.05 * storage);

        let tier = score >= 85 ? 'flagship' :
                   score >= 75 ? 'ultra_high' :
                   score >= 55 ? 'high' :
                   score >= 35 ? 'medium' :
                   score >= 20 ? 'low' : 'ultra_low';

        // Moves a tier up/down by `direction` steps, saturating at both ends
        const bumpTier = (t, direction) => {
            const tiers = ['ultra_low', 'low', 'medium', 'high', 'ultra_high', 'flagship'];
            const index = tiers.indexOf(t);
            const newIndex = Math.max(0, Math.min(tiers.length - 1, index + direction));
            return tiers[newIndex];
        };

        // Only the first matching adjustment applies
        if (vramGB >= 32) {
            tier = 'flagship';
        } else if (vramGB >= 24) {
            tier = bumpTier(tier, +1); // High-end GPU boost
        } else if (!vramGB && !unified) {
            tier = bumpTier(tier, -1); // CPU-only penalty (moderate)
        } else if (/iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel)) {
            tier = bumpTier(tier, -1); // iGPU penalty
        } else if (vramGB > 0 && vramGB < 6) {
            tier = bumpTier(tier, -1); // Low VRAM penalty
        }

        // Flagship GPU detection by model name overrides everything above
        if (gpu.includes('rtx 50') || gpu.includes('h100') || gpu.includes('a100')) {
            tier = 'flagship';
        }

        return tier;
    }

    /**
     * Apple Silicon tuning parameters, selected by the first chip name found in
     * either the CPU or GPU string (more specific names are listed first), then
     * adjusted for the amount of unified memory.
     *
     * @param {string} cpuModel
     * @param {string} gpuModel
     * @param {number} ramGB
     * @returns {{effectiveMemoryRatio: number, largeMemoryBonus: number,
     *   memoryBandwidth: number, quantizationEfficiency: number, metalOptimization: number}}
     */
    getAppleSiliconSpecs(cpuModel, gpuModel, ramGB) {
        const cpu = cpuModel.toLowerCase();
        const gpu = gpuModel.toLowerCase();

        // Builds a fresh object per call so the adjustments below never leak between calls
        const spec = (effectiveMemoryRatio, largeMemoryBonus, memoryBandwidth, quantizationEfficiency, metalOptimization) => ({
            effectiveMemoryRatio,   // Fraction of RAM usable for model weights
            largeMemoryBonus,       // Extra GB granted to large memory configs
            memoryBandwidth,        // GB/s
            quantizationEfficiency, // Quantization optimization factor
            metalOptimization       // Metal backend factor
        });

        // Order matters: "m4 pro" must precede "m4", and so on
        const chipTable = [
            ['m4 pro', spec(0.90, 4, 273, 1.15, 1.3)],
            ['m4', spec(0.88, 2, 120, 1.10, 1.25)],
            ['m3 max', spec(0.87, 3, 400, 1.08, 1.2)],
            ['m3 pro', spec(0.86, 2, 150, 1.05, 1.15)],
            ['m3', spec(0.85, 1, 100, 1.03, 1.1)],
            ['m2 max', spec(0.84, 2, 400, 1.02, 1.1)],
            ['m2 pro', spec(0.83, 1, 200, 1.0, 1.05)],
            ['m2', spec(0.82, 0, 100, 1.0, 1.0)],
            ['m1 max', spec(0.80, 1, 400, 0.95, 1.0)],
            ['m1 pro', spec(0.78, 0, 200, 0.95, 0.95)],
            ['m1', spec(0.75, 0, 68.25, 0.90, 0.90)]
        ];

        const matched = chipTable.find(([chip]) => cpu.includes(chip) || gpu.includes(chip));
        const baseSpecs = matched ? matched[1] : spec(0.85, 0, 100, 1.0, 1.2);

        // Apply memory configuration scaling
        if (ramGB >= 64) {
            baseSpecs.effectiveMemoryRatio += 0.03; // Bonus for very large memory
            baseSpecs.largeMemoryBonus += 2;
        } else if (ramGB >= 32) {
            baseSpecs.effectiveMemoryRatio += 0.02; // Bonus for large memory
        } else if (ramGB <= 8) {
            baseSpecs.effectiveMemoryRatio -= 0.05; // Penalty for small memory
        }

        return baseSpecs;
    }

    /**
     * PC discrete-GPU tuning parameters. `offloadCapacity` is the extra effective
     * memory (GB) credited for RAM offload, chosen by GPU vendor/series and then
     * increased for systems with 32 GB or 64 GB+ of RAM.
     *
     * @param {object} hardware - Reads `gpu.model`.
     * @param {number} vramGB - Accepted for interface compatibility; not used in the calculation.
     * @param {number} ramGB
     * @returns {{offloadCapacity: number, memoryEfficiency: number,
     *   backendOptimization: number, quantizationSupport: number}}
     */
    getPCGPUSpecs(hardware, vramGB, ramGB) {
        const gpu = (hardware.gpu?.model || '').toLowerCase();
        const has = (...tokens) => tokens.some((token) => gpu.includes(token));

        const specs = {
            offloadCapacity: 0,       // Additional effective memory from RAM offload
            memoryEfficiency: 0.85,   // VRAM utilization efficiency
            backendOptimization: 1.0, // Backend-specific optimization
            quantizationSupport: 1.0  // Quantization efficiency
        };

        if (has('nvidia', 'geforce', 'rtx', 'gtx')) {
            // NVIDIA
            if (has('rtx 50')) {
                specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
                specs.memoryEfficiency = 0.95;
            } else if (has('rtx 40')) {
                specs.offloadCapacity = Math.min(ramGB * 0.35, 16);
                specs.memoryEfficiency = 0.90;
            } else if (has('rtx 30')) {
                specs.offloadCapacity = Math.min(ramGB * 0.25, 12);
            } else if (has('rtx 20', 'gtx 16')) {
                specs.offloadCapacity = Math.min(ramGB * 0.15, 6);
            }
        } else if (has('amd', 'radeon', 'rx ')) {
            // AMD
            if (has('rx 7000', 'rx 7900', 'rx 7800')) {
                specs.offloadCapacity = Math.min(ramGB * 0.2, 8);
            } else if (has('rx 6000')) {
                specs.offloadCapacity = Math.min(ramGB * 0.15, 6);
            }
        } else if (has('intel', 'arc')) {
            // Intel
            if (has('arc a7', 'arc a5')) {
                specs.offloadCapacity = Math.min(ramGB * 0.2, 6);
            }
        }

        // Memory scaling bonuses for high-end systems
        if (ramGB >= 64) {
            specs.offloadCapacity += 8;
        } else if (ramGB >= 32) {
            specs.offloadCapacity += 4;
        }

        return specs;
    }

    /**
     * PC CPU-only tuning parameters. `effectiveMemoryRatio` starts at 0.6 and is
     * adjusted by CPU family, integrated GPU, core count and RAM size.
     *
     * @param {object} hardware - Reads `cpu.brand|model|physicalCores|cores` and `gpu.model`.
     * @param {number} ramGB
     * @returns {{effectiveMemoryRatio: number, instructionOptimization: number,
     *   iGPUAssist: number, thermalHeadroom: number}}
     */
    getPCCPUSpecs(hardware, ramGB) {
        const cpu = (hardware.cpu?.brand || hardware.cpu?.model || '').toLowerCase();
        const gpu = (hardware.gpu?.model || '').toLowerCase();
        const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;

        const specs = {
            effectiveMemoryRatio: 0.6,    // Default CPU memory efficiency
            instructionOptimization: 1.0, // CPU instruction set bonus
            iGPUAssist: 0,                // Integrated GPU assistance
            thermalHeadroom: 1.0          // Thermal performance factor
        };

        if (cpu.includes('intel')) {
            if (cpu.includes('i9') || cpu.includes('13th gen') || cpu.includes('14th gen')) {
                specs.effectiveMemoryRatio = 0.75;
            } else if (cpu.includes('i7') || cpu.includes('12th gen')) {
                specs.effectiveMemoryRatio = 0.70;
            } else if (cpu.includes('i5')) {
                specs.effectiveMemoryRatio = 0.65;
            }

            // Intel iGPU assistance
            if (gpu.includes('iris xe')) {
                specs.effectiveMemoryRatio += 0.05;
            } else if (gpu.includes('uhd')) {
                specs.effectiveMemoryRatio += 0.02;
            }
        } else if (cpu.includes('amd') || cpu.includes('ryzen')) {
            if (cpu.includes('ryzen 9') || cpu.includes('7000') || cpu.includes('9000')) {
                specs.effectiveMemoryRatio = 0.72;
            } else if (cpu.includes('ryzen 7') || cpu.includes('5000') || cpu.includes('6000')) {
                specs.effectiveMemoryRatio = 0.68;
            } else if (cpu.includes('ryzen 5')) {
                specs.effectiveMemoryRatio = 0.65;
            }

            // AMD iGPU assistance (APUs)
            if (gpu.includes('radeon') && gpu.includes('graphics')) {
                if (gpu.includes('780m') || gpu.includes('880m')) {
                    specs.effectiveMemoryRatio += 0.08;
                } else if (gpu.includes('680m') || gpu.includes('660m')) {
                    specs.effectiveMemoryRatio += 0.06;
                }
            }
        }

        // Multi-core scaling
        if (cores >= 16) {
            specs.effectiveMemoryRatio += 0.05;
        } else if (cores >= 8) {
            specs.effectiveMemoryRatio += 0.03;
        }

        // Memory scaling
        if (ramGB >= 64) {
            specs.effectiveMemoryRatio += 0.05;
        } else if (ramGB >= 32) {
            specs.effectiveMemoryRatio += 0.03;
        } else if (ramGB <= 8) {
            specs.effectiveMemoryRatio -= 0.05;
        }

        return specs;
    }

    /**
     * Rough KV-cache size: 2 * layers * hidden_size * seq_len * 2 bytes, where
     * layers and hidden size are both approximated from the parameter count.
     *
     * @param {object} model
     * @param {number} contextLength - Sequence length in tokens.
     * @returns {number} Estimated size in GB.
     */
    estimateKVCache(model, contextLength) {
        const params = this.estimateModelParams(model);
        const layers = Math.round(params * 2);        // Rough approximation
        const hiddenSize = Math.round(params * 1000); // Rough approximation
        return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3);
    }

    /**
     * Estimates generation speed in tokens/second.
     *
     * With a dedicated GPU (VRAM reported and the GPU name contains neither
     * "iris" nor "integrated") a per-series baseline is divided by the parameter
     * count (floor 5 tok/s). Otherwise the CPU estimate is used.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {number} Tokens per second, rounded to two decimals.
     */
    estimateTokensPerSecond(hardware, model) {
        const params = this.estimateModelParams(model);
        const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
        const gpu = (hardware.gpu?.model || '').toLowerCase();
        const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
        const baseSpeed = hardware.cpu?.speed || 2.0;
        const vramGB = hardware.gpu?.vram || 0;

        // Heuristic flag: Intel brand strings mentioning 12th/13th/14th are given the higher CPU coefficient
        const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
            (cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));

        const hasDedicatedGpu = vramGB > 0 && !gpu.includes('iris') && !gpu.includes('integrated');

        if (hasDedicatedGpu) {
            let gpuTPS = 20; // Conservative GPU baseline (also used for unlisted GPUs under 8 GB)
            if (gpu.includes('rtx 50')) {
                gpuTPS = 60;
            } else if (gpu.includes('rtx 40')) {
                gpuTPS = 45;
            } else if (gpu.includes('rtx 30')) {
                gpuTPS = 35;
            } else if (gpu.includes('rtx 20')) {
                gpuTPS = 25;
            } else if (vramGB >= 8) {
                gpuTPS = 30; // Other GPUs with 8 GB+ VRAM
            }

            // Scale by model size; models under 0.8B are treated as 0.8B
            return Math.max(5, Math.round((gpuTPS / Math.max(0.8, params)) * 100) / 100);
        }

        return this.estimateCpuTps({
            ghz: baseSpeed,
            threads: cores,
            paramsB: params,
            avx512: hasAVX512,
            isIrisXe: gpu.includes('iris xe')
        });
    }

    /**
     * CPU tokens/second estimate: proportional to clock speed and thread count
     * (capped at 8 threads), inversely proportional to model size, bounded to
     * [3, 30] (or [3, 40] with the AVX-512 flag).
     *
     * @param {object} options
     * @param {number} options.ghz - CPU clock in GHz.
     * @param {number} options.threads - Available threads/cores.
     * @param {number} options.paramsB - Model parameters in billions.
     * @param {boolean} options.avx512 - Use the higher coefficient and cap.
     * @param {boolean} options.isIrisXe - Apply the 1.25x iGPU multiplier.
     * @returns {number} Tokens per second, rounded to two decimals.
     */
    estimateCpuTps({ ghz, threads, paramsB, avx512, isIrisXe }) {
        const k = avx512 ? 2.8 : 2.0;                  // Base coefficient
        const iGpuMultiplier = isIrisXe ? 1.25 : 1.0;  // iGPU boost for Iris Xe
        const sizeScaling = Math.max(0.5, paramsB);    // Linear slowdown with model size
        const effectiveThreads = Math.min(threads, 8); // Diminishing returns after 8 threads

        const baseTPS = (k * ghz * effectiveThreads * iGpuMultiplier) / sizeScaling;

        const maxCpuTPS = avx512 ? 40 : 30;
        const finalTPS = Math.min(maxCpuTPS, baseTPS);

        return Math.max(3, Math.round(finalTPS * 100) / 100);
    }

    /**
     * Estimates time to first byte from the model's file size: 50 ms per GB when
     * `hardware.gpu` is present, 100 ms per GB otherwise, with a 200 ms floor.
     *
     * @param {object} hardware
     * @param {object} model - Uses `model.size`.
     * @returns {number} Milliseconds.
     */
    estimateTTFB(hardware, model) {
        const sizeGB = this.parseModelSize(model.size);
        const loadTime = sizeGB * (hardware.gpu ? 50 : 100); // ms per GB
        return Math.max(200, loadTime);
    }

    /**
     * Architecture compatibility check. Currently a placeholder that accepts
     * every model.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {boolean} Always true.
     */
    checkArchitectureCompatibility(hardware, model) {
        // TODO: Add specific architecture checks
        return true;
    }

    /**
     * Checks that total system memory meets the model's declared minimum RAM
     * (`model.requirements.ram`, default 2). Missing memory info counts as 0.
     *
     * @param {object} hardware
     * @param {object} model
     * @returns {boolean}
     */
    checkBasicRequirements(hardware, model) {
        const minRAM = model.requirements?.ram || 2;
        return (hardware.memory?.total || 0) >= minRAM;
    }

    /**
     * Produces a one-line human-readable explanation for a scored model.
     *
     * For low match scores the wording depends on whether the model is below
     * the tier's optimal minimum ("underutilizes") or not ("may strain"). If
     * the tier has no optimal range, a 3B cut-off is used instead.
     *
     * @param {object} model
     * @param {object} hardware
     * @param {number} qualityScore - 0-1.
     * @param {number} hardwareMatchScore - 0-1.
     * @returns {string}
     */
    generateReasoning(model, hardware, qualityScore, hardwareMatchScore) {
        const tier = this.getHardwareTier(hardware);
        const params = this.estimateModelParams(model);

        if (hardwareMatchScore >= 0.9) {
            return `Excellent match for your ${tier.replace('_', ' ')} hardware (${params.toFixed(1)}B params)`;
        }
        if (hardwareMatchScore >= 0.6) {
            return `Good fit for your system with quality score ${(qualityScore * 100).toFixed(0)}%`;
        }

        const optimal = this.optimalSizes[tier];
        const isUndersized = optimal ? params < optimal.min : params < 3;
        return `Suboptimal - model ${isUndersized ? 'underutilizes' : 'may strain'} your hardware`;
    }
}
819
+
820
+ module.exports = MultiObjectiveSelector;