llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,820 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Objective Model Selector
|
|
3
|
+
*
|
|
4
|
+
* Implements the improved algorithm with:
|
|
5
|
+
* 1. Hard filters (memory, compatibility)
|
|
6
|
+
* 2. Multi-objective scoring (quality, speed, hardware-match, context, headroom)
|
|
7
|
+
* 3. Hardware-appropriate model size recommendations
|
|
8
|
+
* 4. Benchmark-based quality scoring
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const { MULTI_OBJECTIVE_WEIGHTS } = require('../models/scoring-config');
|
|
12
|
+
|
|
13
|
+
class MultiObjectiveSelector {
|
|
14
|
+
constructor() {
|
|
15
|
+
// Performance weights from centralized config
|
|
16
|
+
this.categoryWeights = MULTI_OBJECTIVE_WEIGHTS;
|
|
17
|
+
|
|
18
|
+
// Optimal model sizes by hardware tier (in billions of parameters)
|
|
19
|
+
this.optimalSizes = {
|
|
20
|
+
'flagship': { min: 30, max: 175, sweet: 70 }, // RTX 5090 tier: 30B-175B models
|
|
21
|
+
'ultra_high': { min: 20, max: 105, sweet: 35 }, // RTX 4090 tier: 20B-105B models
|
|
22
|
+
'high': { min: 8, max: 50, sweet: 20 }, // RTX 4080, Apple 32GB: 8B-50B models
|
|
23
|
+
'medium': { min: 3, max: 20, sweet: 8 }, // RTX 4070, Apple 16GB: 3B-20B models
|
|
24
|
+
'low': { min: 1, max: 8, sweet: 3 }, // Budget systems: 1B-8B models
|
|
25
|
+
'ultra_low': { min: 0.1, max: 3, sweet: 1 } // Very limited: <3B models
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
// Benchmark-based quality priors (normalized 0-1)
|
|
29
|
+
this.qualityPriors = {
|
|
30
|
+
// By family and scale - rough estimates from public benchmarks
|
|
31
|
+
'llama': { base: 0.75, coding: 0.80, reasoning: 0.78 },
|
|
32
|
+
'qwen': { base: 0.73, coding: 0.85, reasoning: 0.76 },
|
|
33
|
+
'mistral': { base: 0.72, coding: 0.75, reasoning: 0.80 },
|
|
34
|
+
'gemma': { base: 0.68, coding: 0.70, reasoning: 0.72 },
|
|
35
|
+
'phi': { base: 0.65, coding: 0.78, reasoning: 0.70 },
|
|
36
|
+
'tinyllama': { base: 0.45, coding: 0.50, reasoning: 0.48 },
|
|
37
|
+
'default': { base: 0.60, coding: 0.60, reasoning: 0.60 }
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Select best models using multi-objective ranking
|
|
43
|
+
*/
|
|
44
|
+
async selectBestModels(hardware, models, category = 'general', topK = 10) {
|
|
45
|
+
// Step 1: Hard filters - remove incompatible models
|
|
46
|
+
const compatibleModels = this.applyHardFilters(hardware, models);
|
|
47
|
+
|
|
48
|
+
if (compatibleModels.length === 0) {
|
|
49
|
+
return { compatible: [], marginal: [], incompatible: models };
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Step 2: Multi-objective scoring
|
|
53
|
+
const scoredModels = compatibleModels.map(model =>
|
|
54
|
+
this.calculateMultiObjectiveScore(hardware, model, category)
|
|
55
|
+
).filter(Boolean);
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
// Step 3: Sort and classify
|
|
59
|
+
scoredModels.sort((a, b) => b.totalScore - a.totalScore);
|
|
60
|
+
|
|
61
|
+
return this.classifyResults(scoredModels, topK);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Step 1: Hard filters - memory, compatibility, architecture
|
|
66
|
+
*/
|
|
67
|
+
applyHardFilters(hardware, models) {
|
|
68
|
+
return models.filter(model => {
|
|
69
|
+
// Memory filter - can it fit?
|
|
70
|
+
const memoryFits = this.checkMemoryCompatibility(hardware, model);
|
|
71
|
+
|
|
72
|
+
// Architecture compatibility
|
|
73
|
+
const archCompatible = this.checkArchitectureCompatibility(hardware, model);
|
|
74
|
+
|
|
75
|
+
// Basic requirements met
|
|
76
|
+
const requirementsMet = this.checkBasicRequirements(hardware, model);
|
|
77
|
+
|
|
78
|
+
return memoryFits && archCompatible && requirementsMet;
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Realistic hardware compatibility check based on tier and actual capabilities
|
|
84
|
+
*/
|
|
85
|
+
checkMemoryCompatibility(hardware, model) {
|
|
86
|
+
const modelSizeGB = this.parseModelSize(model.size);
|
|
87
|
+
const contextLength = model.context || 4096;
|
|
88
|
+
|
|
89
|
+
// Get hardware tier using same algorithm as main system
|
|
90
|
+
const hardwareTier = this.getHardwareTier(hardware);
|
|
91
|
+
|
|
92
|
+
// Estimate KV cache (rough approximation)
|
|
93
|
+
const kvCacheGB = this.estimateKVCache(model, contextLength);
|
|
94
|
+
const totalMemoryNeeded = modelSizeGB + kvCacheGB;
|
|
95
|
+
|
|
96
|
+
// Tier-based realistic limits (not just memory, but practical performance)
|
|
97
|
+
const tierLimits = {
|
|
98
|
+
'flagship': { maxModelSize: 180, availableMemoryRatio: 0.85 }, // RTX 5090 tier, massive VRAM
|
|
99
|
+
'ultra_high': { maxModelSize: 105, availableMemoryRatio: 0.8 }, // RTX 4090, H100 tier
|
|
100
|
+
'high': { maxModelSize: 50, availableMemoryRatio: 0.75 }, // RTX 4080, Apple Silicon 32GB+
|
|
101
|
+
'medium': { maxModelSize: 20, availableMemoryRatio: 0.7 }, // RTX 4070, Apple Silicon 16GB
|
|
102
|
+
'low': { maxModelSize: 8, availableMemoryRatio: 0.6 }, // Budget systems, iGPU
|
|
103
|
+
'ultra_low': { maxModelSize: 3, availableMemoryRatio: 0.5 } // Very limited systems
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
const limits = tierLimits[hardwareTier] || tierLimits['ultra_low'];
|
|
107
|
+
|
|
108
|
+
// Hard size limit based on what the hardware tier can realistically handle
|
|
109
|
+
if (modelSizeGB > limits.maxModelSize) {
|
|
110
|
+
return false; // Model too large for this tier regardless of RAM
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Memory check with tier-appropriate safety margin
|
|
114
|
+
const availableMemory = hardware.memory.total * limits.availableMemoryRatio;
|
|
115
|
+
|
|
116
|
+
return totalMemoryNeeded <= availableMemory;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Step 2: Multi-objective scoring
|
|
121
|
+
*/
|
|
122
|
+
calculateMultiObjectiveScore(hardware, model, category) {
|
|
123
|
+
const weights = this.categoryWeights[category] || this.categoryWeights['general'];
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
// Individual component scores (0-1)
|
|
127
|
+
const qualityScore = this.calculateQualityScore(model, category);
|
|
128
|
+
const speedScore = this.calculateSpeedScore(hardware, model);
|
|
129
|
+
const ttfbScore = this.calculateTTFBScore(hardware, model);
|
|
130
|
+
const contextScore = this.calculateContextScore(model, category);
|
|
131
|
+
const hardwareMatchScore = this.calculateHardwareMatchScore(hardware, model);
|
|
132
|
+
|
|
133
|
+
// Weighted total
|
|
134
|
+
const totalScore = (
|
|
135
|
+
weights.quality * qualityScore +
|
|
136
|
+
weights.speed * speedScore +
|
|
137
|
+
weights.ttfb * ttfbScore +
|
|
138
|
+
weights.context * contextScore +
|
|
139
|
+
weights.hardwareMatch * hardwareMatchScore
|
|
140
|
+
) * 100; // Scale to 0-100
|
|
141
|
+
|
|
142
|
+
return {
|
|
143
|
+
...model,
|
|
144
|
+
totalScore: Math.round(totalScore * 100) / 100,
|
|
145
|
+
components: {
|
|
146
|
+
quality: qualityScore,
|
|
147
|
+
speed: speedScore,
|
|
148
|
+
ttfb: ttfbScore,
|
|
149
|
+
context: contextScore,
|
|
150
|
+
hardwareMatch: hardwareMatchScore
|
|
151
|
+
},
|
|
152
|
+
reasoning: this.generateReasoning(model, hardware, qualityScore, hardwareMatchScore)
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Quality score based on model family and benchmarks
|
|
158
|
+
*/
|
|
159
|
+
calculateQualityScore(model, category) {
|
|
160
|
+
const family = this.guessModelFamily(model.name);
|
|
161
|
+
const priors = this.qualityPriors[family] || this.qualityPriors['default'];
|
|
162
|
+
|
|
163
|
+
// Base quality by family
|
|
164
|
+
let baseQuality = priors.base;
|
|
165
|
+
|
|
166
|
+
// Category-specific adjustments
|
|
167
|
+
if (category === 'coding' && priors.coding) {
|
|
168
|
+
baseQuality = priors.coding;
|
|
169
|
+
} else if (category === 'reasoning' && priors.reasoning) {
|
|
170
|
+
baseQuality = priors.reasoning;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Scale adjustment (larger models generally better quality)
|
|
174
|
+
const modelSizeB = this.estimateModelParams(model);
|
|
175
|
+
const scaleBonus = Math.min(0.15, Math.log2(Math.max(1, modelSizeB)) * 0.03);
|
|
176
|
+
|
|
177
|
+
return Math.min(1.0, baseQuality + scaleBonus);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Hardware-size matching score - key improvement!
|
|
182
|
+
*/
|
|
183
|
+
calculateHardwareMatchScore(hardware, model) {
|
|
184
|
+
const hardwareTier = this.getHardwareTier(hardware);
|
|
185
|
+
const modelSizeB = this.estimateModelParams(model);
|
|
186
|
+
const optimal = this.optimalSizes[hardwareTier];
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
if (!optimal) return 0.5; // Fallback
|
|
190
|
+
|
|
191
|
+
if (modelSizeB < optimal.min) {
|
|
192
|
+
// Underutilization penalty
|
|
193
|
+
const underutilization = optimal.min / modelSizeB;
|
|
194
|
+
if (underutilization >= 10) return 0.1; // Severe penalty
|
|
195
|
+
if (underutilization >= 5) return 0.3; // Moderate penalty
|
|
196
|
+
return 0.6; // Small penalty
|
|
197
|
+
} else if (modelSizeB > optimal.max) {
|
|
198
|
+
// Too large, but let memory filter handle this
|
|
199
|
+
return 0.4;
|
|
200
|
+
} else {
|
|
201
|
+
// In good range - bonus for sweet spot
|
|
202
|
+
const distanceFromSweet = Math.abs(modelSizeB - optimal.sweet) / optimal.sweet;
|
|
203
|
+
if (distanceFromSweet <= 0.3) return 1.0; // Perfect match
|
|
204
|
+
return 0.8; // Good range
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Speed score estimation
|
|
210
|
+
*/
|
|
211
|
+
calculateSpeedScore(hardware, model) {
|
|
212
|
+
const estimatedTokps = this.estimateTokensPerSecond(hardware, model);
|
|
213
|
+
// Normalize against reasonable expectation (50 tok/s = 1.0)
|
|
214
|
+
return Math.min(1.0, estimatedTokps / 50);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Time to First Byte score
|
|
219
|
+
*/
|
|
220
|
+
calculateTTFBScore(hardware, model) {
|
|
221
|
+
const estimatedTTFB = this.estimateTTFB(hardware, model);
|
|
222
|
+
// Lower TTFB is better, normalize against 1000ms
|
|
223
|
+
return Math.min(1.0, 1000 / Math.max(100, estimatedTTFB));
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Context score based on model's context window
|
|
228
|
+
*/
|
|
229
|
+
calculateContextScore(model, category) {
|
|
230
|
+
const contextLength = model.context || 4096;
|
|
231
|
+
const targetContext = category === 'longctx' ? 32768 : 4096;
|
|
232
|
+
return Math.min(1.0, contextLength / targetContext);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Classify results into compatible/marginal/incompatible
|
|
237
|
+
* Lower thresholds for more realistic classification
|
|
238
|
+
*/
|
|
239
|
+
classifyResults(scoredModels, topK) {
|
|
240
|
+
const compatible = scoredModels.filter(m => m.totalScore >= 65);
|
|
241
|
+
const marginal = scoredModels.filter(m => m.totalScore >= 45 && m.totalScore < 65);
|
|
242
|
+
const incompatible = scoredModels.filter(m => m.totalScore < 45);
|
|
243
|
+
|
|
244
|
+
return {
|
|
245
|
+
compatible: compatible.slice(0, topK),
|
|
246
|
+
marginal: marginal.slice(0, topK),
|
|
247
|
+
incompatible: incompatible.slice(0, 5) // Limit incompatible list
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
// Helper methods
|
|
252
|
+
parseModelSize(sizeString) {
|
|
253
|
+
if (!sizeString) return 4; // Default 4GB
|
|
254
|
+
|
|
255
|
+
// Handle different size formats
|
|
256
|
+
let cleanSize = sizeString.toString().toUpperCase();
|
|
257
|
+
|
|
258
|
+
// Extract number and unit - be more specific about units
|
|
259
|
+
const match = cleanSize.match(/([0-9.]+)\s*(GB|MB|B)$/i);
|
|
260
|
+
if (!match) return 4;
|
|
261
|
+
|
|
262
|
+
const num = parseFloat(match[1]);
|
|
263
|
+
const unit = match[2].toUpperCase();
|
|
264
|
+
|
|
265
|
+
// Handle file size units (GB, MB) vs parameter count indicators
|
|
266
|
+
if (unit === 'MB') return num / 1024; // Convert MB to GB
|
|
267
|
+
if (unit === 'GB') return num; // Already in GB
|
|
268
|
+
|
|
269
|
+
// If unit is 'B', determine if it's bytes or billion parameters
|
|
270
|
+
if (unit === 'B') {
|
|
271
|
+
if (num >= 1000000) {
|
|
272
|
+
// Large numbers (>= 1M) are likely bytes
|
|
273
|
+
return num / (1024 ** 3); // Convert bytes to GB
|
|
274
|
+
} else if (num >= 0.1 && num <= 100) {
|
|
275
|
+
// Small numbers (0.1-100) are likely billion parameters - convert to file size
|
|
276
|
+
// Rough estimate: 1B params ≈ 2GB in Q4 quantization
|
|
277
|
+
return Math.max(0.5, num * 2);
|
|
278
|
+
} else {
|
|
279
|
+
// Fallback for edge cases
|
|
280
|
+
return Math.max(0.5, num);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
return num; // Default fallback
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
estimateModelParams(model) {
|
|
288
|
+
// First, try to extract parameter count directly from model name
|
|
289
|
+
const nameMatch = model.name.match(/(\d+\.?\d*)[bB]\b/i);
|
|
290
|
+
if (nameMatch) {
|
|
291
|
+
const paramCount = parseFloat(nameMatch[1]);
|
|
292
|
+
// Sanity check: parameter counts should be reasonable (0.1B to 100B)
|
|
293
|
+
if (paramCount >= 0.1 && paramCount <= 100) {
|
|
294
|
+
return paramCount;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
// Use installedSize from Ollama if available, otherwise fall back to size field
|
|
299
|
+
let sizeGB;
|
|
300
|
+
|
|
301
|
+
if (model.installedSize) {
|
|
302
|
+
// Use real size from Ollama (this is file size)
|
|
303
|
+
sizeGB = this.parseModelSize(model.installedSize);
|
|
304
|
+
} else if (model.size) {
|
|
305
|
+
// Use size field from database (this is file size)
|
|
306
|
+
sizeGB = this.parseModelSize(model.size);
|
|
307
|
+
} else {
|
|
308
|
+
sizeGB = 4; // Default fallback file size
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// Convert file size to parameter count: roughly 1B params ≈ 2GB in Q4 quantization
|
|
312
|
+
return sizeGB / 2;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
guessModelFamily(name) {
|
|
316
|
+
const n = name.toLowerCase();
|
|
317
|
+
if (n.includes('llama')) return 'llama';
|
|
318
|
+
if (n.includes('qwen')) return 'qwen';
|
|
319
|
+
if (n.includes('mistral')) return 'mistral';
|
|
320
|
+
if (n.includes('gemma')) return 'gemma';
|
|
321
|
+
if (n.includes('phi')) return 'phi';
|
|
322
|
+
if (n.includes('tinyllama')) return 'tinyllama';
|
|
323
|
+
return 'default';
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
getHardwareTier(hardware) {
|
|
327
|
+
// Use the same advanced scoring algorithm for consistency
|
|
328
|
+
const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
|
|
329
|
+
|
|
330
|
+
const ramGB = hardware.memory.total || 0;
|
|
331
|
+
const vramGB = hardware.gpu?.vram || 0;
|
|
332
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
333
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
334
|
+
const architecture = hardware.cpu?.architecture || hardware.cpu?.brand || '';
|
|
335
|
+
const cpuCoresPhys = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
336
|
+
const cpuGHzBase = hardware.cpu?.speed || 2.0;
|
|
337
|
+
|
|
338
|
+
// Enhanced Apple Silicon detection to match main algorithm
|
|
339
|
+
const isAppleSilicon = architecture.toLowerCase().includes('apple') ||
|
|
340
|
+
architecture.toLowerCase().includes('m1') ||
|
|
341
|
+
architecture.toLowerCase().includes('m2') ||
|
|
342
|
+
architecture.toLowerCase().includes('m3') ||
|
|
343
|
+
architecture.toLowerCase().includes('m4') ||
|
|
344
|
+
cpuModel.toLowerCase().includes('apple') ||
|
|
345
|
+
gpuModel.toLowerCase().includes('apple');
|
|
346
|
+
const unified = isAppleSilicon;
|
|
347
|
+
|
|
348
|
+
// Detect PC platform (Windows/Linux) to match main algorithm
|
|
349
|
+
const isPC = !isAppleSilicon && (process.platform === 'win32' || process.platform === 'linux');
|
|
350
|
+
|
|
351
|
+
// 1) Effective memory for model weights (45%) - Apple Silicon & PC optimized
|
|
352
|
+
let effMem;
|
|
353
|
+
|
|
354
|
+
if (vramGB > 0 && !unified) {
|
|
355
|
+
// Dedicated GPU path (Windows/Linux with discrete GPU)
|
|
356
|
+
if (isPC) {
|
|
357
|
+
// PC-specific GPU memory calculation with offload support
|
|
358
|
+
const pcSpecs = this.getPCGPUSpecs(hardware, vramGB, ramGB);
|
|
359
|
+
effMem = vramGB + pcSpecs.offloadCapacity;
|
|
360
|
+
} else {
|
|
361
|
+
// Generic discrete GPU calculation
|
|
362
|
+
effMem = vramGB + Math.min(0.25 * ramGB, 8);
|
|
363
|
+
}
|
|
364
|
+
} else if (unified && isAppleSilicon) {
|
|
365
|
+
// Apple Silicon unified memory optimization
|
|
366
|
+
const appleSiliconInfo = this.getAppleSiliconSpecs(cpuModel, gpuModel, ramGB);
|
|
367
|
+
effMem = appleSiliconInfo.effectiveMemoryRatio * ramGB;
|
|
368
|
+
|
|
369
|
+
// Apply model size bonus for larger unified memory pools
|
|
370
|
+
if (ramGB >= 32) {
|
|
371
|
+
effMem += appleSiliconInfo.largeMemoryBonus;
|
|
372
|
+
}
|
|
373
|
+
} else {
|
|
374
|
+
// Traditional CPU-only path or integrated GPU
|
|
375
|
+
if (isPC) {
|
|
376
|
+
// PC CPU-only with potential iGPU assist
|
|
377
|
+
const pcSpecs = this.getPCCPUSpecs(hardware, ramGB);
|
|
378
|
+
effMem = pcSpecs.effectiveMemoryRatio * ramGB;
|
|
379
|
+
} else {
|
|
380
|
+
// Generic CPU-only calculation
|
|
381
|
+
effMem = 0.6 * ramGB;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
const mem_cap = clamp(effMem / 32); // More realistic normalization
|
|
386
|
+
|
|
387
|
+
// 2) Memory bandwidth (20%) - simplified estimation
|
|
388
|
+
let memBandwidthGBs = 50; // fallback
|
|
389
|
+
const gpu = gpuModel.toLowerCase();
|
|
390
|
+
if (gpu.includes('m4 pro')) memBandwidthGBs = 273;
|
|
391
|
+
else if (gpu.includes('m4')) memBandwidthGBs = 120;
|
|
392
|
+
else if (gpu.includes('rtx 4090')) memBandwidthGBs = 1008;
|
|
393
|
+
else if (gpu.includes('rtx 4080')) memBandwidthGBs = 716;
|
|
394
|
+
else if (gpu.includes('rtx 4070')) memBandwidthGBs = 448;
|
|
395
|
+
else if (gpu.includes('iris xe')) memBandwidthGBs = 68;
|
|
396
|
+
|
|
397
|
+
const mem_bw = clamp(memBandwidthGBs / 500); // Match main algorithm
|
|
398
|
+
|
|
399
|
+
// 3) Compute (20%) - simplified estimation
|
|
400
|
+
let compute = 0;
|
|
401
|
+
if (gpu.includes('m4 pro')) compute = clamp(28 / 80); // Match main algorithm
|
|
402
|
+
else if (gpu.includes('m4')) compute = clamp(15 / 80);
|
|
403
|
+
else if (gpu.includes('rtx 4090')) compute = clamp(165 / 80);
|
|
404
|
+
else if (gpu.includes('rtx 4080')) compute = clamp(121 / 80);
|
|
405
|
+
else if (gpu.includes('iris xe')) compute = 0.02;
|
|
406
|
+
else {
|
|
407
|
+
// CPU fallback
|
|
408
|
+
compute = clamp((cpuCoresPhys * cpuGHzBase) / 60);
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// 4) System RAM for KV-cache (10%)
|
|
412
|
+
const sys_ram = clamp(ramGB / 64);
|
|
413
|
+
|
|
414
|
+
// 5) Storage (5%) - assume NVMe
|
|
415
|
+
const storage = 1.0;
|
|
416
|
+
|
|
417
|
+
// Final score
|
|
418
|
+
const score = 100 * (0.45 * mem_cap + 0.20 * mem_bw + 0.20 * compute + 0.10 * sys_ram + 0.05 * storage);
|
|
419
|
+
|
|
420
|
+
// Map to tier (expanded for flagship hardware)
|
|
421
|
+
let tier = score >= 85 ? 'flagship' : // New flagship tier for extreme hardware
|
|
422
|
+
score >= 75 ? 'ultra_high' :
|
|
423
|
+
score >= 55 ? 'high' :
|
|
424
|
+
score >= 35 ? 'medium' :
|
|
425
|
+
score >= 20 ? 'low' : 'ultra_low';
|
|
426
|
+
|
|
427
|
+
// Apply same reality-based adjustments as main algorithm
|
|
428
|
+
const bumpTier = (t, direction) => {
|
|
429
|
+
const tiers = ['ultra_low', 'low', 'medium', 'high', 'ultra_high', 'flagship'];
|
|
430
|
+
const index = tiers.indexOf(t);
|
|
431
|
+
const newIndex = Math.max(0, Math.min(tiers.length - 1, index + direction));
|
|
432
|
+
return tiers[newIndex];
|
|
433
|
+
};
|
|
434
|
+
|
|
435
|
+
// Enhanced tier adjustments for flagship hardware
|
|
436
|
+
if (vramGB >= 32) {
|
|
437
|
+
// RTX 5090 tier - automatic flagship promotion
|
|
438
|
+
tier = 'flagship';
|
|
439
|
+
} else if (vramGB >= 24) {
|
|
440
|
+
tier = bumpTier(tier, +1); // High-end GPU boost
|
|
441
|
+
} else if (!vramGB && !unified) {
|
|
442
|
+
tier = bumpTier(tier, -1); // CPU-only penalty (moderate)
|
|
443
|
+
} else if (/iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel)) {
|
|
444
|
+
tier = bumpTier(tier, -1); // iGPU penalty
|
|
445
|
+
} else if (vramGB > 0 && vramGB < 6) {
|
|
446
|
+
tier = bumpTier(tier, -1); // Low VRAM penalty
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// Special flagship GPU detection by model name
|
|
450
|
+
if (gpuModel.toLowerCase().includes('rtx 50') ||
|
|
451
|
+
gpuModel.toLowerCase().includes('h100') ||
|
|
452
|
+
gpuModel.toLowerCase().includes('a100')) {
|
|
453
|
+
tier = 'flagship';
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
return tier;
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Apple Silicon-specific specifications and optimization parameters
|
|
461
|
+
* Shared implementation with main algorithm for consistency
|
|
462
|
+
*/
|
|
463
|
+
getAppleSiliconSpecs(cpuModel, gpuModel, ramGB) {
|
|
464
|
+
const cpu = cpuModel.toLowerCase();
|
|
465
|
+
const gpu = gpuModel.toLowerCase();
|
|
466
|
+
|
|
467
|
+
// Base specs for different Apple Silicon generations
|
|
468
|
+
let baseSpecs = {
|
|
469
|
+
effectiveMemoryRatio: 0.85, // Default unified memory efficiency
|
|
470
|
+
largeMemoryBonus: 0, // Bonus for large memory configs
|
|
471
|
+
memoryBandwidth: 100, // GB/s
|
|
472
|
+
quantizationEfficiency: 1.0, // Quantization optimization factor
|
|
473
|
+
metalOptimization: 1.2 // Metal backend boost
|
|
474
|
+
};
|
|
475
|
+
|
|
476
|
+
// M4 Pro/Max optimizations
|
|
477
|
+
if (cpu.includes('m4 pro') || gpu.includes('m4 pro')) {
|
|
478
|
+
baseSpecs = {
|
|
479
|
+
effectiveMemoryRatio: 0.90, // Higher efficiency due to newer architecture
|
|
480
|
+
largeMemoryBonus: 4, // 4GB bonus for 32GB+ configs
|
|
481
|
+
memoryBandwidth: 273, // 273 GB/s memory bandwidth
|
|
482
|
+
quantizationEfficiency: 1.15, // Better quantization support
|
|
483
|
+
metalOptimization: 1.3 // Enhanced Metal backend
|
|
484
|
+
};
|
|
485
|
+
} else if (cpu.includes('m4') || gpu.includes('m4')) {
|
|
486
|
+
baseSpecs = {
|
|
487
|
+
effectiveMemoryRatio: 0.88,
|
|
488
|
+
largeMemoryBonus: 2,
|
|
489
|
+
memoryBandwidth: 120,
|
|
490
|
+
quantizationEfficiency: 1.10,
|
|
491
|
+
metalOptimization: 1.25
|
|
492
|
+
};
|
|
493
|
+
}
|
|
494
|
+
// M3 optimizations
|
|
495
|
+
else if (cpu.includes('m3 max') || gpu.includes('m3 max')) {
|
|
496
|
+
baseSpecs = {
|
|
497
|
+
effectiveMemoryRatio: 0.87,
|
|
498
|
+
largeMemoryBonus: 3,
|
|
499
|
+
memoryBandwidth: 400,
|
|
500
|
+
quantizationEfficiency: 1.08,
|
|
501
|
+
metalOptimization: 1.2
|
|
502
|
+
};
|
|
503
|
+
} else if (cpu.includes('m3 pro') || gpu.includes('m3 pro')) {
|
|
504
|
+
baseSpecs = {
|
|
505
|
+
effectiveMemoryRatio: 0.86,
|
|
506
|
+
largeMemoryBonus: 2,
|
|
507
|
+
memoryBandwidth: 150,
|
|
508
|
+
quantizationEfficiency: 1.05,
|
|
509
|
+
metalOptimization: 1.15
|
|
510
|
+
};
|
|
511
|
+
} else if (cpu.includes('m3') || gpu.includes('m3')) {
|
|
512
|
+
baseSpecs = {
|
|
513
|
+
effectiveMemoryRatio: 0.85,
|
|
514
|
+
largeMemoryBonus: 1,
|
|
515
|
+
memoryBandwidth: 100,
|
|
516
|
+
quantizationEfficiency: 1.03,
|
|
517
|
+
metalOptimization: 1.1
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
// M2 optimizations
|
|
521
|
+
else if (cpu.includes('m2 max') || gpu.includes('m2 max')) {
|
|
522
|
+
baseSpecs = {
|
|
523
|
+
effectiveMemoryRatio: 0.84,
|
|
524
|
+
largeMemoryBonus: 2,
|
|
525
|
+
memoryBandwidth: 400,
|
|
526
|
+
quantizationEfficiency: 1.02,
|
|
527
|
+
metalOptimization: 1.1
|
|
528
|
+
};
|
|
529
|
+
} else if (cpu.includes('m2 pro') || gpu.includes('m2 pro')) {
|
|
530
|
+
baseSpecs = {
|
|
531
|
+
effectiveMemoryRatio: 0.83,
|
|
532
|
+
largeMemoryBonus: 1,
|
|
533
|
+
memoryBandwidth: 200,
|
|
534
|
+
quantizationEfficiency: 1.0,
|
|
535
|
+
metalOptimization: 1.05
|
|
536
|
+
};
|
|
537
|
+
} else if (cpu.includes('m2') || gpu.includes('m2')) {
|
|
538
|
+
baseSpecs = {
|
|
539
|
+
effectiveMemoryRatio: 0.82,
|
|
540
|
+
largeMemoryBonus: 0,
|
|
541
|
+
memoryBandwidth: 100,
|
|
542
|
+
quantizationEfficiency: 1.0,
|
|
543
|
+
metalOptimization: 1.0
|
|
544
|
+
};
|
|
545
|
+
}
|
|
546
|
+
// M1 optimizations (legacy but still supported)
|
|
547
|
+
else if (cpu.includes('m1 max') || gpu.includes('m1 max')) {
|
|
548
|
+
baseSpecs = {
|
|
549
|
+
effectiveMemoryRatio: 0.80,
|
|
550
|
+
largeMemoryBonus: 1,
|
|
551
|
+
memoryBandwidth: 400,
|
|
552
|
+
quantizationEfficiency: 0.95,
|
|
553
|
+
metalOptimization: 1.0
|
|
554
|
+
};
|
|
555
|
+
} else if (cpu.includes('m1 pro') || gpu.includes('m1 pro')) {
|
|
556
|
+
baseSpecs = {
|
|
557
|
+
effectiveMemoryRatio: 0.78,
|
|
558
|
+
largeMemoryBonus: 0,
|
|
559
|
+
memoryBandwidth: 200,
|
|
560
|
+
quantizationEfficiency: 0.95,
|
|
561
|
+
metalOptimization: 0.95
|
|
562
|
+
};
|
|
563
|
+
} else if (cpu.includes('m1') || gpu.includes('m1')) {
|
|
564
|
+
baseSpecs = {
|
|
565
|
+
effectiveMemoryRatio: 0.75,
|
|
566
|
+
largeMemoryBonus: 0,
|
|
567
|
+
memoryBandwidth: 68.25,
|
|
568
|
+
quantizationEfficiency: 0.90,
|
|
569
|
+
metalOptimization: 0.90
|
|
570
|
+
};
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
// Apply memory configuration scaling
|
|
574
|
+
if (ramGB >= 64) {
|
|
575
|
+
baseSpecs.effectiveMemoryRatio += 0.03; // Bonus for very large memory
|
|
576
|
+
baseSpecs.largeMemoryBonus += 2;
|
|
577
|
+
} else if (ramGB >= 32) {
|
|
578
|
+
baseSpecs.effectiveMemoryRatio += 0.02; // Bonus for large memory
|
|
579
|
+
} else if (ramGB <= 8) {
|
|
580
|
+
baseSpecs.effectiveMemoryRatio -= 0.05; // Penalty for small memory
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
return baseSpecs;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
/**
|
|
587
|
+
* PC GPU-specific specifications - shared implementation with main algorithm
|
|
588
|
+
*/
|
|
589
|
+
getPCGPUSpecs(hardware, vramGB, ramGB) {
|
|
590
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
591
|
+
const gpu = gpuModel.toLowerCase();
|
|
592
|
+
|
|
593
|
+
let specs = {
|
|
594
|
+
offloadCapacity: 0, // Additional effective memory from RAM offload
|
|
595
|
+
memoryEfficiency: 0.85, // VRAM utilization efficiency
|
|
596
|
+
backendOptimization: 1.0, // Backend-specific optimization
|
|
597
|
+
quantizationSupport: 1.0 // Quantization efficiency
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
// NVIDIA GPU optimizations
|
|
601
|
+
if (gpu.includes('nvidia') || gpu.includes('geforce') || gpu.includes('rtx') || gpu.includes('gtx')) {
|
|
602
|
+
if (gpu.includes('rtx 50')) {
|
|
603
|
+
// RTX 50xx series - flagship tier with massive VRAM + excellent offload
|
|
604
|
+
specs.offloadCapacity = Math.min(ramGB * 0.5, 24);
|
|
605
|
+
specs.memoryEfficiency = 0.95;
|
|
606
|
+
} else if (gpu.includes('rtx 40')) {
|
|
607
|
+
specs.offloadCapacity = Math.min(ramGB * 0.35, 16);
|
|
608
|
+
specs.memoryEfficiency = 0.90;
|
|
609
|
+
} else if (gpu.includes('rtx 30')) {
|
|
610
|
+
specs.offloadCapacity = Math.min(ramGB * 0.25, 12);
|
|
611
|
+
} else if (gpu.includes('rtx 20') || gpu.includes('gtx 16')) {
|
|
612
|
+
specs.offloadCapacity = Math.min(ramGB * 0.15, 6);
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
// AMD GPU optimizations
|
|
616
|
+
else if (gpu.includes('amd') || gpu.includes('radeon') || gpu.includes('rx ')) {
|
|
617
|
+
if (gpu.includes('rx 7000') || gpu.includes('rx 7900') || gpu.includes('rx 7800')) {
|
|
618
|
+
specs.offloadCapacity = Math.min(ramGB * 0.2, 8);
|
|
619
|
+
} else if (gpu.includes('rx 6000')) {
|
|
620
|
+
specs.offloadCapacity = Math.min(ramGB * 0.15, 6);
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
// Intel GPU optimizations
|
|
624
|
+
else if (gpu.includes('intel') || gpu.includes('arc')) {
|
|
625
|
+
if (gpu.includes('arc a7') || gpu.includes('arc a5')) {
|
|
626
|
+
specs.offloadCapacity = Math.min(ramGB * 0.2, 6);
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
// Apply memory scaling bonuses for high-end systems
|
|
631
|
+
if (ramGB >= 64) {
|
|
632
|
+
specs.offloadCapacity += 8; // Massive system bonus
|
|
633
|
+
} else if (ramGB >= 32) {
|
|
634
|
+
specs.offloadCapacity += 4; // High-end system bonus
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
return specs;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
/**
|
|
641
|
+
* PC CPU-specific specifications - shared implementation with main algorithm
|
|
642
|
+
*/
|
|
643
|
+
getPCCPUSpecs(hardware, ramGB) {
|
|
644
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
645
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
646
|
+
const cpu = cpuModel.toLowerCase();
|
|
647
|
+
const gpu = gpuModel.toLowerCase();
|
|
648
|
+
const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
649
|
+
|
|
650
|
+
let specs = {
|
|
651
|
+
effectiveMemoryRatio: 0.6, // Default CPU memory efficiency
|
|
652
|
+
instructionOptimization: 1.0, // CPU instruction set bonus
|
|
653
|
+
iGPUAssist: 0, // Integrated GPU assistance
|
|
654
|
+
thermalHeadroom: 1.0 // Thermal performance factor
|
|
655
|
+
};
|
|
656
|
+
|
|
657
|
+
// Intel CPU optimizations
|
|
658
|
+
if (cpu.includes('intel')) {
|
|
659
|
+
if (cpu.includes('i9') || cpu.includes('13th gen') || cpu.includes('14th gen')) {
|
|
660
|
+
specs.effectiveMemoryRatio = 0.75;
|
|
661
|
+
} else if (cpu.includes('i7') || cpu.includes('12th gen')) {
|
|
662
|
+
specs.effectiveMemoryRatio = 0.70;
|
|
663
|
+
} else if (cpu.includes('i5')) {
|
|
664
|
+
specs.effectiveMemoryRatio = 0.65;
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
// Intel iGPU assistance
|
|
668
|
+
if (gpu.includes('iris xe')) {
|
|
669
|
+
specs.effectiveMemoryRatio += 0.05;
|
|
670
|
+
} else if (gpu.includes('uhd')) {
|
|
671
|
+
specs.effectiveMemoryRatio += 0.02;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
// AMD CPU optimizations
|
|
675
|
+
else if (cpu.includes('amd') || cpu.includes('ryzen')) {
|
|
676
|
+
if (cpu.includes('ryzen 9') || cpu.includes('7000') || cpu.includes('9000')) {
|
|
677
|
+
specs.effectiveMemoryRatio = 0.72;
|
|
678
|
+
} else if (cpu.includes('ryzen 7') || cpu.includes('5000') || cpu.includes('6000')) {
|
|
679
|
+
specs.effectiveMemoryRatio = 0.68;
|
|
680
|
+
} else if (cpu.includes('ryzen 5')) {
|
|
681
|
+
specs.effectiveMemoryRatio = 0.65;
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
// AMD iGPU assistance (RDNA2/3 in APUs)
|
|
685
|
+
if (gpu.includes('radeon') && gpu.includes('graphics')) {
|
|
686
|
+
if (gpu.includes('780m') || gpu.includes('880m')) {
|
|
687
|
+
specs.effectiveMemoryRatio += 0.08;
|
|
688
|
+
} else if (gpu.includes('680m') || gpu.includes('660m')) {
|
|
689
|
+
specs.effectiveMemoryRatio += 0.06;
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
// Multi-core and memory scaling
|
|
695
|
+
if (cores >= 16) {
|
|
696
|
+
specs.effectiveMemoryRatio += 0.05;
|
|
697
|
+
} else if (cores >= 8) {
|
|
698
|
+
specs.effectiveMemoryRatio += 0.03;
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
if (ramGB >= 64) {
|
|
702
|
+
specs.effectiveMemoryRatio += 0.05;
|
|
703
|
+
} else if (ramGB >= 32) {
|
|
704
|
+
specs.effectiveMemoryRatio += 0.03;
|
|
705
|
+
} else if (ramGB <= 8) {
|
|
706
|
+
specs.effectiveMemoryRatio -= 0.05;
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
return specs;
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
estimateKVCache(model, contextLength) {
|
|
713
|
+
// Rough KV cache estimation: 2 * layers * hidden_size * seq_len * 2 bytes
|
|
714
|
+
const params = this.estimateModelParams(model);
|
|
715
|
+
const layers = Math.round(params * 2); // Rough approximation
|
|
716
|
+
const hiddenSize = Math.round(params * 1000); // Rough approximation
|
|
717
|
+
return (2 * layers * hiddenSize * contextLength * 2) / (1024 ** 3); // GB
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
estimateTokensPerSecond(hardware, model) {
|
|
721
|
+
const params = this.estimateModelParams(model);
|
|
722
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
723
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
724
|
+
const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
725
|
+
const baseSpeed = hardware.cpu?.speed || 2.0;
|
|
726
|
+
const vramGB = hardware.gpu?.vram || 0;
|
|
727
|
+
|
|
728
|
+
// Use improved CPU estimation function for more realistic and varying speeds
|
|
729
|
+
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
730
|
+
(cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));
|
|
731
|
+
|
|
732
|
+
// GPU-based calculation (dedicated GPU only)
|
|
733
|
+
if (vramGB > 0 && !gpuModel.toLowerCase().includes('iris') && !gpuModel.toLowerCase().includes('integrated')) {
|
|
734
|
+
let gpuTPS = 20; // Conservative GPU baseline
|
|
735
|
+
if (gpuModel.toLowerCase().includes('rtx 50')) {
|
|
736
|
+
gpuTPS = 60; // RTX 50 series - more realistic
|
|
737
|
+
} else if (gpuModel.toLowerCase().includes('rtx 40')) {
|
|
738
|
+
gpuTPS = 45; // RTX 40 series
|
|
739
|
+
} else if (gpuModel.toLowerCase().includes('rtx 30')) {
|
|
740
|
+
gpuTPS = 35; // RTX 30 series
|
|
741
|
+
} else if (gpuModel.toLowerCase().includes('rtx 20')) {
|
|
742
|
+
gpuTPS = 25; // RTX 20 series
|
|
743
|
+
} else if (vramGB >= 8) {
|
|
744
|
+
gpuTPS = 30; // Other high-end GPUs
|
|
745
|
+
} else if (vramGB >= 4) {
|
|
746
|
+
gpuTPS = 20; // Mid-range GPUs
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
// Scale by model size for GPU
|
|
750
|
+
return Math.max(5, Math.round((gpuTPS / Math.max(0.8, params)) * 100) / 100);
|
|
751
|
+
}
|
|
752
|
+
// CPU-based calculation (more realistic formula)
|
|
753
|
+
else {
|
|
754
|
+
return this.estimateCpuTps({
|
|
755
|
+
ghz: baseSpeed,
|
|
756
|
+
threads: cores,
|
|
757
|
+
paramsB: params,
|
|
758
|
+
avx512: hasAVX512,
|
|
759
|
+
isIrisXe: gpuModel.toLowerCase().includes('iris xe')
|
|
760
|
+
});
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
/**
|
|
765
|
+
* Realistic CPU token per second estimation that varies significantly by model size
|
|
766
|
+
*/
|
|
767
|
+
estimateCpuTps({ghz, threads, paramsB, avx512, isIrisXe}) {
|
|
768
|
+
// Base coefficient chosen empirically for realistic results
|
|
769
|
+
const k = avx512 ? 2.8 : 2.0;
|
|
770
|
+
|
|
771
|
+
// Apply iGPU boost for Iris Xe
|
|
772
|
+
const iGpuMultiplier = isIrisXe ? 1.25 : 1.0;
|
|
773
|
+
|
|
774
|
+
// More aggressive scaling for larger models (they really slow down on CPU)
|
|
775
|
+
const sizeScaling = Math.max(0.5, paramsB); // Linear scaling, larger models much slower
|
|
776
|
+
|
|
777
|
+
// Calculate base TPS with realistic threading efficiency
|
|
778
|
+
const effectiveThreads = Math.min(threads, 8); // Diminishing returns after 8 threads
|
|
779
|
+
const baseTPS = (k * ghz * effectiveThreads * iGpuMultiplier) / sizeScaling;
|
|
780
|
+
|
|
781
|
+
// Apply realistic upper bound for CPU (even with best CPUs)
|
|
782
|
+
const maxCpuTPS = avx512 ? 40 : 30;
|
|
783
|
+
const finalTPS = Math.min(maxCpuTPS, baseTPS);
|
|
784
|
+
|
|
785
|
+
return Math.max(3, Math.round(finalTPS * 100) / 100);
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
estimateTTFB(hardware, model) {
|
|
789
|
+
const sizeGB = this.parseModelSize(model.size);
|
|
790
|
+
const loadTime = sizeGB * (hardware.gpu ? 50 : 100); // ms per GB
|
|
791
|
+
return Math.max(200, loadTime);
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
checkArchitectureCompatibility(hardware, model) {
|
|
795
|
+
// For now, assume all models are compatible
|
|
796
|
+
// TODO: Add specific architecture checks
|
|
797
|
+
return true;
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
checkBasicRequirements(hardware, model) {
|
|
801
|
+
// Basic CPU/memory requirements
|
|
802
|
+
const minRAM = model.requirements?.ram || 2;
|
|
803
|
+
return hardware.memory.total >= minRAM;
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
generateReasoning(model, hardware, qualityScore, hardwareMatchScore) {
|
|
807
|
+
const tier = this.getHardwareTier(hardware);
|
|
808
|
+
const params = this.estimateModelParams(model);
|
|
809
|
+
|
|
810
|
+
if (hardwareMatchScore >= 0.9) {
|
|
811
|
+
return `Excellent match for your ${tier.replace('_', ' ')} hardware (${params.toFixed(1)}B params)`;
|
|
812
|
+
} else if (hardwareMatchScore >= 0.6) {
|
|
813
|
+
return `Good fit for your system with quality score ${(qualityScore * 100).toFixed(0)}%`;
|
|
814
|
+
} else {
|
|
815
|
+
return `Suboptimal - model ${params < 3 ? 'underutilizes' : 'may strain'} your hardware`;
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
// Expose MultiObjectiveSelector as this module's CommonJS export.
module.exports = MultiObjectiveSelector;
|