llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Model Selector - JavaScript-only mathematical model
|
|
3
|
+
* Uses advanced heuristics and mathematical scoring for optimal model selection
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Heuristic, dependency-free selector that ranks local LLMs for a machine.
 *
 * A hardware description (plain object with `total_ram_gb`, `cpu_cores`,
 * `gpu_vram_gb`, `gpu_model_normalized`, `cpu_freq_max`) is bucketed into
 * tiers, then every candidate model receives a weighted 0-100 score built
 * from five factors (see `performanceWeights`). Missing hardware fields are
 * treated as "absent" (0 / empty string) rather than crashing or producing
 * NaN scores.
 */
class IntelligentModelSelector {
    /**
     * Builds the built-in model catalogue, the tier tables and the factor
     * weights used by `calculateModelScore`.
     */
    constructor() {
        this.modelDatabase = this.initializeModelDatabase();
        this.hardwareTiers = this.initializeHardwareTiers();
        // The five weights add up to 1.0, so the weighted sum of the factor
        // scores stays roughly within 0..1 before it is scaled to 0..100.
        this.performanceWeights = {
            memory_efficiency: 0.35,   // Most important - must fit in memory
            performance_match: 0.25,   // CPU/GPU capability match
            task_optimization: 0.20,   // Model specialization
            popularity_quality: 0.15,  // Community adoption & quality
            resource_efficiency: 0.05  // Power/thermal considerations
        };
    }

    /**
     * Returns the built-in catalogue keyed by model id (`family:size`).
     *
     * Each entry carries the fields the scoring methods read:
     * `memory_requirement` and `cpu_cores_min` (compared against the
     * hardware), `cpu_intensive` (0..1), `specialization` (task tags),
     * `quality_score` / `popularity_score` (divided by 10 when scored) and
     * `inference_speed` (a key of the speed table in
     * `calculateResourceEfficiencyScore`).
     *
     * @returns {Object<string, Object>} catalogue of known models
     */
    initializeModelDatabase() {
        return {
            // Llama family
            'llama2:7b': {
                name: 'Llama 2 7B',
                size_gb: 3.8,
                parameters: 7,
                memory_requirement: 8,
                cpu_cores_min: 4,
                cpu_intensive: 0.7,
                specialization: ['general', 'chat', 'reasoning'],
                quality_score: 9.2,
                popularity_score: 9.8,
                context_length: 4096,
                quantization: 'Q4_0',
                inference_speed: 'medium'
            },
            'llama2:13b': {
                name: 'Llama 2 13B',
                size_gb: 7.3,
                parameters: 13,
                memory_requirement: 16,
                cpu_cores_min: 6,
                cpu_intensive: 0.8,
                specialization: ['general', 'chat', 'reasoning', 'complex'],
                quality_score: 9.5,
                popularity_score: 9.0,
                context_length: 4096,
                quantization: 'Q4_0',
                inference_speed: 'slow'
            },
            'llama2:70b': {
                name: 'Llama 2 70B',
                size_gb: 39,
                parameters: 70,
                memory_requirement: 48,
                cpu_cores_min: 8,
                cpu_intensive: 0.95,
                specialization: ['general', 'chat', 'reasoning', 'complex', 'professional'],
                quality_score: 9.8,
                popularity_score: 8.5,
                context_length: 4096,
                quantization: 'Q4_0',
                inference_speed: 'very_slow'
            },

            // Code Llama family
            'codellama:7b': {
                name: 'Code Llama 7B',
                size_gb: 3.8,
                parameters: 7,
                memory_requirement: 8,
                cpu_cores_min: 4,
                cpu_intensive: 0.7,
                specialization: ['coding', 'programming', 'debugging'],
                quality_score: 9.0,
                popularity_score: 8.8,
                context_length: 16384,
                quantization: 'Q4_0',
                inference_speed: 'medium'
            },
            'codellama:13b': {
                name: 'Code Llama 13B',
                size_gb: 7.3,
                parameters: 13,
                memory_requirement: 16,
                cpu_cores_min: 6,
                cpu_intensive: 0.8,
                specialization: ['coding', 'programming', 'debugging', 'complex'],
                quality_score: 9.3,
                popularity_score: 8.5,
                context_length: 16384,
                quantization: 'Q4_0',
                inference_speed: 'slow'
            },

            // Mistral family
            'mistral:7b': {
                name: 'Mistral 7B',
                size_gb: 4.1,
                parameters: 7.3,
                memory_requirement: 8,
                cpu_cores_min: 4,
                cpu_intensive: 0.6,
                specialization: ['general', 'chat', 'reasoning'],
                quality_score: 9.1,
                popularity_score: 9.2,
                context_length: 8192,
                quantization: 'Q4_0',
                inference_speed: 'fast'
            },

            // Phi family
            'phi3:mini': {
                name: 'Phi-3 Mini',
                size_gb: 2.3,
                parameters: 3.8,
                memory_requirement: 4,
                cpu_cores_min: 2,
                cpu_intensive: 0.4,
                specialization: ['general', 'chat', 'lightweight'],
                quality_score: 8.5,
                popularity_score: 8.0,
                context_length: 128000,
                quantization: 'Q4_0',
                inference_speed: 'very_fast'
            },
            'phi3:medium': {
                name: 'Phi-3 Medium',
                size_gb: 7.9,
                parameters: 14,
                memory_requirement: 16,
                cpu_cores_min: 4,
                cpu_intensive: 0.7,
                specialization: ['general', 'chat', 'reasoning'],
                quality_score: 8.8,
                popularity_score: 7.5,
                context_length: 128000,
                quantization: 'Q4_0',
                inference_speed: 'medium'
            },

            // Gemma family
            'gemma:2b': {
                name: 'Gemma 2B',
                size_gb: 1.4,
                parameters: 2,
                memory_requirement: 3,
                cpu_cores_min: 2,
                cpu_intensive: 0.3,
                specialization: ['general', 'chat', 'ultra_lightweight'],
                quality_score: 7.8,
                popularity_score: 7.0,
                context_length: 8192,
                quantization: 'Q4_0',
                inference_speed: 'very_fast'
            },
            'gemma:7b': {
                name: 'Gemma 7B',
                size_gb: 4.8,
                parameters: 8.5,
                memory_requirement: 10,
                cpu_cores_min: 4,
                cpu_intensive: 0.6,
                specialization: ['general', 'chat', 'reasoning'],
                quality_score: 8.7,
                popularity_score: 7.8,
                context_length: 8192,
                quantization: 'Q4_0',
                inference_speed: 'fast'
            },

            // Deepseek family
            'deepseek-coder:1.3b': {
                name: 'DeepSeek Coder 1.3B',
                size_gb: 0.8,
                parameters: 1.3,
                memory_requirement: 2,
                cpu_cores_min: 2,
                cpu_intensive: 0.3,
                specialization: ['coding', 'programming', 'ultra_lightweight'],
                quality_score: 8.2,
                popularity_score: 7.5,
                context_length: 16384,
                quantization: 'Q4_0',
                inference_speed: 'very_fast'
            },
            'deepseek-coder:6.7b': {
                name: 'DeepSeek Coder 6.7B',
                size_gb: 3.9,
                parameters: 6.7,
                memory_requirement: 8,
                cpu_cores_min: 4,
                cpu_intensive: 0.7,
                specialization: ['coding', 'programming', 'debugging'],
                quality_score: 9.0,
                popularity_score: 8.2,
                context_length: 16384,
                quantization: 'Q4_0',
                inference_speed: 'medium'
            },

            // Qwen family
            'qwen2:1.5b': {
                name: 'Qwen2 1.5B',
                size_gb: 0.9,
                parameters: 1.5,
                memory_requirement: 2,
                cpu_cores_min: 2,
                cpu_intensive: 0.3,
                specialization: ['general', 'chat', 'multilingual'],
                quality_score: 8.0,
                popularity_score: 7.2,
                context_length: 32768,
                quantization: 'Q4_0',
                inference_speed: 'very_fast'
            },
            'qwen2:7b': {
                name: 'Qwen2 7B',
                size_gb: 4.4,
                parameters: 7,
                memory_requirement: 8,
                cpu_cores_min: 4,
                cpu_intensive: 0.6,
                specialization: ['general', 'chat', 'multilingual', 'reasoning'],
                quality_score: 8.9,
                popularity_score: 8.0,
                context_length: 32768,
                quantization: 'Q4_0',
                inference_speed: 'fast'
            }
        };
    }

    /**
     * Returns the tier tables. `memory` and `cpu` tiers are `{min, max}`
     * ranges consumed by `getHardwareTier`; declaration order matters because
     * the first matching range wins. The `gpu` table lists the same
     * multipliers that `getGPUTier` returns for non-Apple GPUs.
     *
     * @returns {{memory: Object, cpu: Object, gpu: Object}}
     */
    initializeHardwareTiers() {
        return {
            memory: {
                ultra_low: { min: 0, max: 4, multiplier: 0.3 },
                low: { min: 4, max: 8, multiplier: 0.6 },
                medium: { min: 8, max: 16, multiplier: 1.0 },
                high: { min: 16, max: 32, multiplier: 1.4 },
                very_high: { min: 32, max: 64, multiplier: 1.8 },
                extreme: { min: 64, max: 128, multiplier: 2.2 }
            },
            cpu: {
                ultra_low: { min: 0, max: 2, multiplier: 0.4 },
                low: { min: 2, max: 4, multiplier: 0.7 },
                medium: { min: 4, max: 8, multiplier: 1.0 },
                high: { min: 8, max: 16, multiplier: 1.3 },
                very_high: { min: 16, max: 32, multiplier: 1.6 },
                extreme: { min: 32, max: 64, multiplier: 2.0 }
            },
            gpu: {
                none: { vram: 0, multiplier: 1.0 },
                integrated: { vram: 2, multiplier: 1.1 },
                low_vram: { vram: 4, multiplier: 1.3 },
                medium_vram: { vram: 8, multiplier: 1.6 },
                high_vram: { vram: 16, multiplier: 2.0 },
                very_high_vram: { vram: 24, multiplier: 2.4 }
            }
        };
    }

    /**
     * Derives everything the scoring methods need from a hardware object.
     *
     * @param {Object} hardware - reads `total_ram_gb`, `cpu_cores`,
     *   `gpu_vram_gb`, `gpu_model_normalized`, `cpu_freq_max`
     * @returns {Object} tiers, available memory, multipliers and
     *   `overall_tier` (a tier name string)
     */
    analyzeHardware(hardware) {
        const analysis = {
            memory_tier: this.getHardwareTier('memory', hardware.total_ram_gb),
            cpu_tier: this.getHardwareTier('cpu', hardware.cpu_cores),
            gpu_tier: this.getGPUTier(hardware.gpu_vram_gb, hardware.gpu_model_normalized),
            available_memory: this.calculateAvailableMemory(hardware),
            performance_multiplier: this.calculatePerformanceMultiplier(hardware),
            thermal_constraint: this.estimateThermalConstraint(hardware)
        };

        // Needs the three tiers computed above, hence the second step.
        analysis.overall_tier = this.calculateOverallTier(analysis);
        return analysis;
    }

    /**
     * Looks up the `{min, max}` tier containing `value`.
     *
     * Both bounds are inclusive and tiers are checked in declaration order,
     * so a value sitting exactly on a shared boundary (e.g. 8 GB) resolves to
     * the LOWER tier. Values above the last range resolve to `extreme`.
     * Unreadable (NaN/undefined) or negative values resolve to the lowest
     * tier instead of falling through to `extreme`.
     *
     * @param {string} type - key of `this.hardwareTiers` (`'memory'` or `'cpu'`)
     * @param {number} value - measured amount (GB or core count)
     * @returns {Object} the tier data plus a `name` property
     */
    getHardwareTier(type, value) {
        const tiers = this.hardwareTiers[type];
        const tierEntries = Object.entries(tiers);
        const numericValue = Number(value);

        // A failed detection must never be rewarded with the best tier.
        if (Number.isNaN(numericValue) || numericValue < 0) {
            const [lowestName, lowestData] = tierEntries[0];
            return { name: lowestName, ...lowestData };
        }

        for (const [tierName, tierData] of tierEntries) {
            if (numericValue >= tierData.min && numericValue <= tierData.max) {
                return { name: tierName, ...tierData };
            }
        }
        return { name: 'extreme', ...tiers.extreme };
    }

    /**
     * Classifies the GPU by VRAM. Apple Silicon (unified memory) uses its
     * own thresholds and slightly lower multipliers than discrete GPUs.
     * A missing or unreadable VRAM amount is treated as 0.
     *
     * @param {number} vram - VRAM in GB
     * @param {string} gpuModel - normalized GPU identifier
     * @returns {{name: string, vram: number, multiplier: number}}
     */
    getGPUTier(vram, gpuModel) {
        // Without this, `undefined` fails every comparison below and lands in
        // the top `very_high_vram` tier.
        const usableVram = Number.isNaN(Number(vram)) ? 0 : vram;

        if (gpuModel === 'apple_silicon') {
            // Apple Silicon uses unified memory
            if (usableVram >= 24) return { name: 'very_high_vram', vram: usableVram, multiplier: 2.2 };
            if (usableVram >= 16) return { name: 'high_vram', vram: usableVram, multiplier: 1.8 };
            if (usableVram >= 8) return { name: 'medium_vram', vram: usableVram, multiplier: 1.5 };
            return { name: 'low_vram', vram: usableVram, multiplier: 1.2 };
        }

        if (usableVram <= 0) return { name: 'none', vram: 0, multiplier: 1.0 };
        if (usableVram <= 4) return { name: 'low_vram', vram: usableVram, multiplier: 1.3 };
        if (usableVram <= 8) return { name: 'medium_vram', vram: usableVram, multiplier: 1.6 };
        if (usableVram <= 16) return { name: 'high_vram', vram: usableVram, multiplier: 2.0 };
        return { name: 'very_high_vram', vram: usableVram, multiplier: 2.4 };
    }

    /**
     * Estimates how much memory a model may occupy.
     *
     * Missing or non-numeric `total_ram_gb` / `gpu_vram_gb` count as 0 so the
     * result never contains NaN (a NaN `total` would silently defeat every
     * "does it fit" comparison downstream).
     *
     * @param {Object} hardware - reads `total_ram_gb`, `gpu_vram_gb`,
     *   `gpu_model_normalized`
     * @returns {{ram: number, vram: number, total: number}} `total` is the
     *   larger of the two pools, not their sum
     */
    calculateAvailableMemory(hardware) {
        const asGigabytes = (raw) => {
            const parsed = Number(raw);
            return Number.isFinite(parsed) ? parsed : 0;
        };
        const totalRam = asGigabytes(hardware.total_ram_gb);
        const totalVram = asGigabytes(hardware.gpu_vram_gb);

        let availableRAM = totalRam * 0.7;   // Reserve 30% for OS
        let availableVRAM = totalVram * 0.9; // Reserve 10% for GPU overhead

        // Apple Silicon unified memory calculation
        if (hardware.gpu_model_normalized === 'apple_silicon') {
            availableVRAM = totalRam * 0.6; // 60% can be used for models
            availableRAM = totalRam * 0.4;  // Remaining for system
        }

        return {
            ram: availableRAM,
            vram: availableVRAM,
            total: Math.max(availableRAM, availableVRAM)
        };
    }

    /**
     * Computes a speed multiplier from CPU clock and GPU generation.
     * A missing `gpu_model_normalized` simply earns no architecture boost
     * (previously it raised a TypeError).
     *
     * @param {Object} hardware - reads `cpu_freq_max` and `gpu_model_normalized`
     * @returns {number} multiplier, 1.0 when nothing applies
     */
    calculatePerformanceMultiplier(hardware) {
        const gpuModel = typeof hardware.gpu_model_normalized === 'string'
            ? hardware.gpu_model_normalized
            : '';
        let multiplier = 1.0;

        // CPU frequency boost
        if (hardware.cpu_freq_max > 3.5) multiplier *= 1.2;
        else if (hardware.cpu_freq_max > 3.0) multiplier *= 1.1;
        else if (hardware.cpu_freq_max < 2.0) multiplier *= 0.9;

        // Architecture boost
        if (gpuModel === 'apple_silicon') multiplier *= 1.15;
        if (gpuModel.includes('rtx_40')) multiplier *= 1.3;
        if (gpuModel.includes('rtx_30')) multiplier *= 1.2;

        return multiplier;
    }

    /**
     * Guesses cooling headroom from core count and RAM size.
     *
     * @param {Object} hardware - reads `cpu_cores` and `total_ram_gb`
     * @returns {number} 0.8 (likely laptop), 1.2 (likely desktop/workstation)
     *   or 1.0 (neutral)
     */
    estimateThermalConstraint(hardware) {
        // Laptop vs Desktop heuristic
        if (hardware.cpu_cores <= 4 && hardware.total_ram_gb <= 16) {
            return 0.8; // Likely laptop - thermal constraints
        }
        if (hardware.cpu_cores >= 12 || hardware.total_ram_gb >= 32) {
            return 1.2; // Likely desktop/workstation - better cooling
        }
        return 1.0; // Neutral
    }

    /**
     * Collapses the memory/CPU/GPU tier multipliers into one tier name using
     * a 40/30/30 weighting.
     *
     * @param {Object} analysis - needs `memory_tier`, `cpu_tier`, `gpu_tier`
     * @returns {string} one of `extreme`, `very_high`, `high`, `medium`,
     *   `low`, `ultra_low`
     */
    calculateOverallTier(analysis) {
        const memoryScore = analysis.memory_tier.multiplier;
        const cpuScore = analysis.cpu_tier.multiplier;
        const gpuScore = analysis.gpu_tier.multiplier;

        const weightedScore = (memoryScore * 0.4) + (cpuScore * 0.3) + (gpuScore * 0.3);

        if (weightedScore >= 2.0) return 'extreme';
        if (weightedScore >= 1.6) return 'very_high';
        if (weightedScore >= 1.2) return 'high';
        if (weightedScore >= 0.8) return 'medium';
        if (weightedScore >= 0.5) return 'low';
        return 'ultra_low';
    }

    /**
     * Scores one model for one machine.
     *
     * The five factor scores are combined with `performanceWeights`, reduced
     * by the penalty fraction from `calculatePenalties`, scaled by 100 and
     * clamped to 0..100.
     *
     * @param {Object} model - catalogue entry or estimate
     * @param {Object} hardware - raw hardware object
     * @param {Object} analysis - result of `analyzeHardware(hardware)`
     * @param {string} [userPreference='general'] - task tag to optimise for
     * @returns {number} score in the range 0..100
     */
    calculateModelScore(model, hardware, analysis, userPreference = 'general') {
        const scores = {
            memory_efficiency: this.calculateMemoryEfficiencyScore(model, analysis),
            performance_match: this.calculatePerformanceMatchScore(model, analysis),
            task_optimization: this.calculateTaskOptimizationScore(model, userPreference),
            popularity_quality: this.calculatePopularityQualityScore(model),
            resource_efficiency: this.calculateResourceEfficiencyScore(model, analysis)
        };

        // Apply penalties for problematic configurations
        const penalties = this.calculatePenalties(model, hardware, analysis);

        // Calculate weighted final score
        let finalScore = 0;
        for (const [factor, weight] of Object.entries(this.performanceWeights)) {
            finalScore += scores[factor] * weight;
        }

        // Apply penalties
        finalScore *= (1 - penalties);

        // Normalize to 0-100 scale
        return Math.max(0, Math.min(100, finalScore * 100));
    }

    /**
     * Rates how well the model's memory requirement uses the available
     * memory: 0.1 when it does not fit, 1.0 in the 50-70% utilisation band,
     * lower for both under-utilisation and very tight fits.
     *
     * @param {Object} model - reads `memory_requirement`
     * @param {Object} analysis - reads `available_memory.total`
     * @returns {number} score between 0.1 and 1.0
     */
    calculateMemoryEfficiencyScore(model, analysis) {
        const requiredMemory = model.memory_requirement;
        const availableMemory = analysis.available_memory.total;

        if (requiredMemory > availableMemory) {
            // Model won't fit - severe penalty
            return 0.1;
        }

        // Calculate efficiency ratio
        const utilizationRatio = requiredMemory / availableMemory;

        if (utilizationRatio <= 0.3) return 0.6; // Under-utilizing
        if (utilizationRatio <= 0.5) return 0.8; // Good utilization
        if (utilizationRatio <= 0.7) return 1.0; // Optimal utilization
        if (utilizationRatio <= 0.9) return 0.9; // High utilization
        return 0.7; // Very tight fit
    }

    /**
     * Combines CPU tier x model CPU intensity, half the memory tier
     * multiplier and a flat 0.2 bonus for any GPU tier above 1.0, capped
     * at 1.0.
     *
     * @param {Object} model - reads `cpu_intensive`
     * @param {Object} analysis - reads the three tier multipliers
     * @returns {number} score capped at 1.0
     */
    calculatePerformanceMatchScore(model, analysis) {
        const cpuMatch = Math.min(1.0, analysis.cpu_tier.multiplier * model.cpu_intensive);
        const memoryMatch = analysis.memory_tier.multiplier / 2.0;
        const gpuBoost = analysis.gpu_tier.multiplier > 1.0 ? 0.2 : 0;

        return Math.min(1.0, cpuMatch + memoryMatch + gpuBoost);
    }

    /**
     * Rates how well the model's specialisations cover the requested task.
     *
     * @param {Object} model - reads `specialization` (array of tags)
     * @param {string} userPreference - requested task tag
     * @returns {number} 1.0 exact tag match, 0.7 related tag, 0.5 otherwise
     */
    calculateTaskOptimizationScore(model, userPreference) {
        if (!model.specialization.includes(userPreference)) {
            // Check for compatible tasks
            const compatibleTasks = {
                'coding': ['programming', 'debugging'],
                'general': ['chat', 'reasoning'],
                'chat': ['general', 'reasoning']
            };

            const compatible = compatibleTasks[userPreference] || [];
            const hasCompatible = compatible.some(task => model.specialization.includes(task));

            return hasCompatible ? 0.7 : 0.5;
        }

        return 1.0; // Perfect match
    }

    /**
     * Blends quality (60%) and popularity (40%), each divided by 10.
     *
     * @param {Object} model - reads `quality_score`, `popularity_score`
     * @returns {number} blended score
     */
    calculatePopularityQualityScore(model) {
        const qualityNormalized = model.quality_score / 10.0;
        const popularityNormalized = model.popularity_score / 10.0;

        return (qualityNormalized * 0.6) + (popularityNormalized * 0.4);
    }

    /**
     * Blends inference speed (70%) with the thermal constraint (30%).
     * Unknown `inference_speed` labels score 0.8. Because the thermal
     * constraint can be 1.2, the result may slightly exceed 1.0.
     *
     * @param {Object} model - reads `inference_speed`
     * @param {Object} analysis - reads `thermal_constraint`
     * @returns {number} blended score
     */
    calculateResourceEfficiencyScore(model, analysis) {
        // Favor models that don't waste resources
        const efficiencyFactors = {
            inference_speed: {
                'very_fast': 1.0,
                'fast': 0.9,
                'medium': 0.8,
                'slow': 0.6,
                'very_slow': 0.4
            }
        };

        const speedScore = efficiencyFactors.inference_speed[model.inference_speed] || 0.8;
        const thermalScore = analysis.thermal_constraint;

        return (speedScore * 0.7) + (thermalScore * 0.3);
    }

    /**
     * Sums the penalty fractions for memory overflow (0.8), too few CPU
     * cores (0.3) and likely thermal throttling (0.2).
     *
     * @param {Object} model - reads `memory_requirement`, `cpu_cores_min`,
     *   `cpu_intensive`
     * @param {Object} hardware - reads `cpu_cores`
     * @param {Object} analysis - reads `available_memory.total`,
     *   `thermal_constraint`
     * @returns {number} fraction of the score to remove, capped at 0.9
     */
    calculatePenalties(model, hardware, analysis) {
        let totalPenalty = 0;

        // Memory overflow penalty
        if (model.memory_requirement > analysis.available_memory.total) {
            totalPenalty += 0.8; // Severe penalty
        }

        // CPU insufficient penalty
        if (hardware.cpu_cores < model.cpu_cores_min) {
            totalPenalty += 0.3;
        }

        // Thermal throttling penalty
        if (model.cpu_intensive > 0.8 && analysis.thermal_constraint < 1.0) {
            totalPenalty += 0.2;
        }

        return Math.min(0.9, totalPenalty); // Cap penalty at 90%
    }

    /**
     * Scores every candidate id and returns them best-first.
     *
     * Ids that `getModelInfo` cannot resolve (non-string or blank) are
     * skipped. A null/undefined candidate list is treated as empty.
     *
     * @param {Object} hardware - raw hardware object
     * @param {Iterable<string>} availableModels - candidate model ids
     * @param {string} [userPreference='general'] - task tag to optimise for
     * @param {number} [topK=5] - maximum number of recommendations
     * @returns {{hardware_analysis: Object, recommendations: Object[],
     *   best_model: (Object|undefined), selection_method: string}}
     *   `best_model` is `undefined` when no candidate could be scored
     */
    selectBestModels(hardware, availableModels, userPreference = 'general', topK = 5) {
        const analysis = this.analyzeHardware(hardware);
        const modelScores = [];

        // Get model info for each available model
        for (const modelId of availableModels || []) {
            const modelInfo = this.getModelInfo(modelId);
            if (!modelInfo) continue;

            const score = this.calculateModelScore(modelInfo, hardware, analysis, userPreference);

            modelScores.push({
                modelId,
                modelInfo,
                score,
                confidence: this.calculateConfidence(score, analysis),
                reasoning: this.generateReasoning(modelInfo, hardware, analysis, score)
            });
        }

        // Sort by score and return top K
        modelScores.sort((a, b) => b.score - a.score);

        return {
            hardware_analysis: analysis,
            recommendations: modelScores.slice(0, topK),
            best_model: modelScores[0],
            selection_method: 'intelligent_mathematical'
        };
    }

    /**
     * Resolves a model id to catalogue data.
     *
     * Resolution order: exact catalogue key, then a fuzzy match that ignores
     * case and the separators `:`, `-`, `_` (either id may contain the
     * other), then a size-based estimate from `estimateModelInfo`.
     *
     * @param {string} modelId - model identifier, e.g. `'llama2:7b'`
     * @returns {Object|null} model data with `id` set to `modelId`, or
     *   `null` when `modelId` is not a non-blank string
     */
    getModelInfo(modelId) {
        if (typeof modelId !== 'string' || modelId.trim() === '') {
            return null;
        }

        // Direct match (own keys only, so ids such as 'constructor' do not
        // resolve to inherited Object.prototype members)
        if (Object.prototype.hasOwnProperty.call(this.modelDatabase, modelId)) {
            return { ...this.modelDatabase[modelId], id: modelId };
        }

        // Fuzzy matching for variations
        const normalizedId = modelId.toLowerCase().replace(/[:\-_]/g, '');

        // Every string includes '', so an id made only of separators would
        // otherwise "match" the first catalogue entry.
        if (normalizedId !== '') {
            for (const [dbId, modelData] of Object.entries(this.modelDatabase)) {
                const normalizedDbId = dbId.toLowerCase().replace(/[:\-_]/g, '');
                if (normalizedId.includes(normalizedDbId) || normalizedDbId.includes(normalizedId)) {
                    return { ...modelData, id: modelId };
                }
            }
        }

        // Fallback - estimate from model name
        return this.estimateModelInfo(modelId);
    }

    /**
     * Builds a rough catalogue entry for an unknown model from the first
     * `<number><k|m|b>` token in its id; defaults to 7 (billion) when no
     * such token exists.
     *
     * @param {string} modelId - model identifier
     * @returns {Object} estimated model data shaped like a catalogue entry
     */
    estimateModelInfo(modelId) {
        const sizeMatch = modelId.match(/(\d+\.?\d*)([kmb])/i);
        let size = 7; // Default size

        if (sizeMatch) {
            const num = parseFloat(sizeMatch[1]);
            const unit = sizeMatch[2].toLowerCase();

            // NOTE(review): `k` and `m` share the same divisor. In model tags
            // a `k` suffix usually denotes context length (e.g. `128k`)
            // rather than a parameter count - confirm the intended handling
            // before changing it.
            if (unit === 'k') size = num / 1000;
            else if (unit === 'm') size = num / 1000;
            else if (unit === 'b') size = num;
        }

        return {
            id: modelId,
            name: modelId,
            size_gb: size * 0.55, // Rough estimation
            parameters: size,
            memory_requirement: size * 1.2,
            cpu_cores_min: Math.max(2, Math.floor(size / 3)),
            cpu_intensive: Math.min(0.9, 0.3 + (size / 50)),
            specialization: ['general'],
            quality_score: 7.5,
            popularity_score: 6.0,
            context_length: 4096,
            quantization: 'Q4_0',
            inference_speed: size > 13 ? 'slow' : size > 7 ? 'medium' : 'fast'
        };
    }

    /**
     * Converts a 0..100 score into a 0..1 confidence: +10% unless the
     * overall tier is `ultra_low`, -20% when less than 4 GB is available.
     *
     * @param {number} score - model score (0..100)
     * @param {Object} analysis - reads `overall_tier`, `available_memory.total`
     * @returns {number} confidence capped at 1.0
     */
    calculateConfidence(score, analysis) {
        let confidence = score / 100;

        // Boost confidence for well-understood hardware
        if (analysis.overall_tier !== 'ultra_low') {
            confidence *= 1.1;
        }

        // Reduce confidence for edge cases
        if (analysis.available_memory.total < 4) {
            confidence *= 0.8;
        }

        return Math.min(1.0, confidence);
    }

    /**
     * Produces a short human-readable justification covering overall fit,
     * memory utilisation, CPU adequacy and (for quality >= 8.0) model quality.
     *
     * @param {Object} modelInfo - model data
     * @param {Object} hardware - reads `cpu_cores`
     * @param {Object} analysis - reads `overall_tier`, `available_memory.total`
     * @param {number} score - model score (0..100)
     * @returns {string} sentences joined by '. ' and terminated with '.'
     */
    generateReasoning(modelInfo, hardware, analysis, score) {
        const reasons = [];

        if (score >= 80) {
            reasons.push(`Excellent fit for your ${analysis.overall_tier} hardware configuration`);
        } else if (score >= 60) {
            reasons.push(`Good match for your system capabilities`);
        } else {
            reasons.push(`Adequate choice given hardware constraints`);
        }

        // Memory reasoning
        const memoryUtilization = modelInfo.memory_requirement / analysis.available_memory.total;
        if (memoryUtilization <= 0.5) {
            reasons.push(`Efficient memory usage (${Math.round(memoryUtilization * 100)}% of available)`);
        } else if (memoryUtilization <= 0.8) {
            reasons.push(`Optimal memory utilization`);
        } else {
            reasons.push(`High memory usage - may impact system performance`);
        }

        // Performance reasoning
        if (hardware.cpu_cores >= modelInfo.cpu_cores_min * 1.5) {
            reasons.push(`CPU well-suited for this model`);
        } else if (hardware.cpu_cores >= modelInfo.cpu_cores_min) {
            reasons.push(`CPU meets minimum requirements`);
        } else {
            reasons.push(`CPU may be limiting factor`);
        }

        // Quality reasoning
        if (modelInfo.quality_score >= 9.0) {
            reasons.push(`High-quality model with excellent capabilities`);
        } else if (modelInfo.quality_score >= 8.0) {
            reasons.push(`Well-regarded model with good performance`);
        }

        return reasons.join('. ') + '.';
    }
}
|
|
614
|
+
|
|
615
|
+
// Expose the selector class as this module's CommonJS export.
module.exports = IntelligentModelSelector;
|