llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,1142 @@
|
|
|
1
|
+
class ExpandedModelsDatabase {
|
|
2
|
+
constructor() {
|
|
3
|
+
this.models = this.initializeExpandedModels();
|
|
4
|
+
this.compatibilityMatrix = this.initializeCompatibilityMatrix();
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
initializeExpandedModels() {
|
|
8
|
+
return [
|
|
9
|
+
// Ultra-Small Models (< 1B parámetros)
|
|
10
|
+
{
|
|
11
|
+
name: "Qwen 0.5B",
|
|
12
|
+
size: "0.5B",
|
|
13
|
+
type: "local",
|
|
14
|
+
category: "ultra_small",
|
|
15
|
+
requirements: {
|
|
16
|
+
ram: 1,
|
|
17
|
+
vram: 0,
|
|
18
|
+
cpu_cores: 1,
|
|
19
|
+
storage: 0.5,
|
|
20
|
+
recommended_ram: 2
|
|
21
|
+
},
|
|
22
|
+
frameworks: ["ollama", "transformers"],
|
|
23
|
+
quantization: ["Q4_0", "Q8_0"],
|
|
24
|
+
performance: {
|
|
25
|
+
speed: "very_fast",
|
|
26
|
+
quality: "basic",
|
|
27
|
+
context_length: 2048,
|
|
28
|
+
tokens_per_second_estimate: "100-200"
|
|
29
|
+
},
|
|
30
|
+
installation: {
|
|
31
|
+
ollama: "ollama pull qwen:0.5b",
|
|
32
|
+
description: "Ultra-lightweight model for testing and basic tasks"
|
|
33
|
+
},
|
|
34
|
+
specialization: "general",
|
|
35
|
+
languages: ["en", "zh"],
|
|
36
|
+
year: 2024
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
name: "LaMini-GPT 774M",
|
|
40
|
+
size: "774M",
|
|
41
|
+
type: "local",
|
|
42
|
+
category: "ultra_small",
|
|
43
|
+
requirements: {
|
|
44
|
+
ram: 1.5,
|
|
45
|
+
vram: 0,
|
|
46
|
+
cpu_cores: 1,
|
|
47
|
+
storage: 0.8,
|
|
48
|
+
recommended_ram: 2
|
|
49
|
+
},
|
|
50
|
+
frameworks: ["transformers", "llama.cpp"],
|
|
51
|
+
quantization: ["Q4_0", "Q8_0"],
|
|
52
|
+
performance: {
|
|
53
|
+
speed: "very_fast",
|
|
54
|
+
quality: "basic",
|
|
55
|
+
context_length: 2048,
|
|
56
|
+
tokens_per_second_estimate: "80-150"
|
|
57
|
+
},
|
|
58
|
+
installation: {
|
|
59
|
+
description: "Multilingual compact model via knowledge distillation"
|
|
60
|
+
},
|
|
61
|
+
specialization: "multilingual",
|
|
62
|
+
languages: ["en", "es", "fr", "de", "zh", "ja"],
|
|
63
|
+
year: 2024
|
|
64
|
+
},
|
|
65
|
+
|
|
66
|
+
// Small Models (1B - 4B parámetros)
|
|
67
|
+
{
|
|
68
|
+
name: "TinyLlama 1.1B",
|
|
69
|
+
size: "1.1B",
|
|
70
|
+
type: "local",
|
|
71
|
+
category: "small",
|
|
72
|
+
requirements: {
|
|
73
|
+
ram: 2,
|
|
74
|
+
vram: 0,
|
|
75
|
+
cpu_cores: 2,
|
|
76
|
+
storage: 1.2,
|
|
77
|
+
recommended_ram: 4
|
|
78
|
+
},
|
|
79
|
+
frameworks: ["ollama", "llama.cpp", "transformers"],
|
|
80
|
+
quantization: ["Q2_K", "Q3_K", "Q4_0", "Q4_1", "Q5_0", "Q5_1", "Q8_0"],
|
|
81
|
+
performance: {
|
|
82
|
+
speed: "very_fast",
|
|
83
|
+
quality: "basic",
|
|
84
|
+
context_length: 2048,
|
|
85
|
+
tokens_per_second_estimate: "50-100"
|
|
86
|
+
},
|
|
87
|
+
installation: {
|
|
88
|
+
ollama: "ollama pull tinyllama:1.1b",
|
|
89
|
+
llamacpp: "Download from HuggingFace: TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
|
90
|
+
description: "Perfect entry point for testing LLM capabilities"
|
|
91
|
+
},
|
|
92
|
+
specialization: "general",
|
|
93
|
+
languages: ["en"],
|
|
94
|
+
year: 2023
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "MobileLLaMA 1.4B",
|
|
98
|
+
size: "1.4B",
|
|
99
|
+
type: "local",
|
|
100
|
+
category: "small",
|
|
101
|
+
requirements: {
|
|
102
|
+
ram: 2.5,
|
|
103
|
+
vram: 0,
|
|
104
|
+
cpu_cores: 2,
|
|
105
|
+
storage: 1.5,
|
|
106
|
+
recommended_ram: 4
|
|
107
|
+
},
|
|
108
|
+
frameworks: ["transformers", "llama.cpp"],
|
|
109
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
|
|
110
|
+
performance: {
|
|
111
|
+
speed: "very_fast",
|
|
112
|
+
quality: "good",
|
|
113
|
+
context_length: 2048,
|
|
114
|
+
tokens_per_second_estimate: "60-120",
|
|
115
|
+
mobile_optimized: true
|
|
116
|
+
},
|
|
117
|
+
installation: {
|
|
118
|
+
description: "Optimized for mobile and edge devices (40% faster than TinyLLaMA)"
|
|
119
|
+
},
|
|
120
|
+
specialization: "mobile",
|
|
121
|
+
languages: ["en"],
|
|
122
|
+
year: 2024
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
name: "MobileLLaMA 2.7B",
|
|
126
|
+
size: "2.7B",
|
|
127
|
+
type: "local",
|
|
128
|
+
category: "small",
|
|
129
|
+
requirements: {
|
|
130
|
+
ram: 3.5,
|
|
131
|
+
vram: 1,
|
|
132
|
+
cpu_cores: 2,
|
|
133
|
+
storage: 2.8,
|
|
134
|
+
recommended_ram: 6
|
|
135
|
+
},
|
|
136
|
+
frameworks: ["transformers", "llama.cpp"],
|
|
137
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
|
|
138
|
+
performance: {
|
|
139
|
+
speed: "fast",
|
|
140
|
+
quality: "good",
|
|
141
|
+
context_length: 4096,
|
|
142
|
+
tokens_per_second_estimate: "40-80",
|
|
143
|
+
mobile_optimized: true
|
|
144
|
+
},
|
|
145
|
+
installation: {
|
|
146
|
+
description: "Larger MobileLLaMA variant for better quality on mobile"
|
|
147
|
+
},
|
|
148
|
+
specialization: "mobile",
|
|
149
|
+
languages: ["en"],
|
|
150
|
+
year: 2024
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
name: "Gemma 2B",
|
|
154
|
+
size: "2B",
|
|
155
|
+
type: "local",
|
|
156
|
+
category: "small",
|
|
157
|
+
requirements: {
|
|
158
|
+
ram: 3,
|
|
159
|
+
vram: 1,
|
|
160
|
+
cpu_cores: 2,
|
|
161
|
+
storage: 2.2,
|
|
162
|
+
recommended_ram: 6
|
|
163
|
+
},
|
|
164
|
+
frameworks: ["ollama", "transformers", "llama.cpp"],
|
|
165
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
166
|
+
performance: {
|
|
167
|
+
speed: "fast",
|
|
168
|
+
quality: "very_good",
|
|
169
|
+
context_length: 8192,
|
|
170
|
+
tokens_per_second_estimate: "30-70"
|
|
171
|
+
},
|
|
172
|
+
installation: {
|
|
173
|
+
ollama: "ollama pull gemma2:2b",
|
|
174
|
+
description: "Google's efficient small model with strong performance"
|
|
175
|
+
},
|
|
176
|
+
specialization: "general",
|
|
177
|
+
languages: ["en"],
|
|
178
|
+
year: 2024
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
name: "Gemma 3 1B",
|
|
182
|
+
size: "1B",
|
|
183
|
+
type: "local",
|
|
184
|
+
category: "small",
|
|
185
|
+
requirements: {
|
|
186
|
+
ram: 2,
|
|
187
|
+
vram: 0,
|
|
188
|
+
cpu_cores: 2,
|
|
189
|
+
storage: 1.1,
|
|
190
|
+
recommended_ram: 4
|
|
191
|
+
},
|
|
192
|
+
frameworks: ["ollama", "transformers"],
|
|
193
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
|
|
194
|
+
performance: {
|
|
195
|
+
speed: "very_fast",
|
|
196
|
+
quality: "good",
|
|
197
|
+
context_length: 32768,
|
|
198
|
+
tokens_per_second_estimate: "70-140"
|
|
199
|
+
},
|
|
200
|
+
installation: {
|
|
201
|
+
ollama: "ollama pull gemma3:1b",
|
|
202
|
+
description: "Latest Gemma optimized for mobile devices"
|
|
203
|
+
},
|
|
204
|
+
specialization: "mobile",
|
|
205
|
+
languages: ["en"],
|
|
206
|
+
year: 2025
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
name: "Phi-3 Mini 3.8B",
|
|
210
|
+
size: "3.8B",
|
|
211
|
+
type: "local",
|
|
212
|
+
category: "small",
|
|
213
|
+
requirements: {
|
|
214
|
+
ram: 4,
|
|
215
|
+
vram: 2,
|
|
216
|
+
cpu_cores: 4,
|
|
217
|
+
storage: 4,
|
|
218
|
+
recommended_ram: 8
|
|
219
|
+
},
|
|
220
|
+
frameworks: ["ollama", "llama.cpp", "transformers"],
|
|
221
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
222
|
+
performance: {
|
|
223
|
+
speed: "fast",
|
|
224
|
+
quality: "very_good",
|
|
225
|
+
context_length: 4096,
|
|
226
|
+
tokens_per_second_estimate: "25-50"
|
|
227
|
+
},
|
|
228
|
+
installation: {
|
|
229
|
+
ollama: "ollama pull phi3:mini",
|
|
230
|
+
description: "Microsoft's efficient small model with excellent reasoning"
|
|
231
|
+
},
|
|
232
|
+
specialization: "reasoning",
|
|
233
|
+
languages: ["en"],
|
|
234
|
+
year: 2024
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
name: "Phi-4 14B",
|
|
238
|
+
size: "14B",
|
|
239
|
+
type: "local",
|
|
240
|
+
category: "medium",
|
|
241
|
+
requirements: {
|
|
242
|
+
ram: 16,
|
|
243
|
+
vram: 8,
|
|
244
|
+
cpu_cores: 6,
|
|
245
|
+
storage: 14,
|
|
246
|
+
recommended_ram: 24
|
|
247
|
+
},
|
|
248
|
+
frameworks: ["ollama", "transformers"],
|
|
249
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M"],
|
|
250
|
+
performance: {
|
|
251
|
+
speed: "medium",
|
|
252
|
+
quality: "excellent",
|
|
253
|
+
context_length: 16384,
|
|
254
|
+
tokens_per_second_estimate: "15-30"
|
|
255
|
+
},
|
|
256
|
+
installation: {
|
|
257
|
+
ollama: "ollama pull phi4:14b",
|
|
258
|
+
description: "Latest Microsoft Phi model with enhanced capabilities"
|
|
259
|
+
},
|
|
260
|
+
specialization: "reasoning",
|
|
261
|
+
languages: ["en"],
|
|
262
|
+
year: 2024
|
|
263
|
+
},
|
|
264
|
+
{
|
|
265
|
+
name: "Llama 3.2 1B",
|
|
266
|
+
size: "1B",
|
|
267
|
+
type: "local",
|
|
268
|
+
category: "small",
|
|
269
|
+
requirements: {
|
|
270
|
+
ram: 2,
|
|
271
|
+
vram: 0,
|
|
272
|
+
cpu_cores: 2,
|
|
273
|
+
storage: 1.1,
|
|
274
|
+
recommended_ram: 4
|
|
275
|
+
},
|
|
276
|
+
frameworks: ["ollama", "llama.cpp", "transformers"],
|
|
277
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
278
|
+
performance: {
|
|
279
|
+
speed: "very_fast",
|
|
280
|
+
quality: "good",
|
|
281
|
+
context_length: 8192,
|
|
282
|
+
tokens_per_second_estimate: "60-120"
|
|
283
|
+
},
|
|
284
|
+
installation: {
|
|
285
|
+
ollama: "ollama pull llama3.2:1b",
|
|
286
|
+
description: "Ultra-compact Llama for mobile and edge devices"
|
|
287
|
+
},
|
|
288
|
+
specialization: "general",
|
|
289
|
+
languages: ["en"],
|
|
290
|
+
year: 2024
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
name: "Llama 3.2 3B",
|
|
294
|
+
size: "3B",
|
|
295
|
+
type: "local",
|
|
296
|
+
category: "small",
|
|
297
|
+
requirements: {
|
|
298
|
+
ram: 4,
|
|
299
|
+
vram: 2,
|
|
300
|
+
cpu_cores: 4,
|
|
301
|
+
storage: 3.2,
|
|
302
|
+
recommended_ram: 8
|
|
303
|
+
},
|
|
304
|
+
frameworks: ["ollama", "llama.cpp", "transformers"],
|
|
305
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
306
|
+
performance: {
|
|
307
|
+
speed: "fast",
|
|
308
|
+
quality: "very_good",
|
|
309
|
+
context_length: 8192,
|
|
310
|
+
tokens_per_second_estimate: "30-60"
|
|
311
|
+
},
|
|
312
|
+
installation: {
|
|
313
|
+
ollama: "ollama pull llama3.2:3b",
|
|
314
|
+
description: "Balanced performance and efficiency from Meta"
|
|
315
|
+
},
|
|
316
|
+
specialization: "general",
|
|
317
|
+
languages: ["en"],
|
|
318
|
+
year: 2024
|
|
319
|
+
},
|
|
320
|
+
|
|
321
|
+
// Medium Models (5B - 15B parámetros)
|
|
322
|
+
{
|
|
323
|
+
name: "Gemma 3 4B",
|
|
324
|
+
size: "4B",
|
|
325
|
+
type: "local",
|
|
326
|
+
category: "medium",
|
|
327
|
+
requirements: {
|
|
328
|
+
ram: 6,
|
|
329
|
+
vram: 3,
|
|
330
|
+
cpu_cores: 4,
|
|
331
|
+
storage: 4.5,
|
|
332
|
+
recommended_ram: 12
|
|
333
|
+
},
|
|
334
|
+
frameworks: ["ollama", "transformers"],
|
|
335
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
336
|
+
performance: {
|
|
337
|
+
speed: "fast",
|
|
338
|
+
quality: "excellent",
|
|
339
|
+
context_length: 128000,
|
|
340
|
+
tokens_per_second_estimate: "20-40"
|
|
341
|
+
},
|
|
342
|
+
installation: {
|
|
343
|
+
ollama: "ollama pull gemma3:4b",
|
|
344
|
+
description: "Multimodal Gemma with long context support"
|
|
345
|
+
},
|
|
346
|
+
specialization: "multimodal",
|
|
347
|
+
languages: ["en"],
|
|
348
|
+
year: 2025,
|
|
349
|
+
multimodal: true
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
name: "Qwen 2.5 7B",
|
|
353
|
+
size: "7B",
|
|
354
|
+
type: "local",
|
|
355
|
+
category: "medium",
|
|
356
|
+
requirements: {
|
|
357
|
+
ram: 8,
|
|
358
|
+
vram: 4,
|
|
359
|
+
cpu_cores: 4,
|
|
360
|
+
storage: 7.5,
|
|
361
|
+
recommended_ram: 16
|
|
362
|
+
},
|
|
363
|
+
frameworks: ["ollama", "transformers", "vllm"],
|
|
364
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
365
|
+
performance: {
|
|
366
|
+
speed: "medium",
|
|
367
|
+
quality: "excellent",
|
|
368
|
+
context_length: 32768,
|
|
369
|
+
tokens_per_second_estimate: "15-35"
|
|
370
|
+
},
|
|
371
|
+
installation: {
|
|
372
|
+
ollama: "ollama pull qwen2.5:7b",
|
|
373
|
+
description: "Alibaba's latest multilingual model with strong coding abilities"
|
|
374
|
+
},
|
|
375
|
+
specialization: "code",
|
|
376
|
+
languages: ["en", "zh", "ja", "ko", "es", "fr", "de"],
|
|
377
|
+
year: 2024
|
|
378
|
+
},
|
|
379
|
+
{
|
|
380
|
+
name: "Llama 3.1 8B",
|
|
381
|
+
size: "8B",
|
|
382
|
+
type: "local",
|
|
383
|
+
category: "medium",
|
|
384
|
+
requirements: {
|
|
385
|
+
ram: 8,
|
|
386
|
+
vram: 4,
|
|
387
|
+
cpu_cores: 4,
|
|
388
|
+
storage: 8.5,
|
|
389
|
+
recommended_ram: 16
|
|
390
|
+
},
|
|
391
|
+
frameworks: ["ollama", "llama.cpp", "transformers", "vllm"],
|
|
392
|
+
quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q6_K", "Q8_0"],
|
|
393
|
+
performance: {
|
|
394
|
+
speed: "medium",
|
|
395
|
+
quality: "excellent",
|
|
396
|
+
context_length: 128000,
|
|
397
|
+
tokens_per_second_estimate: "12-30"
|
|
398
|
+
},
|
|
399
|
+
installation: {
|
|
400
|
+
ollama: "ollama pull llama3.1:8b",
|
|
401
|
+
description: "Outstanding balance of performance and efficiency"
|
|
402
|
+
},
|
|
403
|
+
specialization: "general",
|
|
404
|
+
languages: ["en"],
|
|
405
|
+
year: 2024
|
|
406
|
+
},
|
|
407
|
+
{
|
|
408
|
+
name: "Mistral 7B v0.3",
|
|
409
|
+
size: "7B",
|
|
410
|
+
type: "local",
|
|
411
|
+
category: "medium",
|
|
412
|
+
requirements: {
|
|
413
|
+
ram: 8,
|
|
414
|
+
vram: 4,
|
|
415
|
+
cpu_cores: 4,
|
|
416
|
+
storage: 7.2,
|
|
417
|
+
recommended_ram: 16
|
|
418
|
+
},
|
|
419
|
+
frameworks: ["ollama", "llama.cpp", "transformers", "vllm"],
|
|
420
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q6_K", "Q8_0"],
|
|
421
|
+
performance: {
|
|
422
|
+
speed: "medium",
|
|
423
|
+
quality: "excellent",
|
|
424
|
+
context_length: 32768,
|
|
425
|
+
tokens_per_second_estimate: "15-32"
|
|
426
|
+
},
|
|
427
|
+
installation: {
|
|
428
|
+
ollama: "ollama pull mistral:7b",
|
|
429
|
+
description: "High-quality European model with strong reasoning"
|
|
430
|
+
},
|
|
431
|
+
specialization: "reasoning",
|
|
432
|
+
languages: ["en", "fr", "de", "es", "it"],
|
|
433
|
+
year: 2024
|
|
434
|
+
},
|
|
435
|
+
{
|
|
436
|
+
name: "Mistral Small 3.1",
|
|
437
|
+
size: "22B",
|
|
438
|
+
type: "local",
|
|
439
|
+
category: "large",
|
|
440
|
+
requirements: {
|
|
441
|
+
ram: 24,
|
|
442
|
+
vram: 12,
|
|
443
|
+
cpu_cores: 6,
|
|
444
|
+
storage: 22,
|
|
445
|
+
recommended_ram: 32
|
|
446
|
+
},
|
|
447
|
+
frameworks: ["ollama", "vllm", "transformers"],
|
|
448
|
+
quantization: ["Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
|
|
449
|
+
performance: {
|
|
450
|
+
speed: "slow",
|
|
451
|
+
quality: "excellent",
|
|
452
|
+
context_length: 128000,
|
|
453
|
+
tokens_per_second_estimate: "8-20"
|
|
454
|
+
},
|
|
455
|
+
installation: {
|
|
456
|
+
ollama: "ollama pull mistral-small:22b",
|
|
457
|
+
description: "Latest Mistral model with enhanced capabilities"
|
|
458
|
+
},
|
|
459
|
+
specialization: "reasoning",
|
|
460
|
+
languages: ["en", "fr", "de", "es", "it"],
|
|
461
|
+
year: 2024
|
|
462
|
+
},
|
|
463
|
+
{
|
|
464
|
+
name: "CodeLlama 7B",
|
|
465
|
+
size: "7B",
|
|
466
|
+
type: "local",
|
|
467
|
+
category: "medium",
|
|
468
|
+
specialization: "code",
|
|
469
|
+
requirements: {
|
|
470
|
+
ram: 8,
|
|
471
|
+
vram: 4,
|
|
472
|
+
cpu_cores: 4,
|
|
473
|
+
storage: 7.2,
|
|
474
|
+
recommended_ram: 16
|
|
475
|
+
},
|
|
476
|
+
frameworks: ["ollama", "llama.cpp", "transformers"],
|
|
477
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
|
|
478
|
+
performance: {
|
|
479
|
+
speed: "medium",
|
|
480
|
+
quality: "excellent_for_code",
|
|
481
|
+
context_length: 16384,
|
|
482
|
+
tokens_per_second_estimate: "15-30"
|
|
483
|
+
},
|
|
484
|
+
installation: {
|
|
485
|
+
ollama: "ollama pull codellama:7b",
|
|
486
|
+
description: "Meta's specialized coding assistant"
|
|
487
|
+
},
|
|
488
|
+
languages: ["python", "javascript", "java", "c++", "c", "php", "ruby"],
|
|
489
|
+
year: 2023
|
|
490
|
+
},
|
|
491
|
+
{
|
|
492
|
+
name: "DeepSeek Coder 6.7B",
|
|
493
|
+
size: "6.7B",
|
|
494
|
+
type: "local",
|
|
495
|
+
category: "medium",
|
|
496
|
+
specialization: "code",
|
|
497
|
+
requirements: {
|
|
498
|
+
ram: 8,
|
|
499
|
+
vram: 4,
|
|
500
|
+
cpu_cores: 4,
|
|
501
|
+
storage: 7,
|
|
502
|
+
recommended_ram: 16
|
|
503
|
+
},
|
|
504
|
+
frameworks: ["ollama", "transformers"],
|
|
505
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
|
|
506
|
+
performance: {
|
|
507
|
+
speed: "medium",
|
|
508
|
+
quality: "excellent_for_code",
|
|
509
|
+
context_length: 16384,
|
|
510
|
+
tokens_per_second_estimate: "18-35"
|
|
511
|
+
},
|
|
512
|
+
installation: {
|
|
513
|
+
ollama: "ollama pull deepseek-coder:6.7b",
|
|
514
|
+
description: "Specialized for code generation and analysis"
|
|
515
|
+
},
|
|
516
|
+
languages: ["python", "javascript", "java", "c++", "go", "rust"],
|
|
517
|
+
year: 2024
|
|
518
|
+
},
|
|
519
|
+
|
|
520
|
+
// Large Models (15B+ parámetros)
|
|
521
|
+
{
|
|
522
|
+
name: "Gemma 3 12B",
|
|
523
|
+
size: "12B",
|
|
524
|
+
type: "local",
|
|
525
|
+
category: "large",
|
|
526
|
+
requirements: {
|
|
527
|
+
ram: 16,
|
|
528
|
+
vram: 8,
|
|
529
|
+
cpu_cores: 6,
|
|
530
|
+
storage: 13,
|
|
531
|
+
recommended_ram: 24
|
|
532
|
+
},
|
|
533
|
+
frameworks: ["ollama", "transformers"],
|
|
534
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M"],
|
|
535
|
+
performance: {
|
|
536
|
+
speed: "medium_slow",
|
|
537
|
+
quality: "excellent",
|
|
538
|
+
context_length: 128000,
|
|
539
|
+
tokens_per_second_estimate: "10-25"
|
|
540
|
+
},
|
|
541
|
+
installation: {
|
|
542
|
+
ollama: "ollama pull gemma3:12b",
|
|
543
|
+
description: "Large multimodal Gemma with advanced capabilities"
|
|
544
|
+
},
|
|
545
|
+
specialization: "multimodal",
|
|
546
|
+
languages: ["en"],
|
|
547
|
+
year: 2025,
|
|
548
|
+
multimodal: true
|
|
549
|
+
},
|
|
550
|
+
{
|
|
551
|
+
name: "Gemma 3 27B",
|
|
552
|
+
size: "27B",
|
|
553
|
+
type: "local",
|
|
554
|
+
category: "large",
|
|
555
|
+
requirements: {
|
|
556
|
+
ram: 32,
|
|
557
|
+
vram: 16,
|
|
558
|
+
cpu_cores: 8,
|
|
559
|
+
storage: 28,
|
|
560
|
+
recommended_ram: 48
|
|
561
|
+
},
|
|
562
|
+
frameworks: ["transformers", "vllm"],
|
|
563
|
+
quantization: ["Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
|
|
564
|
+
performance: {
|
|
565
|
+
speed: "slow",
|
|
566
|
+
quality: "excellent",
|
|
567
|
+
context_length: 128000,
|
|
568
|
+
tokens_per_second_estimate: "5-15"
|
|
569
|
+
},
|
|
570
|
+
installation: {
|
|
571
|
+
description: "Flagship multimodal Gemma model"
|
|
572
|
+
},
|
|
573
|
+
specialization: "multimodal",
|
|
574
|
+
languages: ["en"],
|
|
575
|
+
year: 2025,
|
|
576
|
+
multimodal: true
|
|
577
|
+
},
|
|
578
|
+
{
|
|
579
|
+
name: "Llama 3.3 70B",
|
|
580
|
+
size: "70B",
|
|
581
|
+
type: "local",
|
|
582
|
+
category: "large",
|
|
583
|
+
requirements: {
|
|
584
|
+
ram: 48,
|
|
585
|
+
vram: 24,
|
|
586
|
+
cpu_cores: 8,
|
|
587
|
+
storage: 72,
|
|
588
|
+
recommended_ram: 64,
|
|
589
|
+
recommended_vram: 48
|
|
590
|
+
},
|
|
591
|
+
frameworks: ["ollama", "vllm", "transformers"],
|
|
592
|
+
quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
|
|
593
|
+
performance: {
|
|
594
|
+
speed: "slow",
|
|
595
|
+
quality: "excellent",
|
|
596
|
+
context_length: 128000,
|
|
597
|
+
tokens_per_second_estimate: "3-12"
|
|
598
|
+
},
|
|
599
|
+
installation: {
|
|
600
|
+
ollama: "ollama pull llama3.3:70b",
|
|
601
|
+
description: "Latest flagship Llama model with enhanced capabilities"
|
|
602
|
+
},
|
|
603
|
+
specialization: "general",
|
|
604
|
+
languages: ["en"],
|
|
605
|
+
year: 2024
|
|
606
|
+
},
|
|
607
|
+
{
|
|
608
|
+
name: "DeepSeek-R1 70B",
|
|
609
|
+
size: "70B",
|
|
610
|
+
type: "local",
|
|
611
|
+
category: "large",
|
|
612
|
+
requirements: {
|
|
613
|
+
ram: 50,
|
|
614
|
+
vram: 25,
|
|
615
|
+
cpu_cores: 8,
|
|
616
|
+
storage: 75,
|
|
617
|
+
recommended_ram: 64,
|
|
618
|
+
recommended_vram: 50
|
|
619
|
+
},
|
|
620
|
+
frameworks: ["ollama", "vllm"],
|
|
621
|
+
quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M"],
|
|
622
|
+
performance: {
|
|
623
|
+
speed: "slow",
|
|
624
|
+
quality: "excellent",
|
|
625
|
+
context_length: 65536,
|
|
626
|
+
tokens_per_second_estimate: "2-10"
|
|
627
|
+
},
|
|
628
|
+
installation: {
|
|
629
|
+
ollama: "ollama pull deepseek-r1:70b",
|
|
630
|
+
description: "Advanced reasoning model with o1-like capabilities"
|
|
631
|
+
},
|
|
632
|
+
specialization: "reasoning",
|
|
633
|
+
languages: ["en", "zh"],
|
|
634
|
+
year: 2025
|
|
635
|
+
},
|
|
636
|
+
|
|
637
|
+
// Multimodal Models
|
|
638
|
+
{
|
|
639
|
+
name: "LLaVA 7B",
|
|
640
|
+
size: "7B",
|
|
641
|
+
type: "local",
|
|
642
|
+
category: "medium",
|
|
643
|
+
specialization: "multimodal",
|
|
644
|
+
requirements: {
|
|
645
|
+
ram: 10,
|
|
646
|
+
vram: 6,
|
|
647
|
+
cpu_cores: 4,
|
|
648
|
+
storage: 8,
|
|
649
|
+
recommended_ram: 16
|
|
650
|
+
},
|
|
651
|
+
frameworks: ["ollama", "transformers"],
|
|
652
|
+
quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
|
|
653
|
+
performance: {
|
|
654
|
+
speed: "medium",
|
|
655
|
+
quality: "good",
|
|
656
|
+
context_length: 4096,
|
|
657
|
+
tokens_per_second_estimate: "12-25"
|
|
658
|
+
},
|
|
659
|
+
installation: {
|
|
660
|
+
ollama: "ollama pull llava:7b",
|
|
661
|
+
description: "Vision-language model for image understanding"
|
|
662
|
+
},
|
|
663
|
+
languages: ["en"],
|
|
664
|
+
year: 2023,
|
|
665
|
+
multimodal: true
|
|
666
|
+
},
|
|
667
|
+
{
|
|
668
|
+
name: "LLaVA-NeXT 34B",
|
|
669
|
+
size: "34B",
|
|
670
|
+
type: "local",
|
|
671
|
+
category: "large",
|
|
672
|
+
specialization: "multimodal",
|
|
673
|
+
requirements: {
|
|
674
|
+
ram: 36,
|
|
675
|
+
vram: 18,
|
|
676
|
+
cpu_cores: 8,
|
|
677
|
+
storage: 36,
|
|
678
|
+
recommended_ram: 48
|
|
679
|
+
},
|
|
680
|
+
frameworks: ["transformers"],
|
|
681
|
+
quantization: ["Q3_K_M", "Q4_0", "Q4_K_M"],
|
|
682
|
+
performance: {
|
|
683
|
+
speed: "slow",
|
|
684
|
+
quality: "excellent",
|
|
685
|
+
context_length: 8192,
|
|
686
|
+
tokens_per_second_estimate: "4-12"
|
|
687
|
+
},
|
|
688
|
+
installation: {
|
|
689
|
+
description: "Advanced multimodal model with enhanced vision capabilities"
|
|
690
|
+
},
|
|
691
|
+
languages: ["en"],
|
|
692
|
+
year: 2024,
|
|
693
|
+
multimodal: true
|
|
694
|
+
},
|
|
695
|
+
|
|
696
|
+
// Embedding Models
|
|
697
|
+
{
|
|
698
|
+
name: "all-MiniLM-L6-v2",
|
|
699
|
+
size: "22M",
|
|
700
|
+
type: "local",
|
|
701
|
+
category: "embedding",
|
|
702
|
+
specialization: "embeddings",
|
|
703
|
+
requirements: {
|
|
704
|
+
ram: 0.5,
|
|
705
|
+
vram: 0,
|
|
706
|
+
cpu_cores: 1,
|
|
707
|
+
storage: 0.1,
|
|
708
|
+
recommended_ram: 1
|
|
709
|
+
},
|
|
710
|
+
frameworks: ["sentence-transformers", "ollama"],
|
|
711
|
+
performance: {
|
|
712
|
+
speed: "very_fast",
|
|
713
|
+
quality: "good",
|
|
714
|
+
context_length: 512,
|
|
715
|
+
dimensions: 384
|
|
716
|
+
},
|
|
717
|
+
installation: {
|
|
718
|
+
ollama: "ollama pull all-minilm",
|
|
719
|
+
description: "Compact embedding model for semantic search"
|
|
720
|
+
},
|
|
721
|
+
languages: ["en"],
|
|
722
|
+
year: 2023
|
|
723
|
+
},
|
|
724
|
+
{
|
|
725
|
+
name: "BGE-small-en-v1.5",
|
|
726
|
+
size: "33M",
|
|
727
|
+
type: "local",
|
|
728
|
+
category: "embedding",
|
|
729
|
+
specialization: "embeddings",
|
|
730
|
+
requirements: {
|
|
731
|
+
ram: 0.5,
|
|
732
|
+
vram: 0,
|
|
733
|
+
cpu_cores: 1,
|
|
734
|
+
storage: 0.1,
|
|
735
|
+
recommended_ram: 1
|
|
736
|
+
},
|
|
737
|
+
frameworks: ["sentence-transformers", "transformers"],
|
|
738
|
+
performance: {
|
|
739
|
+
speed: "very_fast",
|
|
740
|
+
quality: "very_good",
|
|
741
|
+
context_length: 512,
|
|
742
|
+
dimensions: 384
|
|
743
|
+
},
|
|
744
|
+
installation: {
|
|
745
|
+
description: "High-quality English embedding model"
|
|
746
|
+
},
|
|
747
|
+
languages: ["en"],
|
|
748
|
+
year: 2023
|
|
749
|
+
}
|
|
750
|
+
];
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
initializeCompatibilityMatrix() {
|
|
754
|
+
return {
|
|
755
|
+
// Hardware tiers
|
|
756
|
+
tiers: {
|
|
757
|
+
ultra_low: { ram: 4, vram: 0, cpu_cores: 2 },
|
|
758
|
+
low: { ram: 8, vram: 2, cpu_cores: 4 },
|
|
759
|
+
medium: { ram: 16, vram: 8, cpu_cores: 6 },
|
|
760
|
+
high: { ram: 32, vram: 16, cpu_cores: 8 },
|
|
761
|
+
ultra_high: { ram: 64, vram: 32, cpu_cores: 12 }
|
|
762
|
+
},
|
|
763
|
+
|
|
764
|
+
// Compatibility scores by category and tier
|
|
765
|
+
compatibility: {
|
|
766
|
+
ultra_small: {
|
|
767
|
+
ultra_low: 95, low: 100, medium: 100, high: 100, ultra_high: 100
|
|
768
|
+
},
|
|
769
|
+
small: {
|
|
770
|
+
ultra_low: 70, low: 90, medium: 100, high: 100, ultra_high: 100
|
|
771
|
+
},
|
|
772
|
+
medium: {
|
|
773
|
+
ultra_low: 20, low: 60, medium: 85, high: 95, ultra_high: 100
|
|
774
|
+
},
|
|
775
|
+
large: {
|
|
776
|
+
ultra_low: 5, low: 25, medium: 50, high: 80, ultra_high: 95
|
|
777
|
+
},
|
|
778
|
+
embedding: {
|
|
779
|
+
ultra_low: 100, low: 100, medium: 100, high: 100, ultra_high: 100
|
|
780
|
+
}
|
|
781
|
+
},
|
|
782
|
+
|
|
783
|
+
// Performance multipliers
|
|
784
|
+
architecture_bonuses: {
|
|
785
|
+
'Apple Silicon': 1.15,
|
|
786
|
+
'x86_64_modern': 1.05,
|
|
787
|
+
'ARM64': 0.95
|
|
788
|
+
},
|
|
789
|
+
|
|
790
|
+
// Quantization effectiveness
|
|
791
|
+
quantization_savings: {
|
|
792
|
+
'Q2_K': 0.6, // 60% size reduction
|
|
793
|
+
'Q3_K_M': 0.7, // 70% of original size
|
|
794
|
+
'Q4_0': 0.75, // 75% of original size
|
|
795
|
+
'Q4_K_M': 0.75,
|
|
796
|
+
'Q5_0': 0.85, // 85% of original size
|
|
797
|
+
'Q5_K_M': 0.85,
|
|
798
|
+
'Q6_K': 0.9, // 90% of original size
|
|
799
|
+
'Q8_0': 0.95 // 95% of original size
|
|
800
|
+
}
|
|
801
|
+
};
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
getAllModels() {
|
|
805
|
+
return this.models;
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
getModelsByCategory(category) {
|
|
809
|
+
return this.models.filter(model => model.category === category);
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
getUltraSmallModels() {
|
|
813
|
+
return this.getModelsByCategory('ultra_small');
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
getSmallModels() {
|
|
817
|
+
return this.getModelsByCategory('small');
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
getMediumModels() {
|
|
821
|
+
return this.getModelsByCategory('medium');
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
getLargeModels() {
|
|
825
|
+
return this.getModelsByCategory('large');
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
getEmbeddingModels() {
|
|
829
|
+
return this.getModelsByCategory('embedding');
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
getMultimodalModels() {
|
|
833
|
+
return this.models.filter(model => model.multimodal === true);
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
getModelsBySpecialization(specialization) {
|
|
837
|
+
return this.models.filter(model => model.specialization === specialization);
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
getModelsByLanguage(language) {
|
|
841
|
+
return this.models.filter(model =>
|
|
842
|
+
model.languages && model.languages.includes(language)
|
|
843
|
+
);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
getModelsByYear(year) {
|
|
847
|
+
return this.models.filter(model => model.year === year);
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
getRecentModels(yearsBack = 1) {
|
|
851
|
+
const currentYear = new Date().getFullYear();
|
|
852
|
+
const cutoffYear = currentYear - yearsBack;
|
|
853
|
+
return this.models.filter(model => model.year >= cutoffYear);
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
findModel(name) {
|
|
857
|
+
return this.models.find(model =>
|
|
858
|
+
model.name.toLowerCase().includes(name.toLowerCase())
|
|
859
|
+
);
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
getHardwareTier(hardware) {
|
|
863
|
+
const { memory, gpu, cpu } = hardware;
|
|
864
|
+
const ram = memory.total;
|
|
865
|
+
const vram = gpu.vram || 0;
|
|
866
|
+
const cores = cpu.cores;
|
|
867
|
+
|
|
868
|
+
// Check if it's Apple Silicon (unified memory architecture)
|
|
869
|
+
const isAppleSilicon = cpu?.architecture === 'Apple Silicon' ||
|
|
870
|
+
(gpu?.model && gpu.model.toLowerCase().includes('apple')) ||
|
|
871
|
+
(cpu?.brand && cpu.brand.toLowerCase().includes('apple'));
|
|
872
|
+
|
|
873
|
+
// Apple Silicon uses unified memory, so evaluate differently
|
|
874
|
+
if (isAppleSilicon) {
|
|
875
|
+
if (ram >= 64) return 'ultra_high';
|
|
876
|
+
if (ram >= 24) return 'high'; // M4 Pro with 24GB should be high tier
|
|
877
|
+
if (ram >= 16) return 'medium';
|
|
878
|
+
if (ram >= 8) return 'low';
|
|
879
|
+
return 'ultra_low';
|
|
880
|
+
}
|
|
881
|
+
|
|
882
|
+
// Traditional discrete GPU systems
|
|
883
|
+
if (ram >= 64 && vram >= 32 && cores >= 12) return 'ultra_high';
|
|
884
|
+
if (ram >= 32 && vram >= 16 && cores >= 8) return 'high';
|
|
885
|
+
if (ram >= 16 && vram >= 8 && cores >= 6) return 'medium';
|
|
886
|
+
if (ram >= 8 && vram >= 2 && cores >= 4) return 'low';
|
|
887
|
+
return 'ultra_low';
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
getCompatibilityScore(model, hardware) {
|
|
891
|
+
const tier = this.getHardwareTier(hardware);
|
|
892
|
+
const baseScore = this.compatibilityMatrix.compatibility[model.category]?.[tier] || 0;
|
|
893
|
+
|
|
894
|
+
// Apply architecture bonus
|
|
895
|
+
const architectureBonus = this.compatibilityMatrix.architecture_bonuses[hardware.cpu.architecture] || 1.0;
|
|
896
|
+
|
|
897
|
+
// Apply quantization bonus if available
|
|
898
|
+
let quantizationBonus = 1.0;
|
|
899
|
+
if (model.quantization && model.quantization.length > 0) {
|
|
900
|
+
// Use most aggressive quantization for limited hardware
|
|
901
|
+
if (tier === 'ultra_low' || tier === 'low') {
|
|
902
|
+
quantizationBonus = 1.2; // Quantization is very valuable
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
return Math.min(100, Math.round(baseScore * architectureBonus * quantizationBonus));
|
|
907
|
+
}
|
|
908
|
+
|
|
909
|
+
getDetailedCompatibilityAnalysis(model, hardware) {
|
|
910
|
+
const score = this.getCompatibilityScore(model, hardware);
|
|
911
|
+
const tier = this.getHardwareTier(hardware);
|
|
912
|
+
const issues = [];
|
|
913
|
+
const recommendations = [];
|
|
914
|
+
|
|
915
|
+
// Check specific requirements
|
|
916
|
+
if (hardware.memory.total < model.requirements.ram) {
|
|
917
|
+
issues.push(`Insufficient RAM: ${hardware.memory.total}GB < ${model.requirements.ram}GB required`);
|
|
918
|
+
recommendations.push(`Upgrade to at least ${model.requirements.ram}GB RAM`);
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
if (hardware.gpu.vram < model.requirements.vram) {
|
|
922
|
+
issues.push(`Insufficient VRAM: ${hardware.gpu.vram}GB < ${model.requirements.vram}GB required`);
|
|
923
|
+
if (model.quantization && model.quantization.includes('Q4_0')) {
|
|
924
|
+
recommendations.push('Consider using Q4_0 quantization to reduce VRAM usage');
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
|
|
928
|
+
if (hardware.cpu.cores < model.requirements.cpu_cores) {
|
|
929
|
+
issues.push(`Limited CPU cores: ${hardware.cpu.cores} < ${model.requirements.cpu_cores} recommended`);
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
// Performance estimation
|
|
933
|
+
const estimatedPerformance = this.estimatePerformance(model, hardware);
|
|
934
|
+
|
|
935
|
+
return {
|
|
936
|
+
score,
|
|
937
|
+
tier,
|
|
938
|
+
issues,
|
|
939
|
+
recommendations,
|
|
940
|
+
canRun: score >= 60,
|
|
941
|
+
estimatedPerformance,
|
|
942
|
+
bestQuantization: this.getBestQuantization(model, hardware)
|
|
943
|
+
};
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
estimatePerformance(model, hardware) {
|
|
947
|
+
// Use realistic performance estimation instead of optimistic predefined values
|
|
948
|
+
const estimatedTokensPerSecond = this.calculateRealisticTokensPerSecond(model, hardware);
|
|
949
|
+
|
|
950
|
+
return {
|
|
951
|
+
tokensPerSecond: estimatedTokensPerSecond,
|
|
952
|
+
category: estimatedTokensPerSecond > 50 ? 'fast' :
|
|
953
|
+
estimatedTokensPerSecond > 20 ? 'medium' :
|
|
954
|
+
estimatedTokensPerSecond > 5 ? 'slow' : 'very_slow',
|
|
955
|
+
memoryUsage: this.estimateMemoryUsage(model),
|
|
956
|
+
powerConsumption: this.estimatePowerConsumption(model, hardware)
|
|
957
|
+
};
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
calculateRealisticTokensPerSecond(model, hardware) {
|
|
961
|
+
// Extract model parameters from name or size
|
|
962
|
+
const modelParams = this.extractModelParams(model);
|
|
963
|
+
|
|
964
|
+
// Get hardware specifics
|
|
965
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
966
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
967
|
+
const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
968
|
+
const baseSpeed = hardware.cpu?.speed || 2.4;
|
|
969
|
+
const vramGB = hardware.gpu?.vram || 0;
|
|
970
|
+
|
|
971
|
+
// Check hardware type
|
|
972
|
+
const isAppleSilicon = process.platform === 'darwin' && (
|
|
973
|
+
gpuModel.toLowerCase().includes('apple') ||
|
|
974
|
+
gpuModel.toLowerCase().includes('m1') ||
|
|
975
|
+
gpuModel.toLowerCase().includes('m2') ||
|
|
976
|
+
gpuModel.toLowerCase().includes('m3') ||
|
|
977
|
+
gpuModel.toLowerCase().includes('m4')
|
|
978
|
+
);
|
|
979
|
+
const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
|
|
980
|
+
const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;
|
|
981
|
+
|
|
982
|
+
let tokensPerSecond;
|
|
983
|
+
|
|
984
|
+
if (isAppleSilicon) {
|
|
985
|
+
// Apple Silicon unified memory - more optimistic but realistic
|
|
986
|
+
let baseTPS = 25;
|
|
987
|
+
if (gpuModel.toLowerCase().includes('m4 pro')) baseTPS = 35;
|
|
988
|
+
else if (gpuModel.toLowerCase().includes('m4')) baseTPS = 30;
|
|
989
|
+
else if (gpuModel.toLowerCase().includes('m3 pro')) baseTPS = 30;
|
|
990
|
+
else if (gpuModel.toLowerCase().includes('m3')) baseTPS = 25;
|
|
991
|
+
else if (gpuModel.toLowerCase().includes('m2 pro')) baseTPS = 28;
|
|
992
|
+
else if (gpuModel.toLowerCase().includes('m2')) baseTPS = 22;
|
|
993
|
+
else if (gpuModel.toLowerCase().includes('m1 pro')) baseTPS = 25;
|
|
994
|
+
else if (gpuModel.toLowerCase().includes('m1')) baseTPS = 20;
|
|
995
|
+
|
|
996
|
+
// Scale by model size (Apple Silicon handles larger models better)
|
|
997
|
+
tokensPerSecond = Math.max(8, Math.round(baseTPS / Math.max(0.8, modelParams)));
|
|
998
|
+
|
|
999
|
+
} else if (hasDedicatedGPU) {
|
|
1000
|
+
// Dedicated GPU - much better performance
|
|
1001
|
+
let gpuTPS = 30;
|
|
1002
|
+
if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
|
|
1003
|
+
else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
|
|
1004
|
+
else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
|
|
1005
|
+
else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
|
|
1006
|
+
else if (vramGB >= 16) gpuTPS = 45;
|
|
1007
|
+
else if (vramGB >= 8) gpuTPS = 35;
|
|
1008
|
+
else if (vramGB >= 4) gpuTPS = 25;
|
|
1009
|
+
|
|
1010
|
+
// Scale by model size for GPU
|
|
1011
|
+
tokensPerSecond = Math.max(10, Math.round(gpuTPS / Math.max(0.5, modelParams)));
|
|
1012
|
+
|
|
1013
|
+
} else {
|
|
1014
|
+
// CPU-only or integrated GPU - most realistic and conservative
|
|
1015
|
+
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
1016
|
+
(cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
|
|
1017
|
+
const hasAVX2 = cpuModel.toLowerCase().includes('intel') || cpuModel.toLowerCase().includes('amd');
|
|
1018
|
+
|
|
1019
|
+
// Base CPU coefficient - much more conservative
|
|
1020
|
+
let cpuK = 1.8; // Conservative baseline
|
|
1021
|
+
if (hasAVX512) cpuK = 2.6;
|
|
1022
|
+
else if (hasAVX2) cpuK = 2.2;
|
|
1023
|
+
|
|
1024
|
+
// iGPU boost (small)
|
|
1025
|
+
const iGpuMultiplier = isIntegratedGPU ? 1.3 : 1.0;
|
|
1026
|
+
|
|
1027
|
+
// Calculate with realistic threading limits
|
|
1028
|
+
const effectiveThreads = Math.min(cores, 8); // Diminishing returns after 8 threads
|
|
1029
|
+
const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier) / Math.max(1.5, modelParams);
|
|
1030
|
+
|
|
1031
|
+
// Apply realistic CPU limits
|
|
1032
|
+
const maxCPUTPS = hasAVX512 ? 25 : (isIntegratedGPU ? 20 : 15);
|
|
1033
|
+
tokensPerSecond = Math.max(2, Math.min(maxCPUTPS, Math.round(baseTPS)));
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
return tokensPerSecond;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
extractModelParams(model) {
|
|
1040
|
+
// Try to extract parameter count from model name
|
|
1041
|
+
const name = model.name.toLowerCase();
|
|
1042
|
+
|
|
1043
|
+
// Look for patterns like "7b", "3.8b", "0.5b", etc.
|
|
1044
|
+
const paramMatch = name.match(/(\d+\.?\d*)[bm](?:\s|$)/);
|
|
1045
|
+
if (paramMatch) {
|
|
1046
|
+
const value = parseFloat(paramMatch[1]);
|
|
1047
|
+
// Convert millions to billions if needed
|
|
1048
|
+
return paramMatch[1].includes('m') ? value / 1000 : value;
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
// Fallback to size-based estimation
|
|
1052
|
+
const sizeGB = model.size ? parseFloat(model.size.toString()) : 4;
|
|
1053
|
+
// Rough estimate: 1B params ≈ 2GB in Q4 quantization
|
|
1054
|
+
return Math.max(0.5, sizeGB / 2);
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
getBestQuantization(model, hardware) {
|
|
1058
|
+
if (!model.quantization || model.quantization.length === 0) {
|
|
1059
|
+
return null;
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
const tier = this.getHardwareTier(hardware);
|
|
1063
|
+
|
|
1064
|
+
const recommendations = {
|
|
1065
|
+
ultra_low: ['Q2_K', 'Q3_K_M'],
|
|
1066
|
+
low: ['Q4_0', 'Q4_K_M'],
|
|
1067
|
+
medium: ['Q4_K_M', 'Q5_0'],
|
|
1068
|
+
high: ['Q5_K_M', 'Q6_K'],
|
|
1069
|
+
ultra_high: ['Q8_0', 'Q6_K']
|
|
1070
|
+
};
|
|
1071
|
+
|
|
1072
|
+
const recommended = recommendations[tier] || ['Q4_0'];
|
|
1073
|
+
return model.quantization.find(q => recommended.includes(q)) || model.quantization[0];
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
estimateMemoryUsage(model) {
|
|
1077
|
+
const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
|
|
1078
|
+
|
|
1079
|
+
// Rough estimates including model loading overhead
|
|
1080
|
+
return {
|
|
1081
|
+
minimal: Math.round(sizeGB * 1.2), // With quantization
|
|
1082
|
+
typical: Math.round(sizeGB * 1.5), // Standard loading
|
|
1083
|
+
maximum: Math.round(sizeGB * 2.0) // With full context
|
|
1084
|
+
};
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
estimatePowerConsumption(model, hardware) {
|
|
1088
|
+
const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
|
|
1089
|
+
const tier = this.getHardwareTier(hardware);
|
|
1090
|
+
|
|
1091
|
+
const basePower = {
|
|
1092
|
+
ultra_low: 15,
|
|
1093
|
+
low: 25,
|
|
1094
|
+
medium: 45,
|
|
1095
|
+
high: 85,
|
|
1096
|
+
ultra_high: 150
|
|
1097
|
+
};
|
|
1098
|
+
|
|
1099
|
+
const modelMultiplier = Math.log10(sizeGB + 1) * 0.5 + 1;
|
|
1100
|
+
|
|
1101
|
+
return {
|
|
1102
|
+
idle: Math.round((basePower[tier] || 25) * 0.3),
|
|
1103
|
+
inference: Math.round((basePower[tier] || 25) * modelMultiplier),
|
|
1104
|
+
unit: 'watts'
|
|
1105
|
+
};
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
getModelRecommendations(hardware, useCase = 'general') {
|
|
1109
|
+
const tier = this.getHardwareTier(hardware);
|
|
1110
|
+
const allModels = this.getAllModels();
|
|
1111
|
+
|
|
1112
|
+
// Filter by use case
|
|
1113
|
+
let relevantModels = allModels;
|
|
1114
|
+
if (useCase !== 'general') {
|
|
1115
|
+
relevantModels = allModels.filter(model =>
|
|
1116
|
+
model.specialization === useCase ||
|
|
1117
|
+
(useCase === 'chat' && !model.specialization)
|
|
1118
|
+
);
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
// Score and sort models
|
|
1122
|
+
const scoredModels = relevantModels.map(model => ({
|
|
1123
|
+
...model,
|
|
1124
|
+
compatibilityScore: this.getCompatibilityScore(model, hardware)
|
|
1125
|
+
}));
|
|
1126
|
+
|
|
1127
|
+
scoredModels.sort((a, b) => b.compatibilityScore - a.compatibilityScore);
|
|
1128
|
+
|
|
1129
|
+
return {
|
|
1130
|
+
tier,
|
|
1131
|
+
topRecommendations: scoredModels.slice(0, 5),
|
|
1132
|
+
byCategory: {
|
|
1133
|
+
ultra_small: scoredModels.filter(m => m.category === 'ultra_small').slice(0, 3),
|
|
1134
|
+
small: scoredModels.filter(m => m.category === 'small').slice(0, 3),
|
|
1135
|
+
medium: scoredModels.filter(m => m.category === 'medium').slice(0, 3),
|
|
1136
|
+
large: scoredModels.filter(m => m.category === 'large').slice(0, 2)
|
|
1137
|
+
}
|
|
1138
|
+
};
|
|
1139
|
+
}
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
module.exports = ExpandedModelsDatabase;
|