llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,1142 @@
1
+ class ExpandedModelsDatabase {
2
+ constructor() {
3
+ this.models = this.initializeExpandedModels();
4
+ this.compatibilityMatrix = this.initializeCompatibilityMatrix();
5
+ }
6
+
7
+ initializeExpandedModels() {
8
+ return [
9
+ // Ultra-Small Models (< 1B parámetros)
10
+ {
11
+ name: "Qwen 0.5B",
12
+ size: "0.5B",
13
+ type: "local",
14
+ category: "ultra_small",
15
+ requirements: {
16
+ ram: 1,
17
+ vram: 0,
18
+ cpu_cores: 1,
19
+ storage: 0.5,
20
+ recommended_ram: 2
21
+ },
22
+ frameworks: ["ollama", "transformers"],
23
+ quantization: ["Q4_0", "Q8_0"],
24
+ performance: {
25
+ speed: "very_fast",
26
+ quality: "basic",
27
+ context_length: 2048,
28
+ tokens_per_second_estimate: "100-200"
29
+ },
30
+ installation: {
31
+ ollama: "ollama pull qwen:0.5b",
32
+ description: "Ultra-lightweight model for testing and basic tasks"
33
+ },
34
+ specialization: "general",
35
+ languages: ["en", "zh"],
36
+ year: 2024
37
+ },
38
+ {
39
+ name: "LaMini-GPT 774M",
40
+ size: "774M",
41
+ type: "local",
42
+ category: "ultra_small",
43
+ requirements: {
44
+ ram: 1.5,
45
+ vram: 0,
46
+ cpu_cores: 1,
47
+ storage: 0.8,
48
+ recommended_ram: 2
49
+ },
50
+ frameworks: ["transformers", "llama.cpp"],
51
+ quantization: ["Q4_0", "Q8_0"],
52
+ performance: {
53
+ speed: "very_fast",
54
+ quality: "basic",
55
+ context_length: 2048,
56
+ tokens_per_second_estimate: "80-150"
57
+ },
58
+ installation: {
59
+ description: "Multilingual compact model via knowledge distillation"
60
+ },
61
+ specialization: "multilingual",
62
+ languages: ["en", "es", "fr", "de", "zh", "ja"],
63
+ year: 2024
64
+ },
65
+
66
+ // Small Models (1B - 4B parámetros)
67
+ {
68
+ name: "TinyLlama 1.1B",
69
+ size: "1.1B",
70
+ type: "local",
71
+ category: "small",
72
+ requirements: {
73
+ ram: 2,
74
+ vram: 0,
75
+ cpu_cores: 2,
76
+ storage: 1.2,
77
+ recommended_ram: 4
78
+ },
79
+ frameworks: ["ollama", "llama.cpp", "transformers"],
80
+ quantization: ["Q2_K", "Q3_K", "Q4_0", "Q4_1", "Q5_0", "Q5_1", "Q8_0"],
81
+ performance: {
82
+ speed: "very_fast",
83
+ quality: "basic",
84
+ context_length: 2048,
85
+ tokens_per_second_estimate: "50-100"
86
+ },
87
+ installation: {
88
+ ollama: "ollama pull tinyllama:1.1b",
89
+ llamacpp: "Download from HuggingFace: TinyLlama/TinyLlama-1.1B-Chat-v1.0",
90
+ description: "Perfect entry point for testing LLM capabilities"
91
+ },
92
+ specialization: "general",
93
+ languages: ["en"],
94
+ year: 2023
95
+ },
96
+ {
97
+ name: "MobileLLaMA 1.4B",
98
+ size: "1.4B",
99
+ type: "local",
100
+ category: "small",
101
+ requirements: {
102
+ ram: 2.5,
103
+ vram: 0,
104
+ cpu_cores: 2,
105
+ storage: 1.5,
106
+ recommended_ram: 4
107
+ },
108
+ frameworks: ["transformers", "llama.cpp"],
109
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
110
+ performance: {
111
+ speed: "very_fast",
112
+ quality: "good",
113
+ context_length: 2048,
114
+ tokens_per_second_estimate: "60-120",
115
+ mobile_optimized: true
116
+ },
117
+ installation: {
118
+ description: "Optimized for mobile and edge devices (40% faster than TinyLLaMA)"
119
+ },
120
+ specialization: "mobile",
121
+ languages: ["en"],
122
+ year: 2024
123
+ },
124
+ {
125
+ name: "MobileLLaMA 2.7B",
126
+ size: "2.7B",
127
+ type: "local",
128
+ category: "small",
129
+ requirements: {
130
+ ram: 3.5,
131
+ vram: 1,
132
+ cpu_cores: 2,
133
+ storage: 2.8,
134
+ recommended_ram: 6
135
+ },
136
+ frameworks: ["transformers", "llama.cpp"],
137
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
138
+ performance: {
139
+ speed: "fast",
140
+ quality: "good",
141
+ context_length: 4096,
142
+ tokens_per_second_estimate: "40-80",
143
+ mobile_optimized: true
144
+ },
145
+ installation: {
146
+ description: "Larger MobileLLaMA variant for better quality on mobile"
147
+ },
148
+ specialization: "mobile",
149
+ languages: ["en"],
150
+ year: 2024
151
+ },
152
+ {
153
+ name: "Gemma 2B",
154
+ size: "2B",
155
+ type: "local",
156
+ category: "small",
157
+ requirements: {
158
+ ram: 3,
159
+ vram: 1,
160
+ cpu_cores: 2,
161
+ storage: 2.2,
162
+ recommended_ram: 6
163
+ },
164
+ frameworks: ["ollama", "transformers", "llama.cpp"],
165
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
166
+ performance: {
167
+ speed: "fast",
168
+ quality: "very_good",
169
+ context_length: 8192,
170
+ tokens_per_second_estimate: "30-70"
171
+ },
172
+ installation: {
173
+ ollama: "ollama pull gemma2:2b",
174
+ description: "Google's efficient small model with strong performance"
175
+ },
176
+ specialization: "general",
177
+ languages: ["en"],
178
+ year: 2024
179
+ },
180
+ {
181
+ name: "Gemma 3 1B",
182
+ size: "1B",
183
+ type: "local",
184
+ category: "small",
185
+ requirements: {
186
+ ram: 2,
187
+ vram: 0,
188
+ cpu_cores: 2,
189
+ storage: 1.1,
190
+ recommended_ram: 4
191
+ },
192
+ frameworks: ["ollama", "transformers"],
193
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
194
+ performance: {
195
+ speed: "very_fast",
196
+ quality: "good",
197
+ context_length: 32768,
198
+ tokens_per_second_estimate: "70-140"
199
+ },
200
+ installation: {
201
+ ollama: "ollama pull gemma3:1b",
202
+ description: "Latest Gemma optimized for mobile devices"
203
+ },
204
+ specialization: "mobile",
205
+ languages: ["en"],
206
+ year: 2025
207
+ },
208
+ {
209
+ name: "Phi-3 Mini 3.8B",
210
+ size: "3.8B",
211
+ type: "local",
212
+ category: "small",
213
+ requirements: {
214
+ ram: 4,
215
+ vram: 2,
216
+ cpu_cores: 4,
217
+ storage: 4,
218
+ recommended_ram: 8
219
+ },
220
+ frameworks: ["ollama", "llama.cpp", "transformers"],
221
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
222
+ performance: {
223
+ speed: "fast",
224
+ quality: "very_good",
225
+ context_length: 4096,
226
+ tokens_per_second_estimate: "25-50"
227
+ },
228
+ installation: {
229
+ ollama: "ollama pull phi3:mini",
230
+ description: "Microsoft's efficient small model with excellent reasoning"
231
+ },
232
+ specialization: "reasoning",
233
+ languages: ["en"],
234
+ year: 2024
235
+ },
236
+ {
237
+ name: "Phi-4 14B",
238
+ size: "14B",
239
+ type: "local",
240
+ category: "medium",
241
+ requirements: {
242
+ ram: 16,
243
+ vram: 8,
244
+ cpu_cores: 6,
245
+ storage: 14,
246
+ recommended_ram: 24
247
+ },
248
+ frameworks: ["ollama", "transformers"],
249
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M"],
250
+ performance: {
251
+ speed: "medium",
252
+ quality: "excellent",
253
+ context_length: 16384,
254
+ tokens_per_second_estimate: "15-30"
255
+ },
256
+ installation: {
257
+ ollama: "ollama pull phi4:14b",
258
+ description: "Latest Microsoft Phi model with enhanced capabilities"
259
+ },
260
+ specialization: "reasoning",
261
+ languages: ["en"],
262
+ year: 2024
263
+ },
264
+ {
265
+ name: "Llama 3.2 1B",
266
+ size: "1B",
267
+ type: "local",
268
+ category: "small",
269
+ requirements: {
270
+ ram: 2,
271
+ vram: 0,
272
+ cpu_cores: 2,
273
+ storage: 1.1,
274
+ recommended_ram: 4
275
+ },
276
+ frameworks: ["ollama", "llama.cpp", "transformers"],
277
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
278
+ performance: {
279
+ speed: "very_fast",
280
+ quality: "good",
281
+ context_length: 8192,
282
+ tokens_per_second_estimate: "60-120"
283
+ },
284
+ installation: {
285
+ ollama: "ollama pull llama3.2:1b",
286
+ description: "Ultra-compact Llama for mobile and edge devices"
287
+ },
288
+ specialization: "general",
289
+ languages: ["en"],
290
+ year: 2024
291
+ },
292
+ {
293
+ name: "Llama 3.2 3B",
294
+ size: "3B",
295
+ type: "local",
296
+ category: "small",
297
+ requirements: {
298
+ ram: 4,
299
+ vram: 2,
300
+ cpu_cores: 4,
301
+ storage: 3.2,
302
+ recommended_ram: 8
303
+ },
304
+ frameworks: ["ollama", "llama.cpp", "transformers"],
305
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
306
+ performance: {
307
+ speed: "fast",
308
+ quality: "very_good",
309
+ context_length: 8192,
310
+ tokens_per_second_estimate: "30-60"
311
+ },
312
+ installation: {
313
+ ollama: "ollama pull llama3.2:3b",
314
+ description: "Balanced performance and efficiency from Meta"
315
+ },
316
+ specialization: "general",
317
+ languages: ["en"],
318
+ year: 2024
319
+ },
320
+
321
+ // Medium Models (5B - 15B parámetros)
322
+ {
323
+ name: "Gemma 3 4B",
324
+ size: "4B",
325
+ type: "local",
326
+ category: "medium",
327
+ requirements: {
328
+ ram: 6,
329
+ vram: 3,
330
+ cpu_cores: 4,
331
+ storage: 4.5,
332
+ recommended_ram: 12
333
+ },
334
+ frameworks: ["ollama", "transformers"],
335
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
336
+ performance: {
337
+ speed: "fast",
338
+ quality: "excellent",
339
+ context_length: 128000,
340
+ tokens_per_second_estimate: "20-40"
341
+ },
342
+ installation: {
343
+ ollama: "ollama pull gemma3:4b",
344
+ description: "Multimodal Gemma with long context support"
345
+ },
346
+ specialization: "multimodal",
347
+ languages: ["en"],
348
+ year: 2025,
349
+ multimodal: true
350
+ },
351
+ {
352
+ name: "Qwen 2.5 7B",
353
+ size: "7B",
354
+ type: "local",
355
+ category: "medium",
356
+ requirements: {
357
+ ram: 8,
358
+ vram: 4,
359
+ cpu_cores: 4,
360
+ storage: 7.5,
361
+ recommended_ram: 16
362
+ },
363
+ frameworks: ["ollama", "transformers", "vllm"],
364
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
365
+ performance: {
366
+ speed: "medium",
367
+ quality: "excellent",
368
+ context_length: 32768,
369
+ tokens_per_second_estimate: "15-35"
370
+ },
371
+ installation: {
372
+ ollama: "ollama pull qwen2.5:7b",
373
+ description: "Alibaba's latest multilingual model with strong coding abilities"
374
+ },
375
+ specialization: "code",
376
+ languages: ["en", "zh", "ja", "ko", "es", "fr", "de"],
377
+ year: 2024
378
+ },
379
+ {
380
+ name: "Llama 3.1 8B",
381
+ size: "8B",
382
+ type: "local",
383
+ category: "medium",
384
+ requirements: {
385
+ ram: 8,
386
+ vram: 4,
387
+ cpu_cores: 4,
388
+ storage: 8.5,
389
+ recommended_ram: 16
390
+ },
391
+ frameworks: ["ollama", "llama.cpp", "transformers", "vllm"],
392
+ quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q6_K", "Q8_0"],
393
+ performance: {
394
+ speed: "medium",
395
+ quality: "excellent",
396
+ context_length: 128000,
397
+ tokens_per_second_estimate: "12-30"
398
+ },
399
+ installation: {
400
+ ollama: "ollama pull llama3.1:8b",
401
+ description: "Outstanding balance of performance and efficiency"
402
+ },
403
+ specialization: "general",
404
+ languages: ["en"],
405
+ year: 2024
406
+ },
407
+ {
408
+ name: "Mistral 7B v0.3",
409
+ size: "7B",
410
+ type: "local",
411
+ category: "medium",
412
+ requirements: {
413
+ ram: 8,
414
+ vram: 4,
415
+ cpu_cores: 4,
416
+ storage: 7.2,
417
+ recommended_ram: 16
418
+ },
419
+ frameworks: ["ollama", "llama.cpp", "transformers", "vllm"],
420
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q6_K", "Q8_0"],
421
+ performance: {
422
+ speed: "medium",
423
+ quality: "excellent",
424
+ context_length: 32768,
425
+ tokens_per_second_estimate: "15-32"
426
+ },
427
+ installation: {
428
+ ollama: "ollama pull mistral:7b",
429
+ description: "High-quality European model with strong reasoning"
430
+ },
431
+ specialization: "reasoning",
432
+ languages: ["en", "fr", "de", "es", "it"],
433
+ year: 2024
434
+ },
435
+ {
436
+ name: "Mistral Small 3.1",
437
+ size: "22B",
438
+ type: "local",
439
+ category: "large",
440
+ requirements: {
441
+ ram: 24,
442
+ vram: 12,
443
+ cpu_cores: 6,
444
+ storage: 22,
445
+ recommended_ram: 32
446
+ },
447
+ frameworks: ["ollama", "vllm", "transformers"],
448
+ quantization: ["Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
449
+ performance: {
450
+ speed: "slow",
451
+ quality: "excellent",
452
+ context_length: 128000,
453
+ tokens_per_second_estimate: "8-20"
454
+ },
455
+ installation: {
456
+ ollama: "ollama pull mistral-small:22b",
457
+ description: "Latest Mistral model with enhanced capabilities"
458
+ },
459
+ specialization: "reasoning",
460
+ languages: ["en", "fr", "de", "es", "it"],
461
+ year: 2024
462
+ },
463
+ {
464
+ name: "CodeLlama 7B",
465
+ size: "7B",
466
+ type: "local",
467
+ category: "medium",
468
+ specialization: "code",
469
+ requirements: {
470
+ ram: 8,
471
+ vram: 4,
472
+ cpu_cores: 4,
473
+ storage: 7.2,
474
+ recommended_ram: 16
475
+ },
476
+ frameworks: ["ollama", "llama.cpp", "transformers"],
477
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M", "Q8_0"],
478
+ performance: {
479
+ speed: "medium",
480
+ quality: "excellent_for_code",
481
+ context_length: 16384,
482
+ tokens_per_second_estimate: "15-30"
483
+ },
484
+ installation: {
485
+ ollama: "ollama pull codellama:7b",
486
+ description: "Meta's specialized coding assistant"
487
+ },
488
+ languages: ["python", "javascript", "java", "c++", "c", "php", "ruby"],
489
+ year: 2023
490
+ },
491
+ {
492
+ name: "DeepSeek Coder 6.7B",
493
+ size: "6.7B",
494
+ type: "local",
495
+ category: "medium",
496
+ specialization: "code",
497
+ requirements: {
498
+ ram: 8,
499
+ vram: 4,
500
+ cpu_cores: 4,
501
+ storage: 7,
502
+ recommended_ram: 16
503
+ },
504
+ frameworks: ["ollama", "transformers"],
505
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
506
+ performance: {
507
+ speed: "medium",
508
+ quality: "excellent_for_code",
509
+ context_length: 16384,
510
+ tokens_per_second_estimate: "18-35"
511
+ },
512
+ installation: {
513
+ ollama: "ollama pull deepseek-coder:6.7b",
514
+ description: "Specialized for code generation and analysis"
515
+ },
516
+ languages: ["python", "javascript", "java", "c++", "go", "rust"],
517
+ year: 2024
518
+ },
519
+
520
+ // Large Models (15B+ parámetros)
521
+ {
522
+ name: "Gemma 3 12B",
523
+ size: "12B",
524
+ type: "local",
525
+ category: "large",
526
+ requirements: {
527
+ ram: 16,
528
+ vram: 8,
529
+ cpu_cores: 6,
530
+ storage: 13,
531
+ recommended_ram: 24
532
+ },
533
+ frameworks: ["ollama", "transformers"],
534
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q5_K_M"],
535
+ performance: {
536
+ speed: "medium_slow",
537
+ quality: "excellent",
538
+ context_length: 128000,
539
+ tokens_per_second_estimate: "10-25"
540
+ },
541
+ installation: {
542
+ ollama: "ollama pull gemma3:12b",
543
+ description: "Large multimodal Gemma with advanced capabilities"
544
+ },
545
+ specialization: "multimodal",
546
+ languages: ["en"],
547
+ year: 2025,
548
+ multimodal: true
549
+ },
550
+ {
551
+ name: "Gemma 3 27B",
552
+ size: "27B",
553
+ type: "local",
554
+ category: "large",
555
+ requirements: {
556
+ ram: 32,
557
+ vram: 16,
558
+ cpu_cores: 8,
559
+ storage: 28,
560
+ recommended_ram: 48
561
+ },
562
+ frameworks: ["transformers", "vllm"],
563
+ quantization: ["Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
564
+ performance: {
565
+ speed: "slow",
566
+ quality: "excellent",
567
+ context_length: 128000,
568
+ tokens_per_second_estimate: "5-15"
569
+ },
570
+ installation: {
571
+ description: "Flagship multimodal Gemma model"
572
+ },
573
+ specialization: "multimodal",
574
+ languages: ["en"],
575
+ year: 2025,
576
+ multimodal: true
577
+ },
578
+ {
579
+ name: "Llama 3.3 70B",
580
+ size: "70B",
581
+ type: "local",
582
+ category: "large",
583
+ requirements: {
584
+ ram: 48,
585
+ vram: 24,
586
+ cpu_cores: 8,
587
+ storage: 72,
588
+ recommended_ram: 64,
589
+ recommended_vram: 48
590
+ },
591
+ frameworks: ["ollama", "vllm", "transformers"],
592
+ quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M", "Q5_K_M"],
593
+ performance: {
594
+ speed: "slow",
595
+ quality: "excellent",
596
+ context_length: 128000,
597
+ tokens_per_second_estimate: "3-12"
598
+ },
599
+ installation: {
600
+ ollama: "ollama pull llama3.3:70b",
601
+ description: "Latest flagship Llama model with enhanced capabilities"
602
+ },
603
+ specialization: "general",
604
+ languages: ["en"],
605
+ year: 2024
606
+ },
607
+ {
608
+ name: "DeepSeek-R1 70B",
609
+ size: "70B",
610
+ type: "local",
611
+ category: "large",
612
+ requirements: {
613
+ ram: 50,
614
+ vram: 25,
615
+ cpu_cores: 8,
616
+ storage: 75,
617
+ recommended_ram: 64,
618
+ recommended_vram: 50
619
+ },
620
+ frameworks: ["ollama", "vllm"],
621
+ quantization: ["Q2_K", "Q3_K_M", "Q4_0", "Q4_K_M"],
622
+ performance: {
623
+ speed: "slow",
624
+ quality: "excellent",
625
+ context_length: 65536,
626
+ tokens_per_second_estimate: "2-10"
627
+ },
628
+ installation: {
629
+ ollama: "ollama pull deepseek-r1:70b",
630
+ description: "Advanced reasoning model with o1-like capabilities"
631
+ },
632
+ specialization: "reasoning",
633
+ languages: ["en", "zh"],
634
+ year: 2025
635
+ },
636
+
637
+ // Multimodal Models
638
+ {
639
+ name: "LLaVA 7B",
640
+ size: "7B",
641
+ type: "local",
642
+ category: "medium",
643
+ specialization: "multimodal",
644
+ requirements: {
645
+ ram: 10,
646
+ vram: 6,
647
+ cpu_cores: 4,
648
+ storage: 8,
649
+ recommended_ram: 16
650
+ },
651
+ frameworks: ["ollama", "transformers"],
652
+ quantization: ["Q4_0", "Q4_K_M", "Q5_0", "Q8_0"],
653
+ performance: {
654
+ speed: "medium",
655
+ quality: "good",
656
+ context_length: 4096,
657
+ tokens_per_second_estimate: "12-25"
658
+ },
659
+ installation: {
660
+ ollama: "ollama pull llava:7b",
661
+ description: "Vision-language model for image understanding"
662
+ },
663
+ languages: ["en"],
664
+ year: 2023,
665
+ multimodal: true
666
+ },
667
+ {
668
+ name: "LLaVA-NeXT 34B",
669
+ size: "34B",
670
+ type: "local",
671
+ category: "large",
672
+ specialization: "multimodal",
673
+ requirements: {
674
+ ram: 36,
675
+ vram: 18,
676
+ cpu_cores: 8,
677
+ storage: 36,
678
+ recommended_ram: 48
679
+ },
680
+ frameworks: ["transformers"],
681
+ quantization: ["Q3_K_M", "Q4_0", "Q4_K_M"],
682
+ performance: {
683
+ speed: "slow",
684
+ quality: "excellent",
685
+ context_length: 8192,
686
+ tokens_per_second_estimate: "4-12"
687
+ },
688
+ installation: {
689
+ description: "Advanced multimodal model with enhanced vision capabilities"
690
+ },
691
+ languages: ["en"],
692
+ year: 2024,
693
+ multimodal: true
694
+ },
695
+
696
+ // Embedding Models
697
+ {
698
+ name: "all-MiniLM-L6-v2",
699
+ size: "22M",
700
+ type: "local",
701
+ category: "embedding",
702
+ specialization: "embeddings",
703
+ requirements: {
704
+ ram: 0.5,
705
+ vram: 0,
706
+ cpu_cores: 1,
707
+ storage: 0.1,
708
+ recommended_ram: 1
709
+ },
710
+ frameworks: ["sentence-transformers", "ollama"],
711
+ performance: {
712
+ speed: "very_fast",
713
+ quality: "good",
714
+ context_length: 512,
715
+ dimensions: 384
716
+ },
717
+ installation: {
718
+ ollama: "ollama pull all-minilm",
719
+ description: "Compact embedding model for semantic search"
720
+ },
721
+ languages: ["en"],
722
+ year: 2023
723
+ },
724
+ {
725
+ name: "BGE-small-en-v1.5",
726
+ size: "33M",
727
+ type: "local",
728
+ category: "embedding",
729
+ specialization: "embeddings",
730
+ requirements: {
731
+ ram: 0.5,
732
+ vram: 0,
733
+ cpu_cores: 1,
734
+ storage: 0.1,
735
+ recommended_ram: 1
736
+ },
737
+ frameworks: ["sentence-transformers", "transformers"],
738
+ performance: {
739
+ speed: "very_fast",
740
+ quality: "very_good",
741
+ context_length: 512,
742
+ dimensions: 384
743
+ },
744
+ installation: {
745
+ description: "High-quality English embedding model"
746
+ },
747
+ languages: ["en"],
748
+ year: 2023
749
+ }
750
+ ];
751
+ }
752
+
753
+ initializeCompatibilityMatrix() {
754
+ return {
755
+ // Hardware tiers
756
+ tiers: {
757
+ ultra_low: { ram: 4, vram: 0, cpu_cores: 2 },
758
+ low: { ram: 8, vram: 2, cpu_cores: 4 },
759
+ medium: { ram: 16, vram: 8, cpu_cores: 6 },
760
+ high: { ram: 32, vram: 16, cpu_cores: 8 },
761
+ ultra_high: { ram: 64, vram: 32, cpu_cores: 12 }
762
+ },
763
+
764
+ // Compatibility scores by category and tier
765
+ compatibility: {
766
+ ultra_small: {
767
+ ultra_low: 95, low: 100, medium: 100, high: 100, ultra_high: 100
768
+ },
769
+ small: {
770
+ ultra_low: 70, low: 90, medium: 100, high: 100, ultra_high: 100
771
+ },
772
+ medium: {
773
+ ultra_low: 20, low: 60, medium: 85, high: 95, ultra_high: 100
774
+ },
775
+ large: {
776
+ ultra_low: 5, low: 25, medium: 50, high: 80, ultra_high: 95
777
+ },
778
+ embedding: {
779
+ ultra_low: 100, low: 100, medium: 100, high: 100, ultra_high: 100
780
+ }
781
+ },
782
+
783
+ // Performance multipliers
784
+ architecture_bonuses: {
785
+ 'Apple Silicon': 1.15,
786
+ 'x86_64_modern': 1.05,
787
+ 'ARM64': 0.95
788
+ },
789
+
790
+ // Quantization effectiveness
791
+ quantization_savings: {
792
+ 'Q2_K': 0.6, // 60% size reduction
793
+ 'Q3_K_M': 0.7, // 70% of original size
794
+ 'Q4_0': 0.75, // 75% of original size
795
+ 'Q4_K_M': 0.75,
796
+ 'Q5_0': 0.85, // 85% of original size
797
+ 'Q5_K_M': 0.85,
798
+ 'Q6_K': 0.9, // 90% of original size
799
+ 'Q8_0': 0.95 // 95% of original size
800
+ }
801
+ };
802
+ }
803
+
804
+ getAllModels() {
805
+ return this.models;
806
+ }
807
+
808
+ getModelsByCategory(category) {
809
+ return this.models.filter(model => model.category === category);
810
+ }
811
+
812
+ getUltraSmallModels() {
813
+ return this.getModelsByCategory('ultra_small');
814
+ }
815
+
816
+ getSmallModels() {
817
+ return this.getModelsByCategory('small');
818
+ }
819
+
820
+ getMediumModels() {
821
+ return this.getModelsByCategory('medium');
822
+ }
823
+
824
+ getLargeModels() {
825
+ return this.getModelsByCategory('large');
826
+ }
827
+
828
+ getEmbeddingModels() {
829
+ return this.getModelsByCategory('embedding');
830
+ }
831
+
832
+ getMultimodalModels() {
833
+ return this.models.filter(model => model.multimodal === true);
834
+ }
835
+
836
+ getModelsBySpecialization(specialization) {
837
+ return this.models.filter(model => model.specialization === specialization);
838
+ }
839
+
840
+ getModelsByLanguage(language) {
841
+ return this.models.filter(model =>
842
+ model.languages && model.languages.includes(language)
843
+ );
844
+ }
845
+
846
+ getModelsByYear(year) {
847
+ return this.models.filter(model => model.year === year);
848
+ }
849
+
850
+ getRecentModels(yearsBack = 1) {
851
+ const currentYear = new Date().getFullYear();
852
+ const cutoffYear = currentYear - yearsBack;
853
+ return this.models.filter(model => model.year >= cutoffYear);
854
+ }
855
+
856
+ findModel(name) {
857
+ return this.models.find(model =>
858
+ model.name.toLowerCase().includes(name.toLowerCase())
859
+ );
860
+ }
861
+
862
+ getHardwareTier(hardware) {
863
+ const { memory, gpu, cpu } = hardware;
864
+ const ram = memory.total;
865
+ const vram = gpu.vram || 0;
866
+ const cores = cpu.cores;
867
+
868
+ // Check if it's Apple Silicon (unified memory architecture)
869
+ const isAppleSilicon = cpu?.architecture === 'Apple Silicon' ||
870
+ (gpu?.model && gpu.model.toLowerCase().includes('apple')) ||
871
+ (cpu?.brand && cpu.brand.toLowerCase().includes('apple'));
872
+
873
+ // Apple Silicon uses unified memory, so evaluate differently
874
+ if (isAppleSilicon) {
875
+ if (ram >= 64) return 'ultra_high';
876
+ if (ram >= 24) return 'high'; // M4 Pro with 24GB should be high tier
877
+ if (ram >= 16) return 'medium';
878
+ if (ram >= 8) return 'low';
879
+ return 'ultra_low';
880
+ }
881
+
882
+ // Traditional discrete GPU systems
883
+ if (ram >= 64 && vram >= 32 && cores >= 12) return 'ultra_high';
884
+ if (ram >= 32 && vram >= 16 && cores >= 8) return 'high';
885
+ if (ram >= 16 && vram >= 8 && cores >= 6) return 'medium';
886
+ if (ram >= 8 && vram >= 2 && cores >= 4) return 'low';
887
+ return 'ultra_low';
888
+ }
889
+
890
+ getCompatibilityScore(model, hardware) {
891
+ const tier = this.getHardwareTier(hardware);
892
+ const baseScore = this.compatibilityMatrix.compatibility[model.category]?.[tier] || 0;
893
+
894
+ // Apply architecture bonus
895
+ const architectureBonus = this.compatibilityMatrix.architecture_bonuses[hardware.cpu.architecture] || 1.0;
896
+
897
+ // Apply quantization bonus if available
898
+ let quantizationBonus = 1.0;
899
+ if (model.quantization && model.quantization.length > 0) {
900
+ // Use most aggressive quantization for limited hardware
901
+ if (tier === 'ultra_low' || tier === 'low') {
902
+ quantizationBonus = 1.2; // Quantization is very valuable
903
+ }
904
+ }
905
+
906
+ return Math.min(100, Math.round(baseScore * architectureBonus * quantizationBonus));
907
+ }
908
+
909
+ getDetailedCompatibilityAnalysis(model, hardware) {
910
+ const score = this.getCompatibilityScore(model, hardware);
911
+ const tier = this.getHardwareTier(hardware);
912
+ const issues = [];
913
+ const recommendations = [];
914
+
915
+ // Check specific requirements
916
+ if (hardware.memory.total < model.requirements.ram) {
917
+ issues.push(`Insufficient RAM: ${hardware.memory.total}GB < ${model.requirements.ram}GB required`);
918
+ recommendations.push(`Upgrade to at least ${model.requirements.ram}GB RAM`);
919
+ }
920
+
921
+ if (hardware.gpu.vram < model.requirements.vram) {
922
+ issues.push(`Insufficient VRAM: ${hardware.gpu.vram}GB < ${model.requirements.vram}GB required`);
923
+ if (model.quantization && model.quantization.includes('Q4_0')) {
924
+ recommendations.push('Consider using Q4_0 quantization to reduce VRAM usage');
925
+ }
926
+ }
927
+
928
+ if (hardware.cpu.cores < model.requirements.cpu_cores) {
929
+ issues.push(`Limited CPU cores: ${hardware.cpu.cores} < ${model.requirements.cpu_cores} recommended`);
930
+ }
931
+
932
+ // Performance estimation
933
+ const estimatedPerformance = this.estimatePerformance(model, hardware);
934
+
935
+ return {
936
+ score,
937
+ tier,
938
+ issues,
939
+ recommendations,
940
+ canRun: score >= 60,
941
+ estimatedPerformance,
942
+ bestQuantization: this.getBestQuantization(model, hardware)
943
+ };
944
+ }
945
+
946
+ estimatePerformance(model, hardware) {
947
+ // Use realistic performance estimation instead of optimistic predefined values
948
+ const estimatedTokensPerSecond = this.calculateRealisticTokensPerSecond(model, hardware);
949
+
950
+ return {
951
+ tokensPerSecond: estimatedTokensPerSecond,
952
+ category: estimatedTokensPerSecond > 50 ? 'fast' :
953
+ estimatedTokensPerSecond > 20 ? 'medium' :
954
+ estimatedTokensPerSecond > 5 ? 'slow' : 'very_slow',
955
+ memoryUsage: this.estimateMemoryUsage(model),
956
+ powerConsumption: this.estimatePowerConsumption(model, hardware)
957
+ };
958
+ }
959
+
960
+ calculateRealisticTokensPerSecond(model, hardware) {
961
+ // Extract model parameters from name or size
962
+ const modelParams = this.extractModelParams(model);
963
+
964
+ // Get hardware specifics
965
+ const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
966
+ const gpuModel = hardware.gpu?.model || '';
967
+ const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
968
+ const baseSpeed = hardware.cpu?.speed || 2.4;
969
+ const vramGB = hardware.gpu?.vram || 0;
970
+
971
+ // Check hardware type
972
+ const isAppleSilicon = process.platform === 'darwin' && (
973
+ gpuModel.toLowerCase().includes('apple') ||
974
+ gpuModel.toLowerCase().includes('m1') ||
975
+ gpuModel.toLowerCase().includes('m2') ||
976
+ gpuModel.toLowerCase().includes('m3') ||
977
+ gpuModel.toLowerCase().includes('m4')
978
+ );
979
+ const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
980
+ const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;
981
+
982
+ let tokensPerSecond;
983
+
984
+ if (isAppleSilicon) {
985
+ // Apple Silicon unified memory - more optimistic but realistic
986
+ let baseTPS = 25;
987
+ if (gpuModel.toLowerCase().includes('m4 pro')) baseTPS = 35;
988
+ else if (gpuModel.toLowerCase().includes('m4')) baseTPS = 30;
989
+ else if (gpuModel.toLowerCase().includes('m3 pro')) baseTPS = 30;
990
+ else if (gpuModel.toLowerCase().includes('m3')) baseTPS = 25;
991
+ else if (gpuModel.toLowerCase().includes('m2 pro')) baseTPS = 28;
992
+ else if (gpuModel.toLowerCase().includes('m2')) baseTPS = 22;
993
+ else if (gpuModel.toLowerCase().includes('m1 pro')) baseTPS = 25;
994
+ else if (gpuModel.toLowerCase().includes('m1')) baseTPS = 20;
995
+
996
+ // Scale by model size (Apple Silicon handles larger models better)
997
+ tokensPerSecond = Math.max(8, Math.round(baseTPS / Math.max(0.8, modelParams)));
998
+
999
+ } else if (hasDedicatedGPU) {
1000
+ // Dedicated GPU - much better performance
1001
+ let gpuTPS = 30;
1002
+ if (gpuModel.toLowerCase().includes('rtx 50')) gpuTPS = 65;
1003
+ else if (gpuModel.toLowerCase().includes('rtx 40')) gpuTPS = 50;
1004
+ else if (gpuModel.toLowerCase().includes('rtx 30')) gpuTPS = 40;
1005
+ else if (gpuModel.toLowerCase().includes('rtx 20')) gpuTPS = 30;
1006
+ else if (vramGB >= 16) gpuTPS = 45;
1007
+ else if (vramGB >= 8) gpuTPS = 35;
1008
+ else if (vramGB >= 4) gpuTPS = 25;
1009
+
1010
+ // Scale by model size for GPU
1011
+ tokensPerSecond = Math.max(10, Math.round(gpuTPS / Math.max(0.5, modelParams)));
1012
+
1013
+ } else {
1014
+ // CPU-only or integrated GPU - most realistic and conservative
1015
+ const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
1016
+ (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
1017
+ const hasAVX2 = cpuModel.toLowerCase().includes('intel') || cpuModel.toLowerCase().includes('amd');
1018
+
1019
+ // Base CPU coefficient - much more conservative
1020
+ let cpuK = 1.8; // Conservative baseline
1021
+ if (hasAVX512) cpuK = 2.6;
1022
+ else if (hasAVX2) cpuK = 2.2;
1023
+
1024
+ // iGPU boost (small)
1025
+ const iGpuMultiplier = isIntegratedGPU ? 1.3 : 1.0;
1026
+
1027
+ // Calculate with realistic threading limits
1028
+ const effectiveThreads = Math.min(cores, 8); // Diminishing returns after 8 threads
1029
+ const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier) / Math.max(1.5, modelParams);
1030
+
1031
+ // Apply realistic CPU limits
1032
+ const maxCPUTPS = hasAVX512 ? 25 : (isIntegratedGPU ? 20 : 15);
1033
+ tokensPerSecond = Math.max(2, Math.min(maxCPUTPS, Math.round(baseTPS)));
1034
+ }
1035
+
1036
+ return tokensPerSecond;
1037
+ }
1038
+
1039
+ extractModelParams(model) {
1040
+ // Try to extract parameter count from model name
1041
+ const name = model.name.toLowerCase();
1042
+
1043
+ // Look for patterns like "7b", "3.8b", "0.5b", etc.
1044
+ const paramMatch = name.match(/(\d+\.?\d*)[bm](?:\s|$)/);
1045
+ if (paramMatch) {
1046
+ const value = parseFloat(paramMatch[1]);
1047
+ // Convert millions to billions if needed
1048
+ return paramMatch[1].includes('m') ? value / 1000 : value;
1049
+ }
1050
+
1051
+ // Fallback to size-based estimation
1052
+ const sizeGB = model.size ? parseFloat(model.size.toString()) : 4;
1053
+ // Rough estimate: 1B params ≈ 2GB in Q4 quantization
1054
+ return Math.max(0.5, sizeGB / 2);
1055
+ }
1056
+
1057
+ getBestQuantization(model, hardware) {
1058
+ if (!model.quantization || model.quantization.length === 0) {
1059
+ return null;
1060
+ }
1061
+
1062
+ const tier = this.getHardwareTier(hardware);
1063
+
1064
+ const recommendations = {
1065
+ ultra_low: ['Q2_K', 'Q3_K_M'],
1066
+ low: ['Q4_0', 'Q4_K_M'],
1067
+ medium: ['Q4_K_M', 'Q5_0'],
1068
+ high: ['Q5_K_M', 'Q6_K'],
1069
+ ultra_high: ['Q8_0', 'Q6_K']
1070
+ };
1071
+
1072
+ const recommended = recommendations[tier] || ['Q4_0'];
1073
+ return model.quantization.find(q => recommended.includes(q)) || model.quantization[0];
1074
+ }
1075
+
1076
+ estimateMemoryUsage(model) {
1077
+ const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1078
+
1079
+ // Rough estimates including model loading overhead
1080
+ return {
1081
+ minimal: Math.round(sizeGB * 1.2), // With quantization
1082
+ typical: Math.round(sizeGB * 1.5), // Standard loading
1083
+ maximum: Math.round(sizeGB * 2.0) // With full context
1084
+ };
1085
+ }
1086
+
1087
+ estimatePowerConsumption(model, hardware) {
1088
+ const sizeGB = parseFloat(model.size.replace(/[^\d.]/g, ''));
1089
+ const tier = this.getHardwareTier(hardware);
1090
+
1091
+ const basePower = {
1092
+ ultra_low: 15,
1093
+ low: 25,
1094
+ medium: 45,
1095
+ high: 85,
1096
+ ultra_high: 150
1097
+ };
1098
+
1099
+ const modelMultiplier = Math.log10(sizeGB + 1) * 0.5 + 1;
1100
+
1101
+ return {
1102
+ idle: Math.round((basePower[tier] || 25) * 0.3),
1103
+ inference: Math.round((basePower[tier] || 25) * modelMultiplier),
1104
+ unit: 'watts'
1105
+ };
1106
+ }
1107
+
1108
+ getModelRecommendations(hardware, useCase = 'general') {
1109
+ const tier = this.getHardwareTier(hardware);
1110
+ const allModels = this.getAllModels();
1111
+
1112
+ // Filter by use case
1113
+ let relevantModels = allModels;
1114
+ if (useCase !== 'general') {
1115
+ relevantModels = allModels.filter(model =>
1116
+ model.specialization === useCase ||
1117
+ (useCase === 'chat' && !model.specialization)
1118
+ );
1119
+ }
1120
+
1121
+ // Score and sort models
1122
+ const scoredModels = relevantModels.map(model => ({
1123
+ ...model,
1124
+ compatibilityScore: this.getCompatibilityScore(model, hardware)
1125
+ }));
1126
+
1127
+ scoredModels.sort((a, b) => b.compatibilityScore - a.compatibilityScore);
1128
+
1129
+ return {
1130
+ tier,
1131
+ topRecommendations: scoredModels.slice(0, 5),
1132
+ byCategory: {
1133
+ ultra_small: scoredModels.filter(m => m.category === 'ultra_small').slice(0, 3),
1134
+ small: scoredModels.filter(m => m.category === 'small').slice(0, 3),
1135
+ medium: scoredModels.filter(m => m.category === 'medium').slice(0, 3),
1136
+ large: scoredModels.filter(m => m.category === 'large').slice(0, 2)
1137
+ }
1138
+ };
1139
+ }
1140
+ }
1141
+
1142
+ module.exports = ExpandedModelsDatabase;