llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,532 @@
1
+ /**
2
+ * Intelligent Model Selector
3
+ * Uses scoring engine and hardware detection to recommend optimal LLM models
4
+ * Provides smart recommendations based on use case, hardware, and preferences
5
+ */
6
+
7
+ const ScoringEngine = require('./scoring-engine');
8
+ const UnifiedDetector = require('../hardware/unified-detector');
9
+
10
/**
 * Recommends model variants for the detected hardware.
 *
 * Combines a ScoringEngine (scoring/ranking) with a hardware detector and adds
 * pre-filtering, "top pick" selection, human-readable insights and helpers for
 * installed Ollama models.
 */
class IntelligentSelector {
    /**
     * @param {Object} [options]
     * @param {Object} [options.scoring] - Options forwarded to the ScoringEngine constructor.
     * @param {Object} [options.detector] - Hardware detector to use; a new UnifiedDetector is created when omitted.
     * @param {Object|null} [options.database] - Stored on the instance; not read anywhere in this class.
     */
    constructor(options = {}) {
        this.scoring = new ScoringEngine(options.scoring || {});
        this.detector = options.detector || new UnifiedDetector();
        this.database = options.database || null;

        // Default preferences; per-call options are spread over these in recommend()/compare().
        this.defaults = {
            useCase: 'general',
            targetContext: 8192,
            targetTPS: 20,
            preferQuantization: null,  // null = auto select
            preferFamily: null,
            maxSize: null,  // null = auto from hardware
            minSize: null,
            excludeFamilies: [],
            includeVision: false,
            includeEmbeddings: false,
            limit: 10
        };
    }

    /**
     * Initialize hardware detection by running the detector once.
     */
    async init() {
        await this.detector.detect();
    }

    /**
     * Get optimal model recommendations.
     *
     * @param {Array} variants - Model variants from the database (a nullish value is treated as empty).
     * @param {Object} [options] - Selection options, merged over `this.defaults`.
     *   `options.headroom` defaults to 2 only when it is null/undefined, so an
     *   explicit `0` is passed through to the scoring engine.
     * @returns {Promise<Object>} `{ topPicks, categories, all, hardware, insights, meta }`
     */
    async recommend(variants, options = {}) {
        const opts = { ...this.defaults, ...options };
        const candidates = variants == null ? [] : [...variants];

        // detect() is awaited on every call; its result is the hardware profile used below.
        const hardware = await this.detector.detect();

        const filtered = this.applyFilters(candidates, opts, hardware);

        const scored = this.scoring.filterAndScore(filtered, hardware, {
            useCase: opts.useCase,
            targetContext: opts.targetContext,
            targetTPS: opts.targetTPS,
            // `??` instead of `||` so that headroom: 0 is honoured.
            headroom: opts.headroom ?? 2
        });

        const categories = this.scoring.categorizeScores(scored);
        const topPicks = this.selectTopPicks(scored, opts);
        const insights = this.generateInsights(scored, hardware, opts);

        return {
            topPicks,
            categories,
            all: scored.slice(0, opts.limit),
            hardware: {
                description: this.detector.getHardwareDescription(),
                tier: this.detector.getHardwareTier(),
                maxSize: this.detector.getMaxModelSize(),
                backend: hardware.summary.bestBackend
            },
            insights,
            meta: {
                totalCandidates: candidates.length,
                afterFiltering: filtered.length,
                useCase: opts.useCase
            }
        };
    }

    /**
     * Apply size / family / vision / embedding filters to a variant list and
     * move preferred families and quantizations to the front.
     *
     * @param {Array} variants - Candidate variants (not mutated; a copy is filtered).
     * @param {Object} opts - Merged selection options.
     * @param {Object} hardware - Detected hardware; currently unused by this method.
     * @returns {Array} A new array with the surviving variants.
     */
    applyFilters(variants, opts, hardware) {
        const idOf = (v) => (v.model_id || v.modelId || '').toLowerCase();
        let filtered = variants == null ? [] : [...variants];

        // Size filters. Without an explicit maxSize the detector's limit plus 2 is used.
        const maxSize = opts.maxSize || this.detector.getMaxModelSize() + 2;
        const minSize = opts.minSize || 0;

        filtered = filtered.filter(v => {
            const size = v.size_gb || v.sizeGB || 0;
            return size >= minSize && size <= maxSize;
        });

        // Family exclusions. Accepts an array, a single string, or null/undefined.
        // Empty entries are dropped because ''.includes-matching would exclude everything.
        const excludeLower = [].concat(opts.excludeFamilies ?? [])
            .filter(f => typeof f === 'string' && f.length > 0)
            .map(f => f.toLowerCase());
        if (excludeLower.length > 0) {
            filtered = filtered.filter(v => {
                const modelId = idOf(v);
                return !excludeLower.some(ex => modelId.includes(ex));
            });
        }

        // Family preference (boost, don't exclude others)
        if (opts.preferFamily) {
            const prefLower = String(opts.preferFamily).toLowerCase();
            filtered.sort((a, b) => {
                const aMatches = idOf(a).includes(prefLower);
                const bMatches = idOf(b).includes(prefLower);
                if (aMatches && !bMatches) return -1;
                if (!aMatches && bMatches) return 1;
                return 0;
            });
        }

        // Vision filter: drop variants that accept images or whose id names a vision model.
        if (!opts.includeVision) {
            const visionMarkers = ['llava', 'vision', 'bakllava', 'moondream'];
            filtered = filtered.filter(v => {
                const inputTypes = v.input_types || v.inputTypes || [];
                const modelId = idOf(v);
                return !inputTypes.includes('image') &&
                    !visionMarkers.some(marker => modelId.includes(marker));
            });
        }

        // Embeddings filter: drop variants whose id names an embedding model.
        if (!opts.includeEmbeddings) {
            const embeddingMarkers = ['embed', 'nomic', 'mxbai', 'minilm', 'arctic-embed'];
            filtered = filtered.filter(v => {
                const modelId = idOf(v);
                return !embeddingMarkers.some(marker => modelId.includes(marker));
            });
        }

        // Quantization preference (exact, case-insensitive match moves to the front)
        if (opts.preferQuantization) {
            const prefQuant = String(opts.preferQuantization).toUpperCase();
            filtered.sort((a, b) => {
                const aQuant = (a.quant || '').toUpperCase();
                const bQuant = (b.quant || '').toUpperCase();
                if (aQuant === prefQuant && bQuant !== prefQuant) return -1;
                if (aQuant !== prefQuant && bQuant === prefQuant) return 1;
                return 0;
            });
        }

        return filtered;
    }

    /**
     * Select top picks from scored variants.
     *
     * @param {Array} scored - Scored entries shaped `{ variant, score: { final, components } }`;
     *   the first entry is taken as the best overall.
     * @param {Object} opts - Selection options; currently unused by this method.
     * @returns {{best: Object|null, balanced: Object|null, fast: Object|null, quality: Object|null}}
     *   Every pick falls back to the first scored entry when no entry meets its thresholds.
     */
    selectTopPicks(scored, opts) {
        const picks = {
            best: null,
            balanced: null,
            fast: null,
            quality: null
        };

        if (scored.length === 0) return picks;

        // Best overall: first entry of the scored list.
        picks.best = scored[0];

        // Balanced: first entry with good quality, speed and fit.
        const balanced = scored.find(s =>
            s.score.components.quality >= 70 &&
            s.score.components.speed >= 70 &&
            s.score.components.fit >= 80
        );
        picks.balanced = balanced || scored[0];

        // Fastest: highest speed among entries of acceptable quality.
        // filter() returns a new array, so the sort does not reorder `scored`.
        const fast = scored
            .filter(s => s.score.components.quality >= 60)
            .sort((a, b) => b.score.components.speed - a.score.components.speed)[0];
        picks.fast = fast || scored[0];

        // Highest quality among entries that fit.
        const quality = scored
            .filter(s => s.score.components.fit >= 70)
            .sort((a, b) => b.score.components.quality - a.score.components.quality)[0];
        picks.quality = quality || scored[0];

        return picks;
    }

    /**
     * Generate insights about the recommendations.
     *
     * @param {Array} scored - Scored entries; the first one is treated as the top recommendation.
     * @param {Object} hardware - Detection result; `hardware.summary` is read.
     * @param {Object} opts - Selection options; `opts.useCase` is read.
     * @returns {Array<{type: string, message: string}>} Insight messages.
     */
    generateInsights(scored, hardware, opts) {
        const insights = [];

        if (scored.length === 0) {
            insights.push({
                type: 'warning',
                message: 'No models found that match your criteria. Try relaxing filters.'
            });
            return insights;
        }

        const top = scored[0];
        const maxSize = this.detector.getMaxModelSize();

        // Hardware-based insights
        if (hardware.summary.bestBackend === 'cpu') {
            insights.push({
                type: 'info',
                message: 'Running on CPU only. Consider smaller models (≤7B) with aggressive quantization (Q4 or lower).'
            });
        }

        if (hardware.summary.isMultiGPU) {
            insights.push({
                type: 'tip',
                message: `Multi-GPU detected (${hardware.summary.gpuCount} GPUs). Larger models can utilize combined VRAM.`
            });
        }

        if (hardware.summary.bestBackend === 'metal') {
            insights.push({
                type: 'info',
                message: 'Apple Silicon detected. Unified memory allows running larger models efficiently.'
            });
        }

        // Score-based insights (nothing is added when the top score is below 55)
        if (top.score.final >= 85) {
            insights.push({
                type: 'success',
                message: `Excellent match found! ${this.formatModelName(top.variant)} scores ${top.score.final}/100.`
            });
        } else if (top.score.final >= 70) {
            insights.push({
                type: 'success',
                message: `Good match found. ${this.formatModelName(top.variant)} should perform well.`
            });
        } else if (top.score.final >= 55) {
            insights.push({
                type: 'warning',
                message: 'Limited options for your hardware. Consider upgrading RAM/VRAM for better choices.'
            });
        }

        // Memory pressure insight
        const topSize = top.variant.size_gb || top.variant.sizeGB || 0;
        if (topSize > maxSize * 0.85) {
            insights.push({
                type: 'warning',
                message: 'Top recommendation uses most available memory. Close other applications before running.'
            });
        }

        // Use case specific insights
        if (opts.useCase === 'coding') {
            const codingModels = scored.filter(s => {
                const modelId = (s.variant.model_id || s.variant.modelId || '').toLowerCase();
                return modelId.includes('coder') || modelId.includes('codellama') || modelId.includes('starcoder');
            });
            if (codingModels.length > 0) {
                insights.push({
                    type: 'tip',
                    message: `Found ${codingModels.length} coding-specialized model(s). These are optimized for code completion.`
                });
            }
        }

        if (opts.useCase === 'reasoning' && top.score.components.quality >= 80) {
            insights.push({
                type: 'tip',
                message: 'For complex reasoning, consider using higher temperature (0.7-0.9) and longer contexts.'
            });
        }

        // Quantization insight. Normalised to upper case so lower-case quant
        // labels are recognised; 'F16' (as produced by parseModelName) is
        // treated the same as 'FP16'. 'Q2' also covers the 'IQ2' variants.
        const topQuant = String(top.variant.quant || 'Q4_K_M').toUpperCase();
        if (topQuant.includes('Q2')) {
            insights.push({
                type: 'warning',
                message: 'Very aggressive quantization reduces quality. Use for testing only.'
            });
        } else if (topQuant.includes('Q8') || topQuant === 'FP16' || topQuant === 'F16') {
            insights.push({
                type: 'tip',
                message: 'High-quality quantization selected. Good balance of quality and performance.'
            });
        }

        return insights;
    }

    /**
     * Format model name for display.
     *
     * @param {Object} variant - Variant with `model_id`/`modelId`, optional `tag`, `params_b`/`paramsB`, `quant`.
     * @returns {string} The tag itself when it already contains ':', otherwise
     *   `id:tag`, otherwise `id [paramsB] [(quant)]`.
     */
    formatModelName(variant) {
        const modelId = variant.model_id || variant.modelId || 'Unknown';
        const tag = variant.tag || '';

        // Tag already contains model:variant format (e.g., "qwen2.5:14b-instruct-q3_K_S")
        if (tag && tag.includes(':')) {
            return tag;
        }

        // Otherwise build the name
        if (tag) {
            return `${modelId}:${tag}`;
        }

        const params = variant.params_b || variant.paramsB;
        const quant = variant.quant;

        let name = modelId;
        if (params) name += ` ${params}B`;
        if (quant) name += ` (${quant})`;

        return name;
    }

    /**
     * Get quick recommendation for a specific use case.
     *
     * @param {Array} variants - Candidate variants.
     * @param {string} [useCase='general'] - Use case passed to recommend().
     * @returns {Promise<{recommended: Object|null, alternatives: Array, score: number}>}
     *   `alternatives` are entries 2-4 of the scored list.
     */
    async quickRecommend(variants, useCase = 'general') {
        const result = await this.recommend(variants, { useCase, limit: 5 });
        return {
            recommended: result.topPicks.best?.variant || null,
            alternatives: result.all.slice(1, 4).map(s => s.variant),
            score: result.topPicks.best?.score.final || 0
        };
    }

    /**
     * Find the best variant of a specific model.
     *
     * @param {Array} variants - Candidate variants.
     * @param {string} modelName - Case-insensitive substring matched against each variant's model id.
     * @param {Object} [options] - Options forwarded to recommend().
     * @returns {Promise<Object|null>} Best scored entry, or null when no variant id matches
     *   (may also be null if every match is filtered out by recommend()).
     */
    async findBestVariant(variants, modelName, options = {}) {
        const modelNameLower = modelName.toLowerCase();

        // Filter to just this model's variants
        const modelVariants = variants.filter(v => {
            const id = (v.model_id || v.modelId || '').toLowerCase();
            return id.includes(modelNameLower);
        });

        if (modelVariants.length === 0) {
            return null;
        }

        const result = await this.recommend(modelVariants, options);
        return result.topPicks.best;
    }

    /**
     * Compare two models on the detected hardware.
     *
     * @param {Object} variant1
     * @param {Object} variant2
     * @param {Object} [options] - Merged over `this.defaults` and passed to the scoring engine.
     * @returns {Promise<Object>} Both scores, the winner, the absolute difference and a
     *   per-component breakdown (variant1 minus variant2). On a tie, variant2 is
     *   reported as the winner.
     */
    async compare(variant1, variant2, options = {}) {
        const hardware = await this.detector.detect();
        const opts = { ...this.defaults, ...options };

        const score1 = this.scoring.score(variant1, hardware, opts);
        const score2 = this.scoring.score(variant2, hardware, opts);

        const firstWins = score1.final > score2.final;

        return {
            model1: {
                variant: variant1,
                score: score1
            },
            model2: {
                variant: variant2,
                score: score2
            },
            winner: {
                variant: firstWins ? variant1 : variant2,
                score: firstWins ? score1 : score2
            },
            difference: Math.abs(score1.final - score2.final),
            breakdown: {
                quality: score1.components.quality - score2.components.quality,
                speed: score1.components.speed - score2.components.speed,
                fit: score1.components.fit - score2.components.fit,
                context: score1.components.context - score2.components.context
            }
        };
    }

    /**
     * Get the best pick for each built-in use-case category.
     *
     * @param {Array} variants - Candidate variants.
     * @returns {Promise<Object>} Map of category name to its best scored entry (or null).
     */
    async recommendByCategory(variants) {
        const categories = ['general', 'coding', 'reasoning', 'chat', 'fast', 'quality'];
        const results = {};

        // Run one category at a time, in order.
        for (const category of categories) {
            const result = await this.recommend(variants, { useCase: category, limit: 3 });
            results[category] = result.topPicks.best;
        }

        return results;
    }

    /**
     * Get recommendations restricted to installed models.
     *
     * @param {Array} installedModels - Array of `{ name, size, ... }` entries as listed by Ollama
     *   (a nullish value is treated as empty).
     * @param {Object} [options] - Options forwarded to recommend().
     * @returns {Promise<Object>} Same shape as recommend().
     */
    async recommendInstalled(installedModels, options = {}) {
        const variants = (installedModels ?? []).map(m => this.convertInstalledToVariant(m));
        return this.recommend(variants, options);
    }

    /**
     * Convert installed Ollama model to variant format.
     *
     * @param {Object} installed - Entry with `name` ("model:tag") and `size` in bytes.
     * @returns {Object} Variant with `model_id`, `tag` ('latest' when the name has no ':'),
     *   parsed params/quant/context/MoE/input types, `size_gb` and `installed: true`.
     */
    convertInstalledToVariant(installed) {
        const name = installed.name || '';
        const size = installed.size || 0;
        const sizeGB = size / (1024 ** 3);

        // Parse model name for params and quant
        const parsed = this.parseModelName(name);
        const parts = name.split(':');

        return {
            model_id: parts[0],
            tag: parts.length > 1 ? parts[1] : 'latest',
            params_b: parsed.params,
            quant: parsed.quant,
            size_gb: sizeGB,
            context_length: parsed.context || 4096,
            is_moe: parsed.isMoE,
            input_types: parsed.inputTypes,
            installed: true
        };
    }

    /**
     * Parse model name to extract parameters.
     *
     * @param {string} name - Model name such as "llama3:8b-instruct-q4_K_M".
     * @returns {{params: number|null, quant: string, context: number, isMoE: boolean, inputTypes: string[]}}
     *   Defaults: quant 'Q4_K_M', context 4096, text-only input.
     */
    parseModelName(name) {
        const result = {
            params: null,
            quant: 'Q4_K_M',
            context: 4096,
            isMoE: false,
            inputTypes: ['text']
        };

        const nameLower = String(name ?? '').toLowerCase();

        // Extract params (e.g., "7b", "70b", "3.1b")
        const paramsMatch = nameLower.match(/(\d+\.?\d*)b/);
        if (paramsMatch) {
            result.params = parseFloat(paramsMatch[1]);
        }

        // Extract quantization. The first pattern contained in the name wins,
        // so a longer label must come before any label that is its prefix
        // ('q2_k_s' before 'q2_k'), otherwise the longer one can never match.
        const quantPatterns = [
            'fp16', 'f16', 'q8_0', 'q6_k', 'q5_k_m', 'q5_k_s', 'q5_0',
            'q4_k_m', 'q4_k_s', 'q4_0', 'q3_k_m', 'q3_k_s', 'q3_k_l',
            'iq4_xs', 'iq4_nl', 'iq3_xxs', 'iq3_xs', 'iq3_s',
            'iq2_xs', 'iq2_xxs', 'q2_k_s', 'q2_k'
        ];
        const matchedQuant = quantPatterns.find(q => nameLower.includes(q));
        if (matchedQuant) {
            result.quant = matchedQuant.toUpperCase();
        }

        // Check for MoE
        if (nameLower.includes('mixtral') || nameLower.includes('moe')) {
            result.isMoE = true;
        }

        // Check for vision
        if (nameLower.includes('llava') || nameLower.includes('vision') ||
            nameLower.includes('bakllava') || nameLower.includes('moondream')) {
            result.inputTypes = ['text', 'image'];
        }

        // Extract context length (e.g., "128k" -> 128 * 1024)
        const contextMatch = nameLower.match(/(\d+)k/);
        if (contextMatch) {
            result.context = Number.parseInt(contextMatch[1], 10) * 1024;
        }

        return result;
    }

    /**
     * Generate pull commands for top recommendations.
     *
     * @param {Object} recommendations - Result of recommend(); its `all` list is used.
     * @param {number} [limit=5] - Maximum number of entries taken from the list.
     * @returns {Array<{model: string, command: string, score: number, size: number}>}
     *   A tag that already contains ':' is used as the complete model reference
     *   (same rule as formatModelName); entries for which no reference can be
     *   built are skipped instead of yielding "undefined:latest".
     */
    getPullCommands(recommendations, limit = 5) {
        const commands = [];

        const models = recommendations?.all || [];
        for (const item of models.slice(0, limit)) {
            const variant = item.variant;
            const tag = variant.tag || 'latest';
            const modelId = variant.model_id || variant.modelId;
            const tagIsFullRef = tag.includes(':');

            if (!modelId && !tagIsFullRef) continue;

            const ref = tagIsFullRef ? tag : `${modelId}:${tag}`;

            commands.push({
                model: ref,
                command: `ollama pull ${ref}`,
                score: item.score.final,
                size: variant.size_gb || variant.sizeGB
            });
        }

        return commands;
    }
}
531
+
532
+ module.exports = IntelligentSelector;