llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intelligent Model Selector
|
|
3
|
+
* Uses scoring engine and hardware detection to recommend optimal LLM models
|
|
4
|
+
* Provides smart recommendations based on use case, hardware, and preferences
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const ScoringEngine = require('./scoring-engine');
|
|
8
|
+
const UnifiedDetector = require('../hardware/unified-detector');
|
|
9
|
+
|
|
10
|
+
class IntelligentSelector {
    /**
     * Build a selector around a scoring engine and a hardware detector.
     *
     * @param {Object} [options]
     * @param {Object} [options.scoring] - Options forwarded to the ScoringEngine constructor.
     * @param {Object} [options.detector] - Hardware detector to use; a new UnifiedDetector is created when omitted.
     * @param {Object|null} [options.database] - Stored on the instance; not read by any method of this class.
     */
    constructor(options = {}) {
        this.scoring = new ScoringEngine(options.scoring || {});
        this.detector = options.detector || new UnifiedDetector();
        this.database = options.database || null;

        // Default preferences, merged under caller options in recommend()/compare().
        this.defaults = {
            useCase: 'general',
            targetContext: 8192,
            targetTPS: 20,
            preferQuantization: null, // null = auto select
            preferFamily: null,
            maxSize: null, // null = auto from hardware
            minSize: null,
            excludeFamilies: [],
            includeVision: false,
            includeEmbeddings: false,
            limit: 10
        };
    }

    /**
     * Initialize hardware detection by running the detector once.
     *
     * @returns {Promise<void>}
     */
    async init() {
        await this.detector.detect();
    }

    /**
     * Get optimal model recommendations.
     *
     * Pipeline: merge options with defaults, detect hardware, filter the
     * candidates, score them, then derive categories, top picks and insights.
     *
     * @param {Array} variants - Array of model variants from database (null/undefined is treated as empty)
     * @param {Object} [options] - Selection options; keys override `this.defaults`.
     *   `options.headroom` is forwarded to the scoring engine (default 2; an explicit 0 is respected).
     * @returns {Promise<Object>} `{ topPicks, categories, all, hardware, insights, meta }`
     */
    async recommend(variants, options = {}) {
        const opts = { ...this.defaults, ...options };
        const candidates = variants == null ? [] : variants;

        // Ensure hardware is detected
        const hardware = await this.detector.detect();

        const filtered = this.applyFilters(candidates, opts, hardware);

        const scored = this.scoring.filterAndScore(filtered, hardware, {
            useCase: opts.useCase,
            targetContext: opts.targetContext,
            targetTPS: opts.targetTPS,
            // `??` rather than `||` so that a caller-supplied headroom of 0 is not replaced by 2.
            headroom: opts.headroom ?? 2
        });

        const categories = this.scoring.categorizeScores(scored);
        const topPicks = this.selectTopPicks(scored, opts);
        const insights = this.generateInsights(scored, hardware, opts);

        return {
            topPicks,
            categories,
            all: scored.slice(0, opts.limit),
            hardware: {
                description: this.detector.getHardwareDescription(),
                tier: this.detector.getHardwareTier(),
                maxSize: this.detector.getMaxModelSize(),
                backend: hardware.summary.bestBackend
            },
            insights,
            meta: {
                totalCandidates: candidates.length,
                afterFiltering: filtered.length,
                useCase: opts.useCase
            }
        };
    }

    /**
     * Apply filters to variant list.
     *
     * Removes variants outside the size window, variants whose model id
     * contains an excluded family name, and (unless opted in) vision and
     * embedding models. Family and quantization preferences only reorder
     * the list; they never remove entries. The input array is not mutated.
     *
     * @param {Array} variants - Candidate variants (null/undefined is treated as empty).
     * @param {Object} opts - Merged selection options (see `this.defaults`).
     * @param {Object} hardware - Detection result; not read here, kept for interface compatibility.
     * @returns {Array} New array of surviving variants.
     */
    applyFilters(variants, opts, hardware) {
        let filtered = variants == null ? [] : [...variants];

        // Size filters: when no explicit maximum is given, allow 2 units above
        // the detector's reported maximum model size.
        const maxSize = opts.maxSize || this.detector.getMaxModelSize() + 2;
        const minSize = opts.minSize || 0;

        filtered = filtered.filter(v => {
            const size = v.size_gb || v.sizeGB || 0;
            return size >= minSize && size <= maxSize;
        });

        // Family exclusions. Callers may override the default with undefined,
        // null or a single string, so normalise to an array of non-empty
        // lowercase strings before matching.
        const excludeLower = [].concat(opts.excludeFamilies ?? [])
            .map(f => String(f).toLowerCase())
            .filter(f => f.length > 0);
        if (excludeLower.length > 0) {
            filtered = filtered.filter(v => {
                const modelId = (v.model_id || v.modelId || '').toLowerCase();
                return !excludeLower.some(ex => modelId.includes(ex));
            });
        }

        // Family preference (boost, don't exclude others)
        if (opts.preferFamily) {
            const prefLower = opts.preferFamily.toLowerCase();
            filtered.sort((a, b) => {
                const aMatches = (a.model_id || a.modelId || '').toLowerCase().includes(prefLower);
                const bMatches = (b.model_id || b.modelId || '').toLowerCase().includes(prefLower);
                if (aMatches && !bMatches) return -1;
                if (!aMatches && bMatches) return 1;
                return 0;
            });
        }

        // Vision filter: drop anything that accepts images or has a known vision model name.
        if (!opts.includeVision) {
            filtered = filtered.filter(v => {
                const inputTypes = v.input_types || v.inputTypes || [];
                const modelId = (v.model_id || v.modelId || '').toLowerCase();
                return !inputTypes.includes('image') &&
                    !modelId.includes('llava') &&
                    !modelId.includes('vision') &&
                    !modelId.includes('bakllava') &&
                    !modelId.includes('moondream');
            });
        }

        // Embeddings filter: drop models whose id matches a known embedding family.
        if (!opts.includeEmbeddings) {
            filtered = filtered.filter(v => {
                const modelId = (v.model_id || v.modelId || '').toLowerCase();
                return !modelId.includes('embed') &&
                    !modelId.includes('nomic') &&
                    !modelId.includes('mxbai') &&
                    !modelId.includes('minilm') &&
                    !modelId.includes('arctic-embed');
            });
        }

        // Quantization preference (reorder only, exact case-insensitive match)
        if (opts.preferQuantization) {
            const prefQuant = opts.preferQuantization.toUpperCase();
            filtered.sort((a, b) => {
                const aQuant = (a.quant || '').toUpperCase();
                const bQuant = (b.quant || '').toUpperCase();
                if (aQuant === prefQuant && bQuant !== prefQuant) return -1;
                if (aQuant !== prefQuant && bQuant === prefQuant) return 1;
                return 0;
            });
        }

        return filtered;
    }

    /**
     * Select top picks from scored variants.
     *
     * Assumes `scored` is already ordered best-first, so `scored[0]` is the
     * overall best and the fallback for every other slot.
     *
     * @param {Array} scored - Entries shaped `{ variant, score: { final, components } }`.
     * @param {Object} opts - Selection options; currently unused.
     * @returns {{best: Object|null, balanced: Object|null, fast: Object|null, quality: Object|null}}
     */
    selectTopPicks(scored, opts) {
        const picks = {
            best: null,
            balanced: null,
            fast: null,
            quality: null
        };

        if (scored.length === 0) return picks;

        // Best overall (highest score)
        picks.best = scored[0];

        // Balanced (good quality + speed)
        const balanced = scored.find(s =>
            s.score.components.quality >= 70 &&
            s.score.components.speed >= 70 &&
            s.score.components.fit >= 80
        );
        picks.balanced = balanced || scored[0];

        // Fastest (highest speed score among acceptable quality).
        // filter() returns a fresh array, so sorting does not reorder `scored`.
        const fast = scored
            .filter(s => s.score.components.quality >= 60)
            .sort((a, b) => b.score.components.speed - a.score.components.speed)[0];
        picks.fast = fast || scored[0];

        // Highest quality (that fits)
        const quality = scored
            .filter(s => s.score.components.fit >= 70)
            .sort((a, b) => b.score.components.quality - a.score.components.quality)[0];
        picks.quality = quality || scored[0];

        return picks;
    }

    /**
     * Generate insights about the recommendations.
     *
     * @param {Array} scored - Scored entries, best first.
     * @param {Object} hardware - Detection result; `hardware.summary` is read.
     * @param {Object} opts - Merged selection options; `opts.useCase` is read.
     * @returns {Array<{type: string, message: string}>} Insights in the order they were detected.
     */
    generateInsights(scored, hardware, opts) {
        const insights = [];

        if (scored.length === 0) {
            insights.push({
                type: 'warning',
                message: 'No models found that match your criteria. Try relaxing filters.'
            });
            return insights;
        }

        const top = scored[0];
        const maxSize = this.detector.getMaxModelSize();

        // Hardware-based insights
        if (hardware.summary.bestBackend === 'cpu') {
            insights.push({
                type: 'info',
                message: 'Running on CPU only. Consider smaller models (≤7B) with aggressive quantization (Q4 or lower).'
            });
        }

        if (hardware.summary.isMultiGPU) {
            insights.push({
                type: 'tip',
                message: `Multi-GPU detected (${hardware.summary.gpuCount} GPUs). Larger models can utilize combined VRAM.`
            });
        }

        if (hardware.summary.bestBackend === 'metal') {
            insights.push({
                type: 'info',
                message: 'Apple Silicon detected. Unified memory allows running larger models efficiently.'
            });
        }

        // Score-based insights (nothing is reported below 55)
        if (top.score.final >= 85) {
            insights.push({
                type: 'success',
                message: `Excellent match found! ${this.formatModelName(top.variant)} scores ${top.score.final}/100.`
            });
        } else if (top.score.final >= 70) {
            insights.push({
                type: 'success',
                message: `Good match found. ${this.formatModelName(top.variant)} should perform well.`
            });
        } else if (top.score.final >= 55) {
            insights.push({
                type: 'warning',
                message: 'Limited options for your hardware. Consider upgrading RAM/VRAM for better choices.'
            });
        }

        // Memory pressure insight: top pick is above 85% of the detector's maximum model size.
        const topSize = top.variant.size_gb || top.variant.sizeGB || 0;
        if (topSize > maxSize * 0.85) {
            insights.push({
                type: 'warning',
                message: 'Top recommendation uses most available memory. Close other applications before running.'
            });
        }

        // Use case specific insights
        if (opts.useCase === 'coding') {
            const codingModels = scored.filter(s => {
                const modelId = (s.variant.model_id || s.variant.modelId || '').toLowerCase();
                return modelId.includes('coder') || modelId.includes('codellama') || modelId.includes('starcoder');
            });
            if (codingModels.length > 0) {
                insights.push({
                    type: 'tip',
                    message: `Found ${codingModels.length} coding-specialized model(s). These are optimized for code completion.`
                });
            }
        }

        if (opts.useCase === 'reasoning' && top.score.components.quality >= 80) {
            insights.push({
                type: 'tip',
                message: 'For complex reasoning, consider using higher temperature (0.7-0.9) and longer contexts.'
            });
        }

        // Quantization insight. The label is upper-cased first so that lowercase
        // labels (e.g. "q2_k") are handled the same way applyFilters() handles them.
        // 'Q2' also matches the 'IQ2_*' family.
        const topQuant = String(top.variant.quant || 'Q4_K_M').toUpperCase();
        if (topQuant.includes('Q2')) {
            insights.push({
                type: 'warning',
                message: 'Very aggressive quantization reduces quality. Use for testing only.'
            });
        } else if (topQuant.includes('Q8') || topQuant === 'FP16' || topQuant === 'F16') {
            // parseModelName() can emit either 'FP16' or 'F16'.
            insights.push({
                type: 'tip',
                message: 'High-quality quantization selected. Good balance of quality and performance.'
            });
        }

        return insights;
    }

    /**
     * Format model name for display.
     *
     * @param {Object} variant - Variant with `model_id`/`modelId` and optional `tag`, `params_b`/`paramsB`, `quant`.
     * @returns {string} The tag itself when it already contains ':', otherwise `id:tag`,
     *   otherwise `id [paramsB] [(quant)]`.
     */
    formatModelName(variant) {
        const modelId = variant.model_id || variant.modelId || 'Unknown';
        const tag = variant.tag || '';

        // Tag already contains model:variant format (e.g., "qwen2.5:14b-instruct-q3_K_S")
        if (tag && tag.includes(':')) {
            return tag;
        }

        // Otherwise build the name
        if (tag) {
            return `${modelId}:${tag}`;
        }

        const params = variant.params_b || variant.paramsB;
        const quant = variant.quant;

        let name = modelId;
        if (params) name += ` ${params}B`;
        if (quant) name += ` (${quant})`;

        return name;
    }

    /**
     * Get quick recommendation for a specific use case.
     *
     * @param {Array} variants - Candidate variants.
     * @param {string} [useCase='general'] - Use case forwarded to recommend().
     * @returns {Promise<{recommended: Object|null, alternatives: Array, score: number}>}
     *   Best variant, up to three runner-up variants, and the best final score (0 when none).
     */
    async quickRecommend(variants, useCase = 'general') {
        const result = await this.recommend(variants, { useCase, limit: 5 });
        return {
            recommended: result.topPicks.best?.variant || null,
            alternatives: result.all.slice(1, 4).map(s => s.variant),
            score: result.topPicks.best?.score.final || 0
        };
    }

    /**
     * Find the best variant of a specific model.
     *
     * @param {Array} variants - Candidate variants.
     * @param {string} modelName - Case-insensitive substring matched against each variant's model id.
     * @param {Object} [options] - Options forwarded to recommend().
     * @returns {Promise<Object|null>} Best scored entry, or null when no variant id matches
     *   (or when every match is filtered out).
     */
    async findBestVariant(variants, modelName, options = {}) {
        const modelNameLower = modelName.toLowerCase();

        // Filter to just this model's variants
        const modelVariants = variants.filter(v => {
            const id = (v.model_id || v.modelId || '').toLowerCase();
            return id.includes(modelNameLower);
        });

        if (modelVariants.length === 0) {
            return null;
        }

        const result = await this.recommend(modelVariants, options);
        return result.topPicks.best;
    }

    /**
     * Compare two models on the detected hardware.
     *
     * @param {Object} variant1 - First variant.
     * @param {Object} variant2 - Second variant.
     * @param {Object} [options] - Options merged over `this.defaults` and passed to the scoring engine.
     * @returns {Promise<Object>} Both scores, the winner (variant2 wins ties), the absolute
     *   difference of final scores, and per-component deltas computed as variant1 minus variant2.
     */
    async compare(variant1, variant2, options = {}) {
        const hardware = await this.detector.detect();
        const opts = { ...this.defaults, ...options };

        const score1 = this.scoring.score(variant1, hardware, opts);
        const score2 = this.scoring.score(variant2, hardware, opts);

        const firstWins = score1.final > score2.final;
        const winner = firstWins ? variant1 : variant2;
        const winnerScore = firstWins ? score1 : score2;

        return {
            model1: {
                variant: variant1,
                score: score1
            },
            model2: {
                variant: variant2,
                score: score2
            },
            winner: {
                variant: winner,
                score: winnerScore
            },
            difference: Math.abs(score1.final - score2.final),
            breakdown: {
                quality: score1.components.quality - score2.components.quality,
                speed: score1.components.speed - score2.components.speed,
                fit: score1.components.fit - score2.components.fit,
                context: score1.components.context - score2.components.context
            }
        };
    }

    /**
     * Get recommendations by category (coding, chat, etc.).
     *
     * Runs recommend() once per category, one after another.
     *
     * @param {Array} variants - Candidate variants.
     * @returns {Promise<Object>} Map of category name to its best scored entry (or null).
     */
    async recommendByCategory(variants) {
        const categories = ['general', 'coding', 'reasoning', 'chat', 'fast', 'quality'];
        const results = {};

        for (const category of categories) {
            const result = await this.recommend(variants, { useCase: category, limit: 3 });
            results[category] = result.topPicks.best;
        }

        return results;
    }

    /**
     * Get installed model recommendations.
     *
     * @param {Array} installedModels - Array of `{ name, size, modified_at, ... }` from Ollama.
     * @param {Object} [options] - Options forwarded to recommend().
     * @returns {Promise<Object>} Same shape as recommend().
     */
    async recommendInstalled(installedModels, options = {}) {
        const variants = installedModels.map(m => this.convertInstalledToVariant(m));
        return this.recommend(variants, options);
    }

    /**
     * Convert installed Ollama model to variant format.
     *
     * @param {Object} installed - Installed model record; `name` and `size` are read.
     *   `size` is divided by 1024^3 to obtain `size_gb`.
     * @returns {Object} Variant with fields derived from the name via parseModelName().
     */
    convertInstalledToVariant(installed) {
        const name = installed.name || '';
        const size = installed.size || 0;
        const sizeGB = size / (1024 ** 3);

        // Parse model name for params and quant
        const parsed = this.parseModelName(name);
        const nameParts = name.split(':');

        return {
            model_id: nameParts[0],
            tag: name.includes(':') ? nameParts[1] : 'latest',
            params_b: parsed.params,
            quant: parsed.quant,
            size_gb: sizeGB,
            context_length: parsed.context || 4096,
            is_moe: parsed.isMoE,
            input_types: parsed.inputTypes,
            installed: true
        };
    }

    /**
     * Parse model name to extract parameters.
     *
     * All matching is done on the lowercased name by substring/regex, so the
     * result is a heuristic rather than authoritative metadata.
     *
     * @param {string} name - Model name, e.g. "llama3:8b-instruct-q4_0".
     * @returns {{params: number|null, quant: string, context: number, isMoE: boolean, inputTypes: string[]}}
     *   Defaults when nothing matches: params null, quant 'Q4_K_M', context 4096, text-only, not MoE.
     */
    parseModelName(name) {
        const result = {
            params: null,
            quant: 'Q4_K_M',
            context: 4096,
            isMoE: false,
            inputTypes: ['text']
        };

        const nameLower = String(name || '').toLowerCase();

        // Extract params (e.g., "7b", "70b", "3.1b")
        const paramsMatch = nameLower.match(/(\d+\.?\d*)b/);
        if (paramsMatch) {
            result.params = parseFloat(paramsMatch[1]);
        }

        // Extract quantization. First match wins, so a pattern that is a prefix
        // of another ('q2_k' vs 'q2_k_s') must come after the longer one.
        const quantPatterns = [
            'fp16', 'f16', 'q8_0', 'q6_k', 'q5_k_m', 'q5_k_s', 'q5_0',
            'q4_k_m', 'q4_k_s', 'q4_0', 'q3_k_m', 'q3_k_s', 'q3_k_l',
            'iq4_xs', 'iq4_nl', 'iq3_xxs', 'iq3_xs', 'iq3_s',
            'iq2_xs', 'iq2_xxs', 'q2_k_s', 'q2_k'
        ];
        const matchedQuant = quantPatterns.find(q => nameLower.includes(q));
        if (matchedQuant) {
            result.quant = matchedQuant.toUpperCase();
        }

        // Check for MoE
        if (nameLower.includes('mixtral') || nameLower.includes('moe')) {
            result.isMoE = true;
        }

        // Check for vision
        if (nameLower.includes('llava') || nameLower.includes('vision') ||
            nameLower.includes('bakllava') || nameLower.includes('moondream')) {
            result.inputTypes = ['text', 'image'];
        }

        // Extract context length (e.g., "128k" -> 128 * 1024)
        const contextMatch = nameLower.match(/(\d+)k/);
        if (contextMatch) {
            result.context = Number.parseInt(contextMatch[1], 10) * 1024;
        }

        return result;
    }

    /**
     * Generate pull commands for top recommendations.
     *
     * @param {Object} recommendations - Result of recommend(); only `all` is read.
     *   A missing object or missing `all` yields an empty list.
     * @param {number} [limit=5] - Maximum number of commands to return.
     * @returns {Array<{model: string, command: string, score: number, size: number}>}
     */
    getPullCommands(recommendations, limit = 5) {
        const commands = [];

        const models = recommendations?.all ?? [];
        for (const item of models.slice(0, limit)) {
            const variant = item.variant;
            const tag = variant.tag || 'latest';
            const modelId = variant.model_id || variant.modelId;

            // A tag may already be a full "model:variant" reference (see
            // formatModelName); prefixing the model id again would produce an
            // invalid name such as "qwen2.5:qwen2.5:14b".
            const reference = tag.includes(':') ? tag : `${modelId}:${tag}`;

            commands.push({
                model: reference,
                command: `ollama pull ${reference}`,
                score: item.score.final,
                size: variant.size_gb || variant.sizeGB
            });
        }

        return commands;
    }
}
|
|
531
|
+
|
|
532
|
+
module.exports = IntelligentSelector;
|