llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,1145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Checker: Deterministic Model Selection Algorithm (Spec v1.0)
|
|
3
|
+
*
|
|
4
|
+
* A two-phase selector that picks the best Ollama model + quantization
|
|
5
|
+
* for a given machine and task category.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const { spawn } = require('child_process');
|
|
11
|
+
const { DETERMINISTIC_WEIGHTS } = require('./scoring-config');
|
|
12
|
+
|
|
13
|
+
class DeterministicModelSelector {
|
|
14
|
+
constructor() {
  // Path to the curated catalog bundled next to this module.
  this.catalogPath = path.join(__dirname, 'catalog.json');
  // Per-user cache of measured probe speeds (tokens/sec).
  this.benchCachePath = path.join(require('os').homedir(), '.llm-checker', 'bench.json');

  // Quality priors table
  // Base quality score (0-100 scale) keyed by parameter count in billions;
  // getBaseQuality() buckets a model into the nearest key at or above it.
  this.baseQualityByParams = {
    0.5: 45, 1: 45, 1.5: 45,
    2: 60, 3: 60, 4: 60,
    7: 75, 8: 75, 9: 75,
    13: 82, 14: 82, 15: 82,
    30: 89, 32: 89, 34: 89,
    70: 95, 72: 95
  };

  // Family quality bumps
  // Additive quality adjustment per model family (applied in calculateQualityPrior).
  this.familyBumps = {
    'qwen2.5': 2,
    'deepseek': 3,
    'mistral': 1,
    'llama3.1': 1,
    'llama3.2': 2,
    'gemma2': 1,
    'phi-3': 0,
    'granite': 0,
    'solar': 0,
    'starcoder': 1,
    'minicpm': 0,
    'llava': 0
  };

  // Quantization penalties
  // Quality points lost at each quantization level (lossier = larger penalty).
  this.quantPenalties = {
    'Q8_0': 0,
    'Q6_K': -1,
    'Q5_K_M': -2,
    'Q4_K_M': -5,
    'Q3_K': -8,
    'Q2_K': -12
  };

  // Quantization hierarchy (best to worst)
  // selectBestQuantization() walks this in order and takes the first that fits.
  this.quantHierarchy = ['Q8_0', 'Q6_K', 'Q5_K_M', 'Q4_K_M', 'Q3_K', 'Q2_K'];

  // Quantization speed multipliers
  // Relative throughput versus the Q5_K_M baseline (1.00); lossier quants are faster.
  this.quantSpeedMultipliers = {
    'Q8_0': 0.8,
    'Q6_K': 0.95,
    'Q5_K_M': 1.00,
    'Q4_K_M': 1.15,
    'Q3_K': 1.25,
    'Q2_K': 1.35
  };

  // Backend speed constants (K)
  // Rough throughput constants; estimateSpeed() computes K / paramsB.
  this.backendK = {
    'metal': 160, // Apple Metal
    'cuda': 220, // NVIDIA CUDA
    'cpu_x86': 70, // CPU x86_64
    'cpu_arm': 90 // CPU ARM64
  };

  // Category target speeds (tokens/sec)
  // Speed score is normalized against these per-category targets.
  this.targetSpeeds = {
    'general': 40,
    'coding': 40,
    'reasoning': 25,
    'summarization': 60,
    'reading': 60,
    'multimodal': 40,
    'embeddings': 200
  };

  // Category target contexts
  // Default context window (tokens) requested per task category.
  this.targetContexts = {
    'general': 4096,
    'coding': 8192,
    'reasoning': 8192,
    'summarization': 8192,
    'reading': 8192,
    'multimodal': 4096,
    'embeddings': 512
  };

  // Category scoring weights [Q, S, F, C] from centralized config
  this.categoryWeights = DETERMINISTIC_WEIGHTS;
}
|
|
100
|
+
|
|
101
|
+
// ============================================================================
|
|
102
|
+
// PHASE 0: DATA SOURCES
|
|
103
|
+
// ============================================================================
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Hardware Profiler - Detect CPU, GPU, RAM, and acceleration support
|
|
107
|
+
*/
|
|
108
|
+
async getHardware() {
|
|
109
|
+
const hardware = {
|
|
110
|
+
cpu: await this.getCPUInfo(),
|
|
111
|
+
gpu: await this.getGPUInfo(),
|
|
112
|
+
memory: await this.getMemoryInfo(),
|
|
113
|
+
os: await this.getOSInfo(),
|
|
114
|
+
acceleration: await this.getAccelerationSupport()
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
// Calculate usable memory: min(0.8 * total_ram, total_ram - 2GB)
|
|
118
|
+
hardware.usableMemGB = Math.min(
|
|
119
|
+
0.8 * hardware.memory.totalGB,
|
|
120
|
+
hardware.memory.totalGB - 2
|
|
121
|
+
);
|
|
122
|
+
|
|
123
|
+
return hardware;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
async getCPUInfo() {
|
|
127
|
+
const os = require('os');
|
|
128
|
+
return {
|
|
129
|
+
architecture: os.arch(),
|
|
130
|
+
cores: os.cpus().length,
|
|
131
|
+
threads: os.cpus().length, // Simplified
|
|
132
|
+
platform: os.platform()
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
async getGPUInfo() {
|
|
137
|
+
const cpu = await this.getCPUInfo();
|
|
138
|
+
|
|
139
|
+
// Simplified GPU detection
|
|
140
|
+
if (cpu.platform === 'darwin' && cpu.architecture === 'arm64') {
|
|
141
|
+
return {
|
|
142
|
+
type: 'apple_silicon',
|
|
143
|
+
vramGB: 0, // Unified memory
|
|
144
|
+
unified: true
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// TODO: Add NVIDIA/AMD detection for other platforms
|
|
149
|
+
return {
|
|
150
|
+
type: 'cpu_only',
|
|
151
|
+
vramGB: 0,
|
|
152
|
+
unified: false
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
async getMemoryInfo() {
|
|
157
|
+
const os = require('os');
|
|
158
|
+
const totalBytes = os.totalmem();
|
|
159
|
+
return {
|
|
160
|
+
totalGB: Math.round((totalBytes / (1024**3)) * 10) / 10
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
async getOSInfo() {
|
|
165
|
+
const os = require('os');
|
|
166
|
+
return {
|
|
167
|
+
platform: os.platform(),
|
|
168
|
+
arch: os.arch(),
|
|
169
|
+
release: os.release()
|
|
170
|
+
};
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
async getAccelerationSupport() {
|
|
174
|
+
const cpu = await this.getCPUInfo();
|
|
175
|
+
const gpu = await this.getGPUInfo();
|
|
176
|
+
|
|
177
|
+
return {
|
|
178
|
+
supports_metal: gpu.type === 'apple_silicon',
|
|
179
|
+
supports_cuda: gpu.type === 'nvidia',
|
|
180
|
+
supports_rocm: gpu.type === 'amd'
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Local Ollama Inventory - Get installed models from `ollama list`
|
|
186
|
+
*/
|
|
187
|
+
async getInstalledModels() {
|
|
188
|
+
try {
|
|
189
|
+
const models = await this.runOllamaCommand(['list']);
|
|
190
|
+
const parsed = [];
|
|
191
|
+
|
|
192
|
+
for (const line of models.split('\n').slice(1)) { // Skip header
|
|
193
|
+
if (!line.trim()) continue;
|
|
194
|
+
|
|
195
|
+
const parts = line.trim().split(/\s+/);
|
|
196
|
+
if (parts.length < 3) continue;
|
|
197
|
+
|
|
198
|
+
const modelName = parts[0];
|
|
199
|
+
const modelId = parts[1];
|
|
200
|
+
const size = parts.length >= 4 ? `${parts[2]} ${parts[3]}` : parts[2];
|
|
201
|
+
|
|
202
|
+
// Get detailed info for each model
|
|
203
|
+
try {
|
|
204
|
+
const details = await this.getModelDetails(modelName);
|
|
205
|
+
parsed.push({
|
|
206
|
+
...details,
|
|
207
|
+
installed: true,
|
|
208
|
+
installedSize: size
|
|
209
|
+
});
|
|
210
|
+
} catch (error) {
|
|
211
|
+
console.warn(`Failed to get details for ${modelName}:`, error.message);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
return parsed;
|
|
216
|
+
} catch (error) {
|
|
217
|
+
// Silently fail when Ollama is not available - this is expected
|
|
218
|
+
return [];
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
async getModelDetails(modelName) {
|
|
223
|
+
try {
|
|
224
|
+
const details = await this.runOllamaCommand(['show', modelName]);
|
|
225
|
+
|
|
226
|
+
// Parse model details from ollama show output
|
|
227
|
+
const meta = {
|
|
228
|
+
name: modelName,
|
|
229
|
+
family: this.extractFamily(modelName),
|
|
230
|
+
paramsB: this.extractParams(details),
|
|
231
|
+
ctxMax: this.extractContextLength(details),
|
|
232
|
+
quant: this.extractQuantization(details),
|
|
233
|
+
sizeGB: this.extractSizeGB(details),
|
|
234
|
+
modalities: this.extractModalities(details),
|
|
235
|
+
tags: this.extractTags(details),
|
|
236
|
+
model_identifier: modelName
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
return meta;
|
|
240
|
+
} catch (error) {
|
|
241
|
+
// If Ollama is not available or model details can't be fetched, return minimal info
|
|
242
|
+
return {
|
|
243
|
+
name: modelName,
|
|
244
|
+
family: 'unknown',
|
|
245
|
+
paramsB: 0,
|
|
246
|
+
ctxMax: 2048,
|
|
247
|
+
quant: 'unknown',
|
|
248
|
+
sizeGB: 0,
|
|
249
|
+
modalities: ['text'],
|
|
250
|
+
tags: [],
|
|
251
|
+
model_identifier: modelName,
|
|
252
|
+
error: error.message
|
|
253
|
+
};
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Curated Catalog - Load known models from catalog.json
|
|
259
|
+
*/
|
|
260
|
+
async loadCatalog() {
|
|
261
|
+
try {
|
|
262
|
+
if (!fs.existsSync(this.catalogPath)) {
|
|
263
|
+
console.warn('Catalog not found, creating default...');
|
|
264
|
+
await this.createDefaultCatalog();
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
const catalogData = fs.readFileSync(this.catalogPath, 'utf8');
|
|
268
|
+
const catalog = JSON.parse(catalogData);
|
|
269
|
+
|
|
270
|
+
return catalog.models.map(model => ({
|
|
271
|
+
...model,
|
|
272
|
+
installed: false
|
|
273
|
+
}));
|
|
274
|
+
} catch (error) {
|
|
275
|
+
console.warn('Failed to load catalog:', error.message);
|
|
276
|
+
return [];
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
async createDefaultCatalog() {
  // Write a minimal seed catalog so loadCatalog() always has data to read.
  // Sizes/context limits here are approximate published figures for the
  // default Q4_K_M builds of each model.
  const defaultCatalog = {
    version: "1.0",
    updated: new Date().toISOString(),
    models: [
      {
        name: "qwen2.5-coder:0.5b",
        family: "qwen2.5",
        paramsB: 0.5,
        ctxMax: 32768,
        quant: "Q4_K_M",
        sizeGB: 0.4,
        modalities: ["text"],
        tags: ["coder", "instruct"],
        model_identifier: "qwen2.5-coder:0.5b"
      },
      {
        name: "qwen2.5-coder:1.5b",
        family: "qwen2.5",
        paramsB: 1.5,
        ctxMax: 32768,
        quant: "Q4_K_M",
        sizeGB: 1.1,
        modalities: ["text"],
        tags: ["coder", "instruct"],
        model_identifier: "qwen2.5-coder:1.5b"
      },
      {
        name: "qwen2.5-coder:7b",
        family: "qwen2.5",
        paramsB: 7,
        ctxMax: 32768,
        quant: "Q4_K_M",
        sizeGB: 4.4,
        modalities: ["text"],
        tags: ["coder", "instruct"],
        model_identifier: "qwen2.5-coder:7b"
      },
      {
        name: "llama3.2:3b",
        family: "llama3.2",
        paramsB: 3,
        ctxMax: 131072,
        quant: "Q4_K_M",
        sizeGB: 2.0,
        modalities: ["text"],
        tags: ["instruct", "chat"],
        model_identifier: "llama3.2:3b"
      },
      {
        // Vision-capable entry so the 'multimodal' category has a candidate.
        name: "llava:7b",
        family: "llava",
        paramsB: 7,
        ctxMax: 4096,
        quant: "Q4_K_M",
        sizeGB: 4.7,
        modalities: ["text", "vision"],
        tags: ["multimodal", "vision"],
        model_identifier: "llava:7b"
      }
    ]
  };

  // Ensure directory exists
  const dir = path.dirname(this.catalogPath);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }

  fs.writeFileSync(this.catalogPath, JSON.stringify(defaultCatalog, null, 2));
}
|
|
351
|
+
|
|
352
|
+
// ============================================================================
|
|
353
|
+
// HELPER METHODS FOR PARSING OLLAMA OUTPUT
|
|
354
|
+
// ============================================================================
|
|
355
|
+
|
|
356
|
+
extractFamily(modelName) {
|
|
357
|
+
const name = modelName.toLowerCase();
|
|
358
|
+
if (name.includes('qwen2.5')) return 'qwen2.5';
|
|
359
|
+
if (name.includes('qwen3')) return 'qwen2.5';
|
|
360
|
+
if (name.includes('qwen')) return 'qwen2.5';
|
|
361
|
+
if (name.includes('deepseek')) return 'deepseek';
|
|
362
|
+
if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
|
|
363
|
+
if (name.includes('llama3.1')) return 'llama3.1';
|
|
364
|
+
if (name.includes('llama')) return 'llama';
|
|
365
|
+
if (name.includes('mistral')) return 'mistral';
|
|
366
|
+
if (name.includes('gemma')) return 'gemma2';
|
|
367
|
+
if (name.includes('phi')) return 'phi-3';
|
|
368
|
+
if (name.includes('llava')) return 'llava';
|
|
369
|
+
if (name.includes('granite')) return 'granite';
|
|
370
|
+
if (name.includes('solar')) return 'solar';
|
|
371
|
+
if (name.includes('starcoder')) return 'starcoder';
|
|
372
|
+
if (name.includes('minicpm')) return 'minicpm';
|
|
373
|
+
return 'unknown';
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
extractParams(details) {
|
|
377
|
+
// Look for parameter info in ollama show output
|
|
378
|
+
const match = details.match(/parameters\s+(\d+\.?\d*)[BM]/i);
|
|
379
|
+
if (match) {
|
|
380
|
+
const num = parseFloat(match[1]);
|
|
381
|
+
return match[0].toUpperCase().includes('B') ? num : num / 1000;
|
|
382
|
+
}
|
|
383
|
+
return 7; // Default fallback
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
extractContextLength(details) {
|
|
387
|
+
const match = details.match(/context_length\s+(\d+)/i);
|
|
388
|
+
return match ? parseInt(match[1]) : 4096;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
extractQuantization(details) {
|
|
392
|
+
const match = details.match(/quantization\s+(Q\d+_[A-Z0-9_]+)/i);
|
|
393
|
+
return match ? match[1] : 'Q4_K_M';
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
extractSizeGB(details) {
|
|
397
|
+
const match = details.match(/size\s+(\d+\.?\d*)\s*GB/i);
|
|
398
|
+
return match ? parseFloat(match[1]) : 4.0;
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
extractModalities(details) {
|
|
402
|
+
const modalities = ['text'];
|
|
403
|
+
if (details.toLowerCase().includes('vision') || details.toLowerCase().includes('image')) {
|
|
404
|
+
modalities.push('vision');
|
|
405
|
+
}
|
|
406
|
+
return modalities;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
extractTags(details) {
|
|
410
|
+
const tags = [];
|
|
411
|
+
const lowerDetails = details.toLowerCase();
|
|
412
|
+
|
|
413
|
+
if (lowerDetails.includes('instruct')) tags.push('instruct');
|
|
414
|
+
if (lowerDetails.includes('chat')) tags.push('chat');
|
|
415
|
+
if (lowerDetails.includes('code')) tags.push('coder');
|
|
416
|
+
if (lowerDetails.includes('vision')) tags.push('vision');
|
|
417
|
+
// Only mark as embedding if it's explicitly an embedding model
|
|
418
|
+
if (lowerDetails.includes('embed-text') ||
|
|
419
|
+
lowerDetails.includes('nomic-embed') ||
|
|
420
|
+
lowerDetails.includes('bge-') ||
|
|
421
|
+
lowerDetails.includes('all-minilm')) tags.push('embedding');
|
|
422
|
+
|
|
423
|
+
return tags;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
async runOllamaCommand(args) {
  // Run `ollama <args>` as a child process and resolve with its stdout.
  // Rejects with a descriptive Error on a non-zero exit code, when the
  // binary is missing (ENOENT), or on any other spawn failure.
  return new Promise((resolve, reject) => {
    try {
      const proc = spawn('ollama', args, { stdio: 'pipe' });
      let output = '';
      let error = '';

      // Buffer chunks are implicitly stringified by the += concatenation.
      proc.stdout.on('data', (data) => output += data);
      proc.stderr.on('data', (data) => error += data);

      proc.on('close', (code) => {
        if (code === 0) {
          resolve(output);
        } else {
          // Surface accumulated stderr as the failure reason.
          reject(new Error(`Ollama command failed: ${error}`));
        }
      });

      proc.on('error', (err) => {
        // Handle ENOENT and other spawn errors gracefully
        if (err.code === 'ENOENT') {
          reject(new Error('Ollama not found. Please install Ollama from https://ollama.ai'));
        } else {
          reject(new Error(`Ollama spawn error: ${err.message}`));
        }
      });
    } catch (spawnError) {
      // Handle synchronous spawn errors
      reject(new Error(`Failed to start Ollama: ${spawnError.message}`));
    }
  });
}
|
|
458
|
+
|
|
459
|
+
// ============================================================================
|
|
460
|
+
// PHASE 1: ESTIMATION FILTER
|
|
461
|
+
// ============================================================================
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* Main model selection function
|
|
465
|
+
*/
|
|
466
|
+
/**
 * Main model selection function (the selector's public entry point).
 *
 * @param {string} category - task category ('general', 'coding', 'reasoning',
 *   'summarization', 'reading', 'multimodal', 'embeddings')
 * @param {object} options
 * @param {number} [options.targetCtx] - desired context window (defaults per category)
 * @param {number} [options.topN=5] - how many candidates to return
 * @param {boolean} [options.enableProbe=false] - run live speed probes on the top picks
 * @param {boolean} [options.silent=false] - suppress console progress output
 * @returns {Promise<object>} { category, hardware, candidates, total_evaluated, timestamp }
 */
async selectModels(category = 'general', options = {}) {
  const {
    targetCtx = this.targetContexts[category],
    topN = 5,
    enableProbe = false,
    silent = false
  } = options;

  if (!silent) {
    console.log(`🔍 Selecting models for category: ${category}`);
  }

  // Phase 0: Gather data (hardware profile + local installs + curated catalog)
  const hardware = await this.getHardware();
  const installed = await this.getInstalledModels();
  const catalog = await this.loadCatalog();

  if (!silent) {
    console.log(`Found ${installed.length} installed, ${catalog.length} catalog models`);
    console.log(`Hardware: ${hardware.cpu.cores} cores, ${hardware.memory.totalGB}GB RAM, ${hardware.gpu.type}`);
  }

  // Combine and dedupe models (prefer installed versions)
  const pool = this.combineModels(installed, catalog);
  const filtered = this.filterByCategory(pool, category);

  if (!silent) {
    console.log(`Evaluating ${filtered.length} models for ${category} category`);
  }

  // Phase 1: Estimation filter
  const candidates = [];
  // Memory budget: unified-memory machines (Apple Silicon) use usable RAM;
  // discrete GPUs use VRAM, falling back to RAM when VRAM is unknown (0).
  const budget = hardware.gpu.unified ? hardware.usableMemGB :
                 (hardware.gpu.vramGB || hardware.usableMemGB);

  for (const model of filtered) {
    const result = this.evaluateModel(model, hardware, category, targetCtx, budget);
    if (result) {
      candidates.push(result);
    }
  }

  // Sort by score (descending) and keep only the best topN
  candidates.sort((a, b) => b.score - a.score);
  const topCandidates = candidates.slice(0, topN);

  if (!silent) {
    console.log(`✨ Selected ${topCandidates.length} top candidates`);
  }

  // Phase 2: Quick probe (optional) - measure real tokens/sec on the finalists
  if (enableProbe && topCandidates.length > 0) {
    if (!silent) {
      console.log(`🔬 Running quick probes...`);
    }
    await this.runQuickProbes(topCandidates, hardware, category);
    // Re-sort after probing
    topCandidates.sort((a, b) => b.score - a.score);
  }

  return {
    category,
    hardware,
    candidates: topCandidates,
    total_evaluated: filtered.length,
    timestamp: new Date().toISOString()
  };
}
|
|
534
|
+
|
|
535
|
+
combineModels(installed, catalog) {
|
|
536
|
+
const combined = [...installed];
|
|
537
|
+
const installedNames = new Set(installed.map(m => m.model_identifier));
|
|
538
|
+
|
|
539
|
+
// Add catalog models that aren't installed
|
|
540
|
+
for (const model of catalog) {
|
|
541
|
+
if (!installedNames.has(model.model_identifier)) {
|
|
542
|
+
combined.push(model);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
return combined;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
filterByCategory(models, category) {
|
|
550
|
+
return models.filter(model => {
|
|
551
|
+
switch (category) {
|
|
552
|
+
case 'coding':
|
|
553
|
+
return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
|
|
554
|
+
model.name.toLowerCase().includes('code');
|
|
555
|
+
|
|
556
|
+
case 'multimodal':
|
|
557
|
+
return model.modalities.includes('vision') ||
|
|
558
|
+
model.tags.includes('vision');
|
|
559
|
+
|
|
560
|
+
case 'embeddings':
|
|
561
|
+
return model.tags.includes('embedding') ||
|
|
562
|
+
model.tags.includes('embeddings') ||
|
|
563
|
+
model.name.toLowerCase().includes('embed') ||
|
|
564
|
+
model.name.toLowerCase().includes('bge-') ||
|
|
565
|
+
model.name.toLowerCase().includes('nomic-embed') ||
|
|
566
|
+
model.name.toLowerCase().includes('all-minilm') ||
|
|
567
|
+
model.specialization === 'embeddings';
|
|
568
|
+
|
|
569
|
+
case 'reasoning':
|
|
570
|
+
return model.tags.includes('instruct') ||
|
|
571
|
+
model.paramsB >= 7; // Prefer larger models for reasoning
|
|
572
|
+
|
|
573
|
+
default: // general, reading, summarization
|
|
574
|
+
return true; // Most models can handle these
|
|
575
|
+
}
|
|
576
|
+
});
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
evaluateModel(model, hardware, category, targetCtx, budget) {
|
|
580
|
+
// 1. Select best fitting quantization
|
|
581
|
+
const bestQuant = this.selectBestQuantization(model, budget, targetCtx);
|
|
582
|
+
if (!bestQuant) return null;
|
|
583
|
+
|
|
584
|
+
// 2. Calculate required memory
|
|
585
|
+
const requiredGB = this.estimateRequiredGB(model, bestQuant.quant, targetCtx);
|
|
586
|
+
if (requiredGB > budget) return null;
|
|
587
|
+
|
|
588
|
+
// 3. Calculate component scores
|
|
589
|
+
const Q = this.calculateQualityPrior(model, bestQuant.quant, category);
|
|
590
|
+
const S = this.estimateSpeed(hardware, model, bestQuant.quant, category);
|
|
591
|
+
const F = this.calculateFitScore(requiredGB, budget);
|
|
592
|
+
const C = this.calculateContextScore(model, targetCtx);
|
|
593
|
+
|
|
594
|
+
// 4. Calculate final weighted score
|
|
595
|
+
const weights = this.categoryWeights[category];
|
|
596
|
+
const score = Math.round((Q * weights[0] + S * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
|
|
597
|
+
|
|
598
|
+
// 5. Build rationale
|
|
599
|
+
const rationale = this.buildRationale(hardware, model, bestQuant.quant, requiredGB, budget, category, Q, S);
|
|
600
|
+
|
|
601
|
+
return {
|
|
602
|
+
meta: model,
|
|
603
|
+
quant: bestQuant.quant,
|
|
604
|
+
requiredGB: Math.round(requiredGB * 10) / 10,
|
|
605
|
+
estTPS: S,
|
|
606
|
+
score,
|
|
607
|
+
rationale,
|
|
608
|
+
components: { Q, S, F, C }
|
|
609
|
+
};
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
selectBestQuantization(model, budget, targetCtx) {
|
|
613
|
+
// Try quantizations from best to worst quality
|
|
614
|
+
for (const quant of this.quantHierarchy) {
|
|
615
|
+
const requiredGB = this.estimateRequiredGB(model, quant, targetCtx);
|
|
616
|
+
if (requiredGB <= budget) {
|
|
617
|
+
return { quant, sizeGB: requiredGB };
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
// If nothing fits at target context, try halving context once
|
|
622
|
+
const halfCtx = Math.floor(targetCtx / 2);
|
|
623
|
+
if (halfCtx >= 1024) {
|
|
624
|
+
for (const quant of this.quantHierarchy) {
|
|
625
|
+
const requiredGB = this.estimateRequiredGB(model, quant, halfCtx);
|
|
626
|
+
if (requiredGB <= budget) {
|
|
627
|
+
return { quant, sizeGB: requiredGB };
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
return null; // Model doesn't fit
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
/**
 * Estimate total memory (GB) needed to run `model` at quantization `quant`
 * with a `ctx`-token context window: weights + KV cache + runtime overhead.
 * @param {object} model - needs `paramsB` (parameter count in billions)
 * @param {string} quant - quantization key (e.g. 'Q4_K_M')
 * @param {number} ctx - context window in tokens
 * @returns {number} estimated GB required
 */
estimateRequiredGB(model, quant, ctx) {
  // Bytes per parameter by quantization level (calibrated to real Ollama sizes)
  // 7B Q4_K_M=~4.5GB, 14B Q4_K_M=~9GB, 32B Q4_K_M=~19GB
  const bytesPerParam = {
    'Q8_0': 1.05,
    'Q6_K': 0.80,
    'Q5_K_M': 0.68,
    'Q4_K_M': 0.58,
    'Q3_K': 0.48,
    'Q2_K': 0.37
  };
  // Unknown quant -> mid-range guess of 0.63 bytes/param.
  const bpp = bytesPerParam[quant] || 0.63;
  const modelMemGB = model.paramsB * bpp;

  // KV cache: ~2 * numLayers * hiddenDim * 2bytes * ctx / 1e9
  // Simplified: ~0.000008 GB per billion params per context token
  const kvCacheGB = 0.000008 * model.paramsB * ctx;

  // Runtime overhead (Metal/CUDA context, buffers)
  const runtimeOverhead = 0.5;

  return modelMemGB + kvCacheGB + runtimeOverhead;
}
|
|
658
|
+
|
|
659
|
+
calculateQualityPrior(model, quant, category) {
|
|
660
|
+
// Base quality by parameter count
|
|
661
|
+
let Q = this.getBaseQuality(model.paramsB);
|
|
662
|
+
|
|
663
|
+
// Family bump
|
|
664
|
+
const familyBump = this.familyBumps[model.family] || 0;
|
|
665
|
+
Q += familyBump;
|
|
666
|
+
|
|
667
|
+
// Quantization penalty
|
|
668
|
+
const quantPenalty = this.quantPenalties[quant] || -5;
|
|
669
|
+
Q += quantPenalty;
|
|
670
|
+
|
|
671
|
+
// Task alignment bump
|
|
672
|
+
const taskBump = this.getTaskAlignmentBump(model, category);
|
|
673
|
+
Q += taskBump;
|
|
674
|
+
|
|
675
|
+
// Reasoning bonus for larger models
|
|
676
|
+
if (category === 'reasoning' && model.paramsB >= 13) {
|
|
677
|
+
Q += 5;
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
// Coding penalty for non-instruct models
|
|
681
|
+
if (category === 'coding' && !model.tags.some(tag => ['coder', 'instruct'].includes(tag))) {
|
|
682
|
+
Q -= 15;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
return Math.max(0, Math.min(100, Q));
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
getBaseQuality(paramsB) {
|
|
689
|
+
// Find closest parameter count in our table
|
|
690
|
+
const keys = Object.keys(this.baseQualityByParams).map(Number).sort((a, b) => a - b);
|
|
691
|
+
|
|
692
|
+
for (let i = 0; i < keys.length; i++) {
|
|
693
|
+
if (paramsB <= keys[i]) {
|
|
694
|
+
return this.baseQualityByParams[keys[i]];
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
// If larger than our table, return the largest
|
|
699
|
+
return this.baseQualityByParams[keys[keys.length - 1]];
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
getTaskAlignmentBump(model, category) {
|
|
703
|
+
const name = model.name.toLowerCase();
|
|
704
|
+
const tags = model.tags;
|
|
705
|
+
|
|
706
|
+
switch (category) {
|
|
707
|
+
case 'coding':
|
|
708
|
+
if (tags.includes('coder') || name.includes('code')) return 6;
|
|
709
|
+
if (tags.includes('instruct')) return 2;
|
|
710
|
+
return 0;
|
|
711
|
+
|
|
712
|
+
case 'multimodal':
|
|
713
|
+
if (model.modalities.includes('vision')) return 6;
|
|
714
|
+
return 0;
|
|
715
|
+
|
|
716
|
+
case 'general':
|
|
717
|
+
if (tags.includes('chat') || tags.includes('instruct')) return 4;
|
|
718
|
+
if (name.includes('code')) return 2;
|
|
719
|
+
return 0;
|
|
720
|
+
|
|
721
|
+
default:
|
|
722
|
+
return 0;
|
|
723
|
+
}
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
estimateSpeed(hardware, model, quant, category) {
|
|
727
|
+
// Determine backend
|
|
728
|
+
let backend = 'cpu_x86';
|
|
729
|
+
if (hardware.acceleration.supports_metal) backend = 'metal';
|
|
730
|
+
else if (hardware.acceleration.supports_cuda) backend = 'cuda';
|
|
731
|
+
else if (hardware.cpu.architecture === 'arm64') backend = 'cpu_arm';
|
|
732
|
+
|
|
733
|
+
// Base speed calculation
|
|
734
|
+
const K = this.backendK[backend];
|
|
735
|
+
let base = K / model.paramsB;
|
|
736
|
+
|
|
737
|
+
// Quantization multiplier
|
|
738
|
+
const quantMultiplier = this.quantSpeedMultipliers[quant] || 1.0;
|
|
739
|
+
base *= quantMultiplier;
|
|
740
|
+
|
|
741
|
+
// Threading multiplier
|
|
742
|
+
if (hardware.cpu.cores >= 8) base *= 1.1;
|
|
743
|
+
if (hardware.acceleration.supports_metal || hardware.acceleration.supports_cuda) base *= 1.2;
|
|
744
|
+
|
|
745
|
+
// Normalize to 0-100 score
|
|
746
|
+
const target = this.targetSpeeds[category];
|
|
747
|
+
return Math.min(100, Math.round((100 * base / target) * 10) / 10);
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
calculateFitScore(requiredGB, budgetGB) {
|
|
751
|
+
const ratio = requiredGB / budgetGB;
|
|
752
|
+
if (ratio <= 0.9) return 100;
|
|
753
|
+
if (ratio <= 1.0) return 70;
|
|
754
|
+
return 0; // Should be filtered out earlier
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
calculateContextScore(model, targetCtx) {
|
|
758
|
+
if (model.ctxMax >= targetCtx) return 100;
|
|
759
|
+
if (model.ctxMax >= targetCtx * 0.5) return 70;
|
|
760
|
+
return 0; // Should be filtered out earlier
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
buildRationale(hardware, model, quant, requiredGB, budget, category, Q, S) {
|
|
764
|
+
const parts = [];
|
|
765
|
+
|
|
766
|
+
// Memory fit
|
|
767
|
+
parts.push(`fits in ${requiredGB}/${budget}GB`);
|
|
768
|
+
|
|
769
|
+
// Quantization
|
|
770
|
+
parts.push(quant);
|
|
771
|
+
|
|
772
|
+
// Special attributes
|
|
773
|
+
if (model.tags.includes('coder')) parts.push('coder-tuned');
|
|
774
|
+
if (model.modalities.includes('vision')) parts.push('vision-capable');
|
|
775
|
+
|
|
776
|
+
// Size sweet spot
|
|
777
|
+
if (model.paramsB >= 7 && model.paramsB <= 13) {
|
|
778
|
+
parts.push(`${model.paramsB}B is sweet spot`);
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
// Backend
|
|
782
|
+
if (hardware.acceleration.supports_metal) parts.push('Metal backend');
|
|
783
|
+
else if (hardware.acceleration.supports_cuda) parts.push('CUDA backend');
|
|
784
|
+
|
|
785
|
+
return parts.join(', ');
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
// ============================================================================
|
|
789
|
+
// PHASE 2: QUICK PROBE (Optional)
|
|
790
|
+
// ============================================================================
|
|
791
|
+
|
|
792
|
+
async runQuickProbes(candidates, hardware, category) {
  // Measure real tokens/sec for each top candidate with a short generation,
  // reusing cached measurements keyed by hardware fingerprint + model + quant.
  // Candidates are mutated in place (score and rationale updated).
  const cache = this.loadBenchCache();
  const hardwareFingerprint = this.getHardwareFingerprint(hardware);

  for (const candidate of candidates) {
    const cacheKey = `${hardwareFingerprint}_${candidate.meta.model_identifier}@${candidate.quant}`;

    // Check cache first
    if (cache[cacheKey] && this.isCacheValid(cache[cacheKey])) {
      const cachedTPS = cache[cacheKey].tps;
      this.updateCandidateWithMeasuredSpeed(candidate, cachedTPS, category);
      candidate.rationale += ` | measured ${cachedTPS.toFixed(1)} t/s (cached)`;
      continue;
    }

    // Run probe (sequentially — concurrent probes would skew each other's timing)
    try {
      const measuredTPS = await this.runSingleProbe(candidate.meta.model_identifier, category);
      this.updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category);
      candidate.rationale += ` | measured ${measuredTPS.toFixed(1)} t/s`;

      // Cache result (saved after every probe so partial progress survives a crash)
      cache[cacheKey] = {
        tps: measuredTPS,
        timestamp: Date.now(),
        category
      };
      this.saveBenchCache(cache);

    } catch (error) {
      // A failed probe leaves the candidate's estimated score untouched.
      console.warn(`Probe failed for ${candidate.meta.name}: ${error.message}`);
    }
  }
}
|
|
827
|
+
|
|
828
|
+
async runSingleProbe(modelId, category) {
|
|
829
|
+
const prompts = {
|
|
830
|
+
'coding': 'Write 3 bullet points about the benefits of unit tests.',
|
|
831
|
+
'general': 'Explain the benefits of regular exercise in 3 sentences.',
|
|
832
|
+
'reasoning': 'What are the steps to solve a quadratic equation?',
|
|
833
|
+
'multimodal': 'Describe what you see in this image.', // Text-only fallback
|
|
834
|
+
'summarization': 'Summarize the key points of effective communication.',
|
|
835
|
+
'reading': 'What are the main themes in classic literature?'
|
|
836
|
+
};
|
|
837
|
+
|
|
838
|
+
const prompt = prompts[category] || prompts['general'];
|
|
839
|
+
const targetTokens = 128;
|
|
840
|
+
|
|
841
|
+
const startTime = Date.now();
|
|
842
|
+
|
|
843
|
+
// Make HTTP request to Ollama API
|
|
844
|
+
const response = await fetch('http://localhost:11434/api/generate', {
|
|
845
|
+
method: 'POST',
|
|
846
|
+
headers: { 'Content-Type': 'application/json' },
|
|
847
|
+
body: JSON.stringify({
|
|
848
|
+
model: modelId,
|
|
849
|
+
prompt: prompt,
|
|
850
|
+
stream: false,
|
|
851
|
+
options: {
|
|
852
|
+
num_predict: targetTokens
|
|
853
|
+
}
|
|
854
|
+
})
|
|
855
|
+
});
|
|
856
|
+
|
|
857
|
+
if (!response.ok) {
|
|
858
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
const result = await response.json();
|
|
862
|
+
const elapsedSeconds = (Date.now() - startTime) / 1000;
|
|
863
|
+
|
|
864
|
+
// Estimate tokens generated (simplified)
|
|
865
|
+
const tokensGenerated = result.response ? result.response.split(' ').length * 1.3 : targetTokens;
|
|
866
|
+
|
|
867
|
+
return tokensGenerated / elapsedSeconds;
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
updateCandidateWithMeasuredSpeed(candidate, measuredTPS, category) {
|
|
871
|
+
const normalizedS = this.normalizeTPSToScore(measuredTPS, category);
|
|
872
|
+
|
|
873
|
+
// Recalculate final score with measured speed
|
|
874
|
+
const weights = this.categoryWeights[category];
|
|
875
|
+
const { Q, F, C } = candidate.components;
|
|
876
|
+
|
|
877
|
+
candidate.estTPS = measuredTPS;
|
|
878
|
+
candidate.components.S = normalizedS;
|
|
879
|
+
candidate.score = Math.round((Q * weights[0] + normalizedS * weights[1] + F * weights[2] + C * weights[3]) * 10) / 10;
|
|
880
|
+
}
|
|
881
|
+
|
|
882
|
+
normalizeTPSToScore(tps, category) {
|
|
883
|
+
const target = this.targetSpeeds[category];
|
|
884
|
+
return Math.min(100, Math.round((100 * tps / target) * 10) / 10);
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
loadBenchCache() {
|
|
888
|
+
try {
|
|
889
|
+
if (fs.existsSync(this.benchCachePath)) {
|
|
890
|
+
return JSON.parse(fs.readFileSync(this.benchCachePath, 'utf8'));
|
|
891
|
+
}
|
|
892
|
+
} catch (error) {
|
|
893
|
+
console.warn('Failed to load benchmark cache:', error.message);
|
|
894
|
+
}
|
|
895
|
+
return {};
|
|
896
|
+
}
|
|
897
|
+
|
|
898
|
+
saveBenchCache(cache) {
|
|
899
|
+
try {
|
|
900
|
+
const dir = path.dirname(this.benchCachePath);
|
|
901
|
+
if (!fs.existsSync(dir)) {
|
|
902
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
903
|
+
}
|
|
904
|
+
fs.writeFileSync(this.benchCachePath, JSON.stringify(cache, null, 2));
|
|
905
|
+
} catch (error) {
|
|
906
|
+
console.warn('Failed to save benchmark cache:', error.message);
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
isCacheValid(cacheEntry) {
|
|
911
|
+
const maxAge = 7 * 24 * 60 * 60 * 1000; // 7 days
|
|
912
|
+
return (Date.now() - cacheEntry.timestamp) < maxAge;
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
getHardwareFingerprint(hardware) {
|
|
916
|
+
return `${hardware.cpu.architecture}_${hardware.cpu.cores}c_${hardware.memory.totalGB}gb_${hardware.gpu.type}`;
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
// ============================================================================
|
|
920
|
+
// FORMAT HELPERS (migrated from enhanced-selector.js)
|
|
921
|
+
// ============================================================================
|
|
922
|
+
|
|
923
|
+
/**
|
|
924
|
+
* Map a candidate to the legacy format expected by callers
|
|
925
|
+
*/
|
|
926
|
+
mapCandidateToLegacyFormat(candidate) {
|
|
927
|
+
return {
|
|
928
|
+
model_name: candidate.meta.name,
|
|
929
|
+
model_identifier: candidate.meta.model_identifier,
|
|
930
|
+
categoryScore: candidate.score,
|
|
931
|
+
hardwareScore: candidate.components ? candidate.components.F : 90,
|
|
932
|
+
specializationScore: candidate.components ? candidate.components.Q : 85,
|
|
933
|
+
popularityScore: candidate.components ? Math.min(100, (candidate.meta.pulls || 0) / 100000 * 100) : 10,
|
|
934
|
+
efficiencyScore: candidate.components ? candidate.components.S : 80,
|
|
935
|
+
pulls: candidate.meta.pulls || 0,
|
|
936
|
+
size: candidate.meta.paramsB,
|
|
937
|
+
family: candidate.meta.family,
|
|
938
|
+
category: this.inferCategoryFromModel(candidate.meta),
|
|
939
|
+
tags: candidate.meta.tags || [],
|
|
940
|
+
quantization: candidate.quant,
|
|
941
|
+
estimatedRAM: candidate.requiredGB,
|
|
942
|
+
reasoning: candidate.rationale
|
|
943
|
+
};
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
mapHardwareTier(hardware) {
|
|
947
|
+
let ram, cores;
|
|
948
|
+
|
|
949
|
+
if (hardware.memory && hardware.memory.totalGB) {
|
|
950
|
+
ram = hardware.memory.totalGB;
|
|
951
|
+
} else if (hardware.memory && hardware.memory.total) {
|
|
952
|
+
ram = hardware.memory.total;
|
|
953
|
+
} else if (hardware.total_ram_gb) {
|
|
954
|
+
ram = hardware.total_ram_gb;
|
|
955
|
+
} else {
|
|
956
|
+
ram = 8;
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
if (hardware.cpu && hardware.cpu.cores) {
|
|
960
|
+
cores = hardware.cpu.cores;
|
|
961
|
+
} else if (hardware.cpu_cores) {
|
|
962
|
+
cores = hardware.cpu_cores;
|
|
963
|
+
} else {
|
|
964
|
+
cores = 4;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
if (ram >= 64 && cores >= 16) return 'extreme';
|
|
968
|
+
if (ram >= 32 && cores >= 12) return 'very_high';
|
|
969
|
+
if (ram >= 16 && cores >= 8) return 'high';
|
|
970
|
+
if (ram >= 8 && cores >= 4) return 'medium';
|
|
971
|
+
return 'low';
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
getCategoryInfo(category) {
|
|
975
|
+
const categoryData = {
|
|
976
|
+
coding: { weight: 1.0, keywords: ['code', 'programming', 'coder'] },
|
|
977
|
+
reasoning: { weight: 1.2, keywords: ['reasoning', 'logic', 'math'] },
|
|
978
|
+
multimodal: { weight: 1.1, keywords: ['vision', 'image', 'multimodal'] },
|
|
979
|
+
creative: { weight: 0.9, keywords: ['creative', 'writing', 'story'] },
|
|
980
|
+
talking: { weight: 1.0, keywords: ['chat', 'conversation', 'assistant'] },
|
|
981
|
+
reading: { weight: 1.0, keywords: ['reading', 'comprehension', 'text'] },
|
|
982
|
+
general: { weight: 1.0, keywords: ['general', 'assistant', 'helper'] }
|
|
983
|
+
};
|
|
984
|
+
return categoryData[category] || categoryData.general;
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
inferCategoryFromModel(model) {
|
|
988
|
+
const name = model.name.toLowerCase();
|
|
989
|
+
const tags = model.tags || [];
|
|
990
|
+
|
|
991
|
+
if (tags.includes('coder') || name.includes('code')) return 'coding';
|
|
992
|
+
if (tags.includes('vision') || (model.modalities && model.modalities.includes('vision'))) return 'multimodal';
|
|
993
|
+
if (tags.includes('embed')) return 'embeddings';
|
|
994
|
+
if (name.includes('creative') || name.includes('wizard')) return 'creative';
|
|
995
|
+
|
|
996
|
+
return 'general';
|
|
997
|
+
}
|
|
998
|
+
|
|
999
|
+
formatModelSize(model) {
|
|
1000
|
+
if (model.paramsB) return `${model.paramsB}B`;
|
|
1001
|
+
if (model.size) return `${model.size}B`;
|
|
1002
|
+
return 'Unknown';
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
/**
|
|
1006
|
+
* Generate recommendations by category (main API, replaces EnhancedModelSelector)
|
|
1007
|
+
*/
|
|
1008
|
+
async getBestModelsForHardware(hardware, allModels) {
|
|
1009
|
+
const categories = ['coding', 'reasoning', 'multimodal', 'creative', 'talking', 'reading', 'general'];
|
|
1010
|
+
const recommendations = {};
|
|
1011
|
+
|
|
1012
|
+
for (const category of categories) {
|
|
1013
|
+
try {
|
|
1014
|
+
const result = await this.selectModels(category, {
|
|
1015
|
+
topN: 3,
|
|
1016
|
+
enableProbe: false,
|
|
1017
|
+
silent: true
|
|
1018
|
+
});
|
|
1019
|
+
|
|
1020
|
+
recommendations[category] = {
|
|
1021
|
+
tier: this.mapHardwareTier(hardware),
|
|
1022
|
+
bestModels: result.candidates.map(candidate => this.mapCandidateToLegacyFormat(candidate)),
|
|
1023
|
+
totalEvaluated: result.total_evaluated,
|
|
1024
|
+
category: this.getCategoryInfo(category)
|
|
1025
|
+
};
|
|
1026
|
+
} catch (error) {
|
|
1027
|
+
recommendations[category] = {
|
|
1028
|
+
tier: this.mapHardwareTier(hardware),
|
|
1029
|
+
bestModels: [],
|
|
1030
|
+
totalEvaluated: 0,
|
|
1031
|
+
category: this.getCategoryInfo(category)
|
|
1032
|
+
};
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
return recommendations;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
/**
 * Generate a recommendation summary across categories.
 *
 * Produces per-category best picks, a flat list of `ollama pull` commands,
 * and a single "best overall" model chosen from the everyday-use categories
 * (general/coding/talking/reading) by highest score.
 *
 * @param {object} recommendations Map of category -> { bestModels, ... } as
 *                                 produced by getBestModelsForHardware().
 * @param {object} hardware        Detected hardware profile (tier mapping only).
 * @returns {object} Summary with hardware_tier, total_categories,
 *                   best_overall, by_category, and quick_commands.
 */
generateRecommendationSummary(recommendations, hardware) {
  const summary = {
    hardware_tier: this.mapHardwareTier(hardware),
    total_categories: Object.keys(recommendations).length,
    best_overall: null,
    by_category: {},
    quick_commands: []
  };

  // Track the single strongest pick among the general-purpose categories.
  let bestOverallScore = 0;
  let bestOverallModel = null;
  let bestOverallCategory = null;

  Object.entries(recommendations).forEach(([category, data]) => {
    // bestModels is ordered best-first; entry 0 is the category winner.
    const bestModel = data.bestModels[0];
    if (bestModel) {
      summary.by_category[category] = {
        // Fallbacks handle both legacy (model_name/categoryScore) and raw
        // (name/score) record shapes.
        name: bestModel.model_name || bestModel.name,
        identifier: bestModel.model_identifier,
        score: Math.round(bestModel.categoryScore || bestModel.score),
        command: `ollama pull ${bestModel.model_identifier}`,
        size: this.formatModelSize(bestModel),
        pulls: bestModel.pulls || 0
      };

      summary.quick_commands.push(`ollama pull ${bestModel.model_identifier}`);

      // Only general-purpose categories compete for "best overall", so a
      // niche winner (e.g. multimodal) can't skew the headline pick.
      const isGeneralCategory = ['general', 'coding', 'talking', 'reading'].includes(category);
      const score = bestModel.categoryScore || bestModel.score || 0;

      // `!bestOverallModel` lets the first eligible model win even at score 0.
      if (isGeneralCategory && (score > bestOverallScore || !bestOverallModel)) {
        bestOverallScore = score;
        bestOverallModel = bestModel;
        bestOverallCategory = category;
      }
    }
  });

  if (bestOverallModel) {
    summary.best_overall = {
      name: bestOverallModel.model_name || bestOverallModel.name,
      identifier: bestOverallModel.model_identifier,
      category: bestOverallCategory,
      score: Math.round(bestOverallScore),
      command: `ollama pull ${bestOverallModel.model_identifier}`
    };
  }

  return summary;
}
|
|
1092
|
+
|
|
1093
|
+
// ============================================================================
|
|
1094
|
+
// PUBLIC API
|
|
1095
|
+
// ============================================================================
|
|
1096
|
+
|
|
1097
|
+
async recommend(category = 'general', options = {}) {
|
|
1098
|
+
const result = await this.selectModels(category, options);
|
|
1099
|
+
return this.formatRecommendations(result);
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
/**
 * Print a selection result as a console table plus a "best pick" footer,
 * then return the result unchanged (pass-through for chaining).
 *
 * @param {object} result Output of selectModels(): { category, hardware,
 *                        candidates, total_evaluated }.
 * @returns {object} The same result object.
 */
formatRecommendations(result) {
  const { category, hardware, candidates, total_evaluated } = result;

  console.log(`\n${category.toUpperCase()} RECOMMENDATIONS`);
  console.log(`Hardware: ${hardware.cpu.cores} cores, ${hardware.memory.totalGB}GB RAM, ${hardware.gpu.type}`);
  console.log(`Evaluated ${total_evaluated} models\n`);

  if (candidates.length === 0) {
    console.log('❌ No suitable models found for your hardware');
    return result;
  }

  // Table header
  // NOTE(review): the header cell text widths don't obviously match the
  // box-drawing rule widths or the padEnd/padStart widths used below (and
  // the INSTALLED/CLOUD prefix is prepended to a name already padded to 26
  // chars) — verify column alignment in real terminal output.
  console.log('┌─────────────────────────────┬────────┬───────┬─────────┬──────────┬───────┬─────────────────────────────┐');
  console.log('│ Model │ Params │ Quant │ Est t/s │ Mem GB │ Score │ Why │');
  console.log('├─────────────────────────────┼────────┼───────┼─────────┼──────────┼───────┼─────────────────────────────┤');

  candidates.forEach((candidate, index) => {
    // Availability marker: locally pulled vs. needs download.
    const isInstalled = candidate.meta.installed ? 'INSTALLED' : 'CLOUD';
    const name = candidate.meta.name.padEnd(26);
    const params = `${candidate.meta.paramsB}B`.padEnd(5);
    const quant = candidate.quant.padEnd(6);
    const tps = candidate.estTPS.toFixed(1).padStart(7);
    const mem = `${candidate.requiredGB}/${hardware.usableMemGB}`.padEnd(9);
    const score = candidate.score.toFixed(1).padStart(5);
    // Rationale is truncated to keep the row within the table width.
    const why = candidate.rationale.substring(0, 29);

    console.log(`│ ${isInstalled}${name} │ ${params} │ ${quant} │ ${tps} │ ${mem} │ ${score} │ ${why} │`);
  });

  console.log('└─────────────────────────────┴────────┴───────┴─────────┴──────────┴───────┴─────────────────────────────┘');

  // Best pick — candidates are ordered best-first, so [0] is the winner.
  const best = candidates[0];
  console.log(`\nBEST PICK: ${best.meta.name}`);
  console.log(`Command: ollama pull ${best.meta.model_identifier}`);
  console.log(`Why: ${best.rationale}`);
  console.log(`Score: ${best.score} (Q:${best.components.Q} S:${best.components.S} F:${best.components.F} C:${best.components.C})`);

  return result;
}
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
module.exports = DeterministicModelSelector;
|