llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,806 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI-Check Mode - Meta-evaluation using installed models
|
|
3
|
+
*
|
|
4
|
+
* Uses the best installed instruction model as an evaluator to rerank
|
|
5
|
+
* and refine deterministic selections.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const DeterministicModelSelector = require('./deterministic-selector');
|
|
9
|
+
const { OllamaNativeScraper } = require('../ollama/native-scraper');
|
|
10
|
+
const crypto = require('crypto');
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const path = require('path');
|
|
13
|
+
const fetch = require('node-fetch');
|
|
14
|
+
|
|
15
|
+
class AICheckSelector {
|
|
16
|
+
constructor() {
    // Deterministic scorer that produces the base ranking before AI re-ranking.
    this.deterministicSelector = new DeterministicModelSelector();
    // Scraper that supplies the full Ollama model catalog.
    this.ollamaScraper = new OllamaNativeScraper();
    // On-disk cache for evaluator responses (~/.llm-checker/ai-check-cache.json).
    this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');

    // Priority models for evaluation (prefer these if installed).
    // Listed best-first; matching is by base name (before the ':') in pickEvaluatorModel.
    this.preferredEvaluators = [
        'qwen2.5:7b-instruct',
        'mistral:7b-instruct',
        'llama3.1:8b-instruct',
        'gemma2:9b-it',
        'qwen2.5-coder:7b', // Good fallback
        'llama3.2:3b' // Smallest acceptable
    ];

    // System prompt for evaluator: forces a complete, JSON-only ranking.
    this.systemPrompt = `You are a precise model evaluator.
Your task: Rank ALL provided models for the given category.
Important: Your ranking must include EVERY model in the list.
Never skip or omit any model from your ranking.
Respond with JSON only, no additional text.`;

    // JSON schema the evaluator response is expected to follow
    // (validated loosely in callOllamaEvaluator: winner + ranking array).
    this.responseSchema = {
        type: "object",
        properties: {
            winner: { type: "string" },
            ranking: {
                type: "array",
                items: {
                    type: "object",
                    properties: {
                        name: { type: "string" },
                        aiScore: { type: "number" },
                        shortWhy: { type: "string" }
                    },
                    required: ["name", "aiScore", "shortWhy"]
                }
            }
        },
        required: ["winner", "ranking"]
    };
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Main AI-Check function
|
|
62
|
+
*/
|
|
63
|
+
async aiCheck(options = {}) {
|
|
64
|
+
const {
|
|
65
|
+
category = 'general',
|
|
66
|
+
top = 12,
|
|
67
|
+
ctx,
|
|
68
|
+
evaluator = 'auto',
|
|
69
|
+
weight = 0.3,
|
|
70
|
+
silent = false
|
|
71
|
+
} = options;
|
|
72
|
+
|
|
73
|
+
const chalk = require('chalk');
|
|
74
|
+
|
|
75
|
+
// Phase 1: Get ALL available models from the 177-model Ollama database
|
|
76
|
+
const hardware = await this.deterministicSelector.getHardware();
|
|
77
|
+
|
|
78
|
+
// Use the same large database that check command uses (177 models)
|
|
79
|
+
const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
|
|
80
|
+
const allOllamaModels = ollamaData.models || [];
|
|
81
|
+
|
|
82
|
+
if (!silent) {
|
|
83
|
+
console.log(chalk.cyan('│') + ` Found ${allOllamaModels.length} models in Ollama database`);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Convert Ollama models to deterministic selector format and evaluate them
|
|
87
|
+
const candidates = [];
|
|
88
|
+
const budget = hardware.gpu.unified ? hardware.usableMemGB :
|
|
89
|
+
(hardware.gpu.vramGB || hardware.usableMemGB);
|
|
90
|
+
|
|
91
|
+
// Filter models by category first
|
|
92
|
+
const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
|
|
93
|
+
|
|
94
|
+
if (!silent) {
|
|
95
|
+
console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Evaluate each model using deterministic scoring
|
|
99
|
+
for (const ollamaModel of categoryModels) {
|
|
100
|
+
const convertedModel = this.convertOllamaModelToDeterministicFormat(ollamaModel);
|
|
101
|
+
const result = this.deterministicSelector.evaluateModel(
|
|
102
|
+
convertedModel,
|
|
103
|
+
hardware,
|
|
104
|
+
category,
|
|
105
|
+
ctx || this.deterministicSelector.targetContexts[category],
|
|
106
|
+
budget
|
|
107
|
+
);
|
|
108
|
+
if (result) {
|
|
109
|
+
candidates.push(result);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Sort by score and get top candidates
|
|
114
|
+
candidates.sort((a, b) => b.score - a.score);
|
|
115
|
+
const allModelsResult = {
|
|
116
|
+
category,
|
|
117
|
+
hardware,
|
|
118
|
+
candidates: candidates,
|
|
119
|
+
total_evaluated: categoryModels.length,
|
|
120
|
+
timestamp: new Date().toISOString(),
|
|
121
|
+
targetCtx: ctx || this.deterministicSelector.targetContexts[category]
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
// Then trim to top N for final result, but let AI see more options
|
|
125
|
+
const detResults = {
|
|
126
|
+
...allModelsResult,
|
|
127
|
+
candidates: allModelsResult.candidates.slice(0, top)
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
if (detResults.candidates.length === 0) {
|
|
131
|
+
console.log(`AI-Check Mode: ${category.toUpperCase()}`);
|
|
132
|
+
console.log(`❌ No suitable candidates found by deterministic selector.`);
|
|
133
|
+
return {
|
|
134
|
+
results: detResults,
|
|
135
|
+
note: 'No suitable candidates found by deterministic selector.'
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Show professional AI-check header in app style
|
|
140
|
+
console.log('\n' + chalk.bgMagenta.white.bold(' AI-CHECK MODE '));
|
|
141
|
+
console.log(chalk.magenta('╭' + '─'.repeat(65)));
|
|
142
|
+
console.log(chalk.magenta('│') + ` Category: ${chalk.yellow(category.toUpperCase())}`);
|
|
143
|
+
console.log(chalk.magenta('│') + ` AI Weight: ${chalk.cyan(Math.round(weight * 100) + '%')} + Deterministic: ${chalk.green(Math.round((1-weight) * 100) + '%')}`);
|
|
144
|
+
console.log(chalk.magenta('│') + ` Candidates Found: ${chalk.green(detResults.candidates.length)}`);
|
|
145
|
+
console.log(chalk.magenta('│') + ` Hardware: ${chalk.cyan(hardware.cpu.cores + ' cores')}, ${chalk.green(hardware.memory.totalGB + 'GB RAM')}, ${chalk.yellow(hardware.gpu.type)}`);
|
|
146
|
+
console.log(chalk.magenta('╰'));
|
|
147
|
+
|
|
148
|
+
// Phase 2: Pick evaluator model
|
|
149
|
+
const evaluatorModel = evaluator === 'auto' ?
|
|
150
|
+
await this.pickEvaluatorModel(hardware) :
|
|
151
|
+
evaluator;
|
|
152
|
+
|
|
153
|
+
if (!evaluatorModel) {
|
|
154
|
+
console.log('\n' + chalk.red.bold(' ❌ NO EVALUATOR AVAILABLE '));
|
|
155
|
+
console.log(chalk.red('╭' + '─'.repeat(50)));
|
|
156
|
+
console.log(chalk.red('│') + ` ${chalk.white('No suitable evaluator model found locally')}`);
|
|
157
|
+
console.log(chalk.red('│') + ` ${chalk.gray('Install a model for AI evaluation:')}`);
|
|
158
|
+
console.log(chalk.red('│') + ` ${chalk.cyan('ollama pull qwen2.5:7b-instruct')}`);
|
|
159
|
+
console.log(chalk.red('│') + ` ${chalk.cyan('ollama pull mistral:7b-instruct')}`);
|
|
160
|
+
console.log(chalk.red('│') + ` ${chalk.yellow('Showing deterministic results only')}`);
|
|
161
|
+
console.log(chalk.red('╰'));
|
|
162
|
+
|
|
163
|
+
// When no evaluator is available, just return deterministic results with final scores
|
|
164
|
+
const candidatesWithFinalScores = detResults.candidates.map(candidate => ({
|
|
165
|
+
...candidate,
|
|
166
|
+
aiScore: null,
|
|
167
|
+
finalScore: candidate.score,
|
|
168
|
+
rationale: candidate.rationale + ` | AI: not evaluated`
|
|
169
|
+
}));
|
|
170
|
+
|
|
171
|
+
return {
|
|
172
|
+
results: {
|
|
173
|
+
...detResults,
|
|
174
|
+
candidates: candidatesWithFinalScores,
|
|
175
|
+
aiEvaluated: false
|
|
176
|
+
},
|
|
177
|
+
note: 'No local evaluator found; install qwen2.5:7b-instruct or similar for AI-check.',
|
|
178
|
+
suggestedInstall: 'ollama pull qwen2.5:7b-instruct'
|
|
179
|
+
};
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Show evaluator status in app style
|
|
183
|
+
console.log('\n' + chalk.bgCyan.black.bold(' AI EVALUATOR STATUS '));
|
|
184
|
+
console.log(chalk.cyan('╭' + '─'.repeat(50)));
|
|
185
|
+
console.log(chalk.cyan('│') + ` Model: ${chalk.green.bold(evaluatorModel)}`);
|
|
186
|
+
|
|
187
|
+
// Phase 3: Build payload for evaluator (use broader set for AI evaluation)
|
|
188
|
+
const aiEvaluationCandidates = {
|
|
189
|
+
...allModelsResult,
|
|
190
|
+
candidates: allModelsResult.candidates.slice(0, Math.max(20, top * 3)) // AI evaluates more models
|
|
191
|
+
};
|
|
192
|
+
console.log(chalk.cyan('│') + ` 🔬 Evaluating: ${chalk.yellow(aiEvaluationCandidates.candidates.length)} models (showing top ${chalk.green(top)})`);
|
|
193
|
+
|
|
194
|
+
const payload = this.buildEvaluatorPayload(hardware, category, aiEvaluationCandidates);
|
|
195
|
+
|
|
196
|
+
// Phase 4: Check cache
|
|
197
|
+
const cached = await this.loadCache(payload, evaluatorModel);
|
|
198
|
+
let aiResult;
|
|
199
|
+
|
|
200
|
+
if (cached) {
|
|
201
|
+
console.log(chalk.cyan('│') + ` 📥 Status: ${chalk.yellow('Using cached evaluation')}`);
|
|
202
|
+
console.log(chalk.cyan('╰'));
|
|
203
|
+
aiResult = cached;
|
|
204
|
+
} else {
|
|
205
|
+
console.log(chalk.cyan('│') + ` 🔬 Status: ${chalk.blue('Running AI evaluation...')}`);
|
|
206
|
+
console.log(chalk.cyan('╰'));
|
|
207
|
+
// Phase 5: Call evaluator
|
|
208
|
+
try {
|
|
209
|
+
aiResult = await this.callOllamaEvaluator(evaluatorModel, payload);
|
|
210
|
+
await this.saveCache(payload, evaluatorModel, aiResult);
|
|
211
|
+
} catch (error) {
|
|
212
|
+
console.log('\n' + chalk.red.bold(' ❌ AI EVALUATION FAILED '));
|
|
213
|
+
console.log(chalk.red('╭' + '─'.repeat(50)));
|
|
214
|
+
console.log(chalk.red('│') + ` ${chalk.white('Error: ' + error.message)}`);
|
|
215
|
+
console.log(chalk.red('│') + ` ${chalk.yellow('Falling back to deterministic results')}`);
|
|
216
|
+
console.log(chalk.red('╰'));
|
|
217
|
+
|
|
218
|
+
const candidatesWithFinalScores = detResults.candidates.map(candidate => ({
|
|
219
|
+
...candidate,
|
|
220
|
+
aiScore: null,
|
|
221
|
+
finalScore: candidate.score,
|
|
222
|
+
rationale: candidate.rationale + ` | AI: evaluation failed`
|
|
223
|
+
}));
|
|
224
|
+
|
|
225
|
+
return {
|
|
226
|
+
results: {
|
|
227
|
+
...detResults,
|
|
228
|
+
candidates: candidatesWithFinalScores,
|
|
229
|
+
aiEvaluated: false
|
|
230
|
+
},
|
|
231
|
+
note: `AI evaluation failed (${error.message}); showing deterministic results.`,
|
|
232
|
+
evaluatorModel
|
|
233
|
+
};
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Phase 6: Merge deterministic + AI scores
|
|
238
|
+
// AI evaluated more models, but we merge with our final candidates
|
|
239
|
+
const merged = this.mergeDetAndAI(detResults, aiResult, weight);
|
|
240
|
+
|
|
241
|
+
return {
|
|
242
|
+
results: merged,
|
|
243
|
+
evaluatorModel,
|
|
244
|
+
aiResult,
|
|
245
|
+
note: `AI-evaluated using ${evaluatorModel}`
|
|
246
|
+
};
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* Pick the best installed evaluator model
|
|
251
|
+
*/
|
|
252
|
+
async pickEvaluatorModel(hardware) {
|
|
253
|
+
try {
|
|
254
|
+
const installedModels = await this.deterministicSelector.getInstalledModels();
|
|
255
|
+
|
|
256
|
+
if (installedModels.length === 0) {
|
|
257
|
+
return null;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Filter for text-only models that can be used as evaluators
|
|
261
|
+
const candidates = installedModels.filter(model => {
|
|
262
|
+
const isTextOnly = !model.modalities.includes('vision');
|
|
263
|
+
const isReasonableSize = model.paramsB >= 0.5; // At least 0.5B
|
|
264
|
+
const notEmbedding = !model.tags.includes('embedding');
|
|
265
|
+
|
|
266
|
+
return isTextOnly && isReasonableSize && notEmbedding;
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
if (candidates.length === 0) {
|
|
270
|
+
return null;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Score evaluator candidates
|
|
274
|
+
const scored = candidates.map(model => {
|
|
275
|
+
let score = 0;
|
|
276
|
+
|
|
277
|
+
// Quality prior
|
|
278
|
+
score += this.deterministicSelector.getBaseQuality(model.paramsB);
|
|
279
|
+
|
|
280
|
+
// Speed estimation (prefer faster for evaluation)
|
|
281
|
+
const estimatedSpeed = this.deterministicSelector.estimateSpeed(hardware, model, model.quant || 'Q4_K_M', 'general');
|
|
282
|
+
score += estimatedSpeed * 0.3;
|
|
283
|
+
|
|
284
|
+
// Installed bonus
|
|
285
|
+
score += 10;
|
|
286
|
+
|
|
287
|
+
// High-quality quant bonus
|
|
288
|
+
if (model.quant && ['Q8_0', 'Q6_K', 'Q5_K_M'].includes(model.quant)) {
|
|
289
|
+
score += 10;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Preferred model bonus
|
|
293
|
+
const isPreferred = this.preferredEvaluators.some(pref =>
|
|
294
|
+
model.model_identifier.includes(pref.split(':')[0])
|
|
295
|
+
);
|
|
296
|
+
if (isPreferred) score += 15;
|
|
297
|
+
|
|
298
|
+
// Memory pressure penalty
|
|
299
|
+
const requiredMemory = this.deterministicSelector.estimateRequiredGB(model, model.quant || 'Q4_K_M', 4096);
|
|
300
|
+
if (requiredMemory > hardware.usableMemGB * 0.8) {
|
|
301
|
+
score -= 20;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
return { model, score };
|
|
305
|
+
});
|
|
306
|
+
|
|
307
|
+
// Sort by score and pick the best
|
|
308
|
+
scored.sort((a, b) => b.score - a.score);
|
|
309
|
+
return scored[0].model.model_identifier;
|
|
310
|
+
|
|
311
|
+
} catch (error) {
|
|
312
|
+
console.warn(`Failed to pick evaluator: ${error.message}`);
|
|
313
|
+
return null;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* Build payload for the evaluator LLM
|
|
319
|
+
*/
|
|
320
|
+
buildEvaluatorPayload(hardware, category, detResults) {
|
|
321
|
+
const categoryWeights = this.deterministicSelector.categoryWeights[category] || [0.4, 0.3, 0.2, 0.1];
|
|
322
|
+
|
|
323
|
+
return {
|
|
324
|
+
hardware: {
|
|
325
|
+
backend: hardware.acceleration.supports_metal ? 'metal' :
|
|
326
|
+
hardware.acceleration.supports_cuda ? 'cuda' : 'cpu',
|
|
327
|
+
usableMemGB: Math.round(hardware.usableMemGB * 10) / 10,
|
|
328
|
+
vramGB: hardware.gpu.vramGB || null,
|
|
329
|
+
targetCtx: detResults.targetCtx || this.deterministicSelector.targetContexts[category],
|
|
330
|
+
category: category
|
|
331
|
+
},
|
|
332
|
+
weights: {
|
|
333
|
+
Q: categoryWeights[0],
|
|
334
|
+
S: categoryWeights[1],
|
|
335
|
+
F: categoryWeights[2],
|
|
336
|
+
C: categoryWeights[3]
|
|
337
|
+
},
|
|
338
|
+
candidates: detResults.candidates.map(candidate => ({
|
|
339
|
+
name: candidate.meta.model_identifier,
|
|
340
|
+
paramsB: candidate.meta.paramsB,
|
|
341
|
+
quant: candidate.quant,
|
|
342
|
+
ctxMax: candidate.meta.ctxMax,
|
|
343
|
+
modalities: candidate.meta.modalities,
|
|
344
|
+
tags: candidate.meta.tags,
|
|
345
|
+
requiredGB: candidate.requiredGB,
|
|
346
|
+
budgetGB: hardware.usableMemGB,
|
|
347
|
+
estTPS: candidate.estTPS,
|
|
348
|
+
measuredTPS: candidate.measuredTPS || null,
|
|
349
|
+
qualityPrior: candidate.components ? candidate.components.Q : 80,
|
|
350
|
+
fitScore: candidate.components ? candidate.components.F : 90,
|
|
351
|
+
ctxScore: candidate.components ? candidate.components.C : 100,
|
|
352
|
+
detScore: candidate.score,
|
|
353
|
+
installed: candidate.meta.installed || false
|
|
354
|
+
}))
|
|
355
|
+
};
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Call Ollama evaluator with JSON format
|
|
360
|
+
*/
|
|
361
|
+
async callOllamaEvaluator(modelId, payload) {
|
|
362
|
+
const userPrompt = `Category: ${payload.hardware.category}
|
|
363
|
+
|
|
364
|
+
Models to rank (RANK ALL ${payload.candidates.length} MODELS):
|
|
365
|
+
${payload.candidates.map((c, i) => `${i + 1}. ${c.name} (${c.paramsB}B, ${c.quant}, ${c.requiredGB}GB required, installed: ${c.installed})`).join('\n')}
|
|
366
|
+
|
|
367
|
+
IMPORTANT: Your ranking array must contain exactly ${payload.candidates.length} models. Rank ALL models provided.
|
|
368
|
+
|
|
369
|
+
Return JSON with this structure:
|
|
370
|
+
{
|
|
371
|
+
"winner": "model_name",
|
|
372
|
+
"ranking": [
|
|
373
|
+
{"name": "model_name", "aiScore": 85, "shortWhy": "reason"},
|
|
374
|
+
{"name": "another_model_name", "aiScore": 75, "shortWhy": "reason"}
|
|
375
|
+
]
|
|
376
|
+
}`;
|
|
377
|
+
|
|
378
|
+
const requestBody = {
|
|
379
|
+
model: modelId,
|
|
380
|
+
stream: false,
|
|
381
|
+
options: {
|
|
382
|
+
temperature: 0.1,
|
|
383
|
+
num_ctx: 4096
|
|
384
|
+
},
|
|
385
|
+
messages: [
|
|
386
|
+
{ role: 'system', content: this.systemPrompt },
|
|
387
|
+
{ role: 'user', content: userPrompt }
|
|
388
|
+
]
|
|
389
|
+
};
|
|
390
|
+
|
|
391
|
+
const response = await fetch('http://localhost:11434/api/chat', {
|
|
392
|
+
method: 'POST',
|
|
393
|
+
headers: { 'Content-Type': 'application/json' },
|
|
394
|
+
body: JSON.stringify(requestBody)
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
if (!response.ok) {
|
|
398
|
+
throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
const data = await response.json();
|
|
402
|
+
|
|
403
|
+
if (!data.message || !data.message.content) {
|
|
404
|
+
throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
// Parse JSON response, strip markdown code blocks if present
|
|
408
|
+
let aiResult;
|
|
409
|
+
try {
|
|
410
|
+
let content = data.message.content.trim();
|
|
411
|
+
|
|
412
|
+
// Strip markdown code blocks
|
|
413
|
+
if (content.startsWith('```json') || content.startsWith('```')) {
|
|
414
|
+
content = content.replace(/```json\s*/, '').replace(/```\s*$/, '').trim();
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
aiResult = JSON.parse(content);
|
|
418
|
+
} catch (error) {
|
|
419
|
+
throw new Error(`Invalid JSON from evaluator: ${error.message}`);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
// Validate schema
|
|
423
|
+
if (!aiResult.winner || !Array.isArray(aiResult.ranking)) {
|
|
424
|
+
throw new Error('Response does not match required schema');
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// Validate that AI ranked ALL models
|
|
428
|
+
if (aiResult.ranking.length !== payload.candidates.length) {
|
|
429
|
+
console.warn(`AI only ranked ${aiResult.ranking.length}/${payload.candidates.length} models. Expected all models to be ranked.`);
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
return aiResult;
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
/**
|
|
436
|
+
* Merge deterministic and AI scores
|
|
437
|
+
*/
|
|
438
|
+
mergeDetAndAI(detResults, aiResult, weight) {
|
|
439
|
+
const clampedWeight = Math.max(0, Math.min(1, weight));
|
|
440
|
+
|
|
441
|
+
// Create lookup map for AI scores
|
|
442
|
+
const aiScores = new Map();
|
|
443
|
+
aiResult.ranking.forEach(item => {
|
|
444
|
+
aiScores.set(item.name, {
|
|
445
|
+
aiScore: item.aiScore,
|
|
446
|
+
shortWhy: item.shortWhy
|
|
447
|
+
});
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
// Merge scores
|
|
451
|
+
const mergedCandidates = detResults.candidates.map(candidate => {
|
|
452
|
+
const aiData = aiScores.get(candidate.meta.model_identifier);
|
|
453
|
+
|
|
454
|
+
if (aiData) {
|
|
455
|
+
const finalScore = Math.round(
|
|
456
|
+
((1 - clampedWeight) * candidate.score + clampedWeight * aiData.aiScore) * 10
|
|
457
|
+
) / 10;
|
|
458
|
+
|
|
459
|
+
return {
|
|
460
|
+
...candidate,
|
|
461
|
+
aiScore: aiData.aiScore,
|
|
462
|
+
finalScore: finalScore,
|
|
463
|
+
rationale: candidate.rationale + ` | AI: ${aiData.shortWhy}`
|
|
464
|
+
};
|
|
465
|
+
} else {
|
|
466
|
+
// AI didn't rank this model - assign average AI score as fallback
|
|
467
|
+
const avgAIScore = aiResult.ranking.length > 0 ?
|
|
468
|
+
Math.round(aiResult.ranking.reduce((sum, r) => sum + r.aiScore, 0) / aiResult.ranking.length) :
|
|
469
|
+
candidate.score;
|
|
470
|
+
|
|
471
|
+
const finalScore = Math.round(
|
|
472
|
+
((1 - clampedWeight) * candidate.score + clampedWeight * avgAIScore) * 10
|
|
473
|
+
) / 10;
|
|
474
|
+
|
|
475
|
+
return {
|
|
476
|
+
...candidate,
|
|
477
|
+
aiScore: avgAIScore,
|
|
478
|
+
finalScore: finalScore,
|
|
479
|
+
rationale: candidate.rationale + ` | AI: estimated (${avgAIScore}) - model not ranked by evaluator`
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
// Sort by final score
|
|
485
|
+
mergedCandidates.sort((a, b) => b.finalScore - a.finalScore);
|
|
486
|
+
|
|
487
|
+
return {
|
|
488
|
+
...detResults,
|
|
489
|
+
candidates: mergedCandidates,
|
|
490
|
+
winner: aiResult.winner,
|
|
491
|
+
aiEvaluated: true
|
|
492
|
+
};
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
/**
|
|
496
|
+
* Generate cache key for results
|
|
497
|
+
*/
|
|
498
|
+
generateCacheKey(payload, evaluatorModel) {
|
|
499
|
+
const hashInput = JSON.stringify({
|
|
500
|
+
hardware: payload.hardware,
|
|
501
|
+
category: payload.hardware.category,
|
|
502
|
+
candidates: payload.candidates.map(c => ({
|
|
503
|
+
name: c.name,
|
|
504
|
+
quant: c.quant,
|
|
505
|
+
detScore: c.detScore,
|
|
506
|
+
measuredTPS: c.measuredTPS
|
|
507
|
+
})),
|
|
508
|
+
evaluator: evaluatorModel
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
return crypto.createHash('md5').update(hashInput).digest('hex');
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
/**
|
|
515
|
+
* Load cached AI result
|
|
516
|
+
*/
|
|
517
|
+
async loadCache(payload, evaluatorModel) {
|
|
518
|
+
try {
|
|
519
|
+
if (!fs.existsSync(this.cachePath)) {
|
|
520
|
+
return null;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
const cache = JSON.parse(fs.readFileSync(this.cachePath, 'utf8'));
|
|
524
|
+
const key = this.generateCacheKey(payload, evaluatorModel);
|
|
525
|
+
const entry = cache[key];
|
|
526
|
+
|
|
527
|
+
if (!entry) return null;
|
|
528
|
+
|
|
529
|
+
// Check if cache is still valid (7 days)
|
|
530
|
+
const maxAge = 7 * 24 * 60 * 60 * 1000;
|
|
531
|
+
if (Date.now() - entry.timestamp > maxAge) {
|
|
532
|
+
return null;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
return entry.result;
|
|
536
|
+
} catch (error) {
|
|
537
|
+
console.warn(`Failed to load cache: ${error.message}`);
|
|
538
|
+
return null;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
/**
|
|
543
|
+
* Save AI result to cache
|
|
544
|
+
*/
|
|
545
|
+
async saveCache(payload, evaluatorModel, result) {
|
|
546
|
+
try {
|
|
547
|
+
let cache = {};
|
|
548
|
+
|
|
549
|
+
if (fs.existsSync(this.cachePath)) {
|
|
550
|
+
try {
|
|
551
|
+
cache = JSON.parse(fs.readFileSync(this.cachePath, 'utf8'));
|
|
552
|
+
} catch (error) {
|
|
553
|
+
// Invalid cache file, start fresh
|
|
554
|
+
cache = {};
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
const key = this.generateCacheKey(payload, evaluatorModel);
|
|
559
|
+
cache[key] = {
|
|
560
|
+
timestamp: Date.now(),
|
|
561
|
+
result: result,
|
|
562
|
+
evaluator: evaluatorModel
|
|
563
|
+
};
|
|
564
|
+
|
|
565
|
+
// Ensure directory exists
|
|
566
|
+
const dir = path.dirname(this.cachePath);
|
|
567
|
+
if (!fs.existsSync(dir)) {
|
|
568
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
fs.writeFileSync(this.cachePath, JSON.stringify(cache, null, 2));
|
|
572
|
+
} catch (error) {
|
|
573
|
+
console.warn(`Failed to save cache: ${error.message}`);
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
/**
|
|
578
|
+
* Format results for display
|
|
579
|
+
*/
|
|
580
|
+
formatResults(aiCheckResult) {
|
|
581
|
+
const { results, evaluatorModel, note } = aiCheckResult;
|
|
582
|
+
const chalk = require('chalk');
|
|
583
|
+
const { table } = require('table');
|
|
584
|
+
|
|
585
|
+
if (results.candidates.length === 0) {
|
|
586
|
+
console.log('\n' + chalk.red.bold(' ❌ AI-CHECK: No suitable models found '));
|
|
587
|
+
return aiCheckResult;
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
// AI-Check header
|
|
591
|
+
console.log('\n' + chalk.bgMagenta.white.bold(' AI-CHECK RESULTS '));
|
|
592
|
+
console.log(chalk.magenta('╭' + '─'.repeat(65)));
|
|
593
|
+
console.log(chalk.magenta('│') + ` Evaluator: ${chalk.cyan(evaluatorModel || 'None')}`);
|
|
594
|
+
console.log(chalk.magenta('│') + ` Category: ${chalk.yellow(results.category || 'general').toUpperCase()}`);
|
|
595
|
+
console.log(chalk.magenta('│') + ` Models Evaluated: ${chalk.green(results.candidates.length)}`);
|
|
596
|
+
if (note) {
|
|
597
|
+
console.log(chalk.magenta('│') + ` 📝 Note: ${chalk.gray(note)}`);
|
|
598
|
+
}
|
|
599
|
+
console.log(chalk.magenta('╰'));
|
|
600
|
+
|
|
601
|
+
// Create table data in the same style as check command
|
|
602
|
+
const tableData = [
|
|
603
|
+
[
|
|
604
|
+
chalk.bgMagenta.white.bold(' Model '),
|
|
605
|
+
chalk.bgMagenta.white.bold(' Size '),
|
|
606
|
+
chalk.bgMagenta.white.bold(' Det Score '),
|
|
607
|
+
chalk.bgMagenta.white.bold(' AI Score '),
|
|
608
|
+
chalk.bgMagenta.white.bold(' Final '),
|
|
609
|
+
chalk.bgMagenta.white.bold(' RAM '),
|
|
610
|
+
chalk.bgMagenta.white.bold(' Speed '),
|
|
611
|
+
chalk.bgMagenta.white.bold(' Status ')
|
|
612
|
+
]
|
|
613
|
+
];
|
|
614
|
+
|
|
615
|
+
results.candidates.forEach(candidate => {
|
|
616
|
+
const isInstalled = candidate.meta.installed;
|
|
617
|
+
const modelName = candidate.meta.name || candidate.meta.model_identifier;
|
|
618
|
+
const size = `${candidate.meta.paramsB}B`;
|
|
619
|
+
const detScore = `${Math.round(candidate.score)}/100`;
|
|
620
|
+
const aiScore = candidate.aiScore ? `${Math.round(candidate.aiScore)}/100` : 'N/A';
|
|
621
|
+
const finalScore = `${Math.round(candidate.finalScore)}/100`;
|
|
622
|
+
const ram = `${candidate.requiredGB}/${Math.round(results.hardware.usableMemGB)}GB`;
|
|
623
|
+
const speed = `${candidate.estTPS.toFixed(0)}t/s`;
|
|
624
|
+
|
|
625
|
+
let statusDisplay, modelDisplay;
|
|
626
|
+
if (isInstalled) {
|
|
627
|
+
statusDisplay = chalk.green.bold('Installed');
|
|
628
|
+
modelDisplay = `${modelName}`;
|
|
629
|
+
} else {
|
|
630
|
+
statusDisplay = '🌐 Available';
|
|
631
|
+
modelDisplay = modelName;
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
const row = [
|
|
635
|
+
modelDisplay,
|
|
636
|
+
size,
|
|
637
|
+
this.getScoreColor(candidate.score)(detScore),
|
|
638
|
+
candidate.aiScore ? this.getScoreColor(candidate.aiScore)(aiScore) : chalk.gray(aiScore),
|
|
639
|
+
this.getScoreColor(candidate.finalScore)(finalScore),
|
|
640
|
+
ram,
|
|
641
|
+
speed,
|
|
642
|
+
statusDisplay
|
|
643
|
+
];
|
|
644
|
+
tableData.push(row);
|
|
645
|
+
});
|
|
646
|
+
|
|
647
|
+
console.log(table(tableData));
|
|
648
|
+
|
|
649
|
+
// Best recommendation section
|
|
650
|
+
const best = results.candidates[0];
|
|
651
|
+
console.log('\n' + chalk.bgGreen.black.bold(' AI-POWERED RECOMMENDATION '));
|
|
652
|
+
console.log(chalk.green('╭' + '─'.repeat(50)));
|
|
653
|
+
console.log(chalk.green('│') + ` Best Model: ${chalk.cyan.bold(best.meta.name || best.meta.model_identifier)}`);
|
|
654
|
+
console.log(chalk.green('│') + ` Final Score: ${this.getScoreColor(best.finalScore)(Math.round(best.finalScore) + '/100')}`);
|
|
655
|
+
console.log(chalk.green('│') + ` ⚖️ Det: ${Math.round(best.score)} + AI: ${best.aiScore ? Math.round(best.aiScore) : 'N/A'}`);
|
|
656
|
+
console.log(chalk.green('│'));
|
|
657
|
+
|
|
658
|
+
if (best.meta.installed) {
|
|
659
|
+
console.log(chalk.green('│') + ` Ready to use:`);
|
|
660
|
+
console.log(chalk.green('│') + ` ${chalk.cyan.bold(`ollama run ${best.meta.model_identifier}`)}`);
|
|
661
|
+
} else {
|
|
662
|
+
console.log(chalk.green('│') + ` 📥 Install command:`);
|
|
663
|
+
console.log(chalk.green('│') + ` ${chalk.cyan.bold(`ollama pull ${best.meta.model_identifier}`)}`);
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
console.log(chalk.green('│'));
|
|
667
|
+
console.log(chalk.green('│') + ` Why this model?`);
|
|
668
|
+
|
|
669
|
+
// Parse and display reasoning nicely
|
|
670
|
+
const reasons = best.rationale.split(' | ');
|
|
671
|
+
reasons.forEach(reason => {
|
|
672
|
+
if (reason.trim()) {
|
|
673
|
+
console.log(chalk.green('│') + ` • ${chalk.yellow(reason.trim())}`);
|
|
674
|
+
}
|
|
675
|
+
});
|
|
676
|
+
|
|
677
|
+
console.log(chalk.green('╰'));
|
|
678
|
+
|
|
679
|
+
return aiCheckResult;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
getScoreColor(score) {
|
|
683
|
+
const chalk = require('chalk');
|
|
684
|
+
if (score >= 85) return chalk.green.bold;
|
|
685
|
+
if (score >= 70) return chalk.cyan.bold;
|
|
686
|
+
if (score >= 55) return chalk.yellow.bold;
|
|
687
|
+
if (score >= 40) return chalk.red.bold;
|
|
688
|
+
return chalk.gray;
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
/**
|
|
692
|
+
* Filter Ollama models by category (same logic as deterministic selector)
|
|
693
|
+
*/
|
|
694
|
+
filterOllamaModelsByCategory(models, category) {
|
|
695
|
+
return models.filter(model => {
|
|
696
|
+
const modelName = model.model_name.toLowerCase();
|
|
697
|
+
const modelId = model.model_identifier.toLowerCase();
|
|
698
|
+
const fullText = `${modelName} ${modelId}`;
|
|
699
|
+
|
|
700
|
+
switch (category) {
|
|
701
|
+
case 'coding':
|
|
702
|
+
return fullText.includes('code') || fullText.includes('coder') ||
|
|
703
|
+
fullText.includes('deepseek-coder') || fullText.includes('qwen2.5-coder');
|
|
704
|
+
|
|
705
|
+
case 'multimodal':
|
|
706
|
+
return fullText.includes('llava') || fullText.includes('vision') ||
|
|
707
|
+
fullText.includes('pixtral') || fullText.includes('moondream') ||
|
|
708
|
+
fullText.includes('qwen-vl');
|
|
709
|
+
|
|
710
|
+
case 'embeddings':
|
|
711
|
+
return fullText.includes('embed') || fullText.includes('nomic') ||
|
|
712
|
+
fullText.includes('bge') || fullText.includes('e5');
|
|
713
|
+
|
|
714
|
+
case 'reasoning':
|
|
715
|
+
return fullText.includes('deepseek-r1') || fullText.includes('reasoning') ||
|
|
716
|
+
fullText.includes('math') || model.model_identifier.includes('o1-');
|
|
717
|
+
|
|
718
|
+
case 'creative':
|
|
719
|
+
return fullText.includes('dolphin') || fullText.includes('wizard') ||
|
|
720
|
+
fullText.includes('uncensored') || fullText.includes('airoboros');
|
|
721
|
+
|
|
722
|
+
case 'reading':
|
|
723
|
+
return fullText.includes('solar') || fullText.includes('openchat') ||
|
|
724
|
+
fullText.includes('neural-chat') || fullText.includes('vicuna');
|
|
725
|
+
|
|
726
|
+
case 'talking':
|
|
727
|
+
// Most conversational models - llama, mistral, etc.
|
|
728
|
+
return (fullText.includes('llama') || fullText.includes('mistral') ||
|
|
729
|
+
fullText.includes('phi') || fullText.includes('gemma') ||
|
|
730
|
+
fullText.includes('qwen') || fullText.includes('chat') ||
|
|
731
|
+
fullText.includes('instruct')) &&
|
|
732
|
+
// Exclude specialized models
|
|
733
|
+
!fullText.includes('coder') && !fullText.includes('vl') &&
|
|
734
|
+
!fullText.includes('embed') && !fullText.includes('vision');
|
|
735
|
+
|
|
736
|
+
default: // general
|
|
737
|
+
return true; // Most models can handle general tasks
|
|
738
|
+
}
|
|
739
|
+
});
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
/**
|
|
743
|
+
* Convert Ollama model format to deterministic selector format
|
|
744
|
+
*/
|
|
745
|
+
convertOllamaModelToDeterministicFormat(ollamaModel) {
|
|
746
|
+
// Extract size from model identifier
|
|
747
|
+
const sizeMatch = ollamaModel.model_identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
748
|
+
const sizeNum = sizeMatch ? parseFloat(sizeMatch[1]) : 7; // Default 7B
|
|
749
|
+
const sizeUnit = sizeMatch ? sizeMatch[0].slice(-1).toLowerCase() : 'b';
|
|
750
|
+
const paramsB = sizeUnit === 'm' ? sizeNum / 1000 : sizeNum;
|
|
751
|
+
|
|
752
|
+
// Extract family
|
|
753
|
+
const modelId = ollamaModel.model_identifier.toLowerCase();
|
|
754
|
+
let family = 'unknown';
|
|
755
|
+
if (modelId.includes('qwen2.5')) family = 'qwen2.5';
|
|
756
|
+
else if (modelId.includes('qwen')) family = 'qwen';
|
|
757
|
+
else if (modelId.includes('llama3.2')) family = 'llama3.2';
|
|
758
|
+
else if (modelId.includes('llama3.1')) family = 'llama3.1';
|
|
759
|
+
else if (modelId.includes('llama')) family = 'llama';
|
|
760
|
+
else if (modelId.includes('mistral')) family = 'mistral';
|
|
761
|
+
else if (modelId.includes('gemma')) family = 'gemma2';
|
|
762
|
+
else if (modelId.includes('phi')) family = 'phi-3';
|
|
763
|
+
else if (modelId.includes('llava')) family = 'llava';
|
|
764
|
+
|
|
765
|
+
// Determine modalities
|
|
766
|
+
const modalities = ['text'];
|
|
767
|
+
if (modelId.includes('llava') || modelId.includes('vision') || modelId.includes('vl')) {
|
|
768
|
+
modalities.push('vision');
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
// Determine tags
|
|
772
|
+
const tags = [];
|
|
773
|
+
if (modelId.includes('instruct') || ollamaModel.model_name.toLowerCase().includes('instruct')) tags.push('instruct');
|
|
774
|
+
if (modelId.includes('chat') || ollamaModel.model_name.toLowerCase().includes('chat')) tags.push('chat');
|
|
775
|
+
if (modelId.includes('code') || ollamaModel.model_name.toLowerCase().includes('code')) tags.push('coder');
|
|
776
|
+
if (modalities.includes('vision')) tags.push('vision');
|
|
777
|
+
if (modelId.includes('embed')) tags.push('embedding');
|
|
778
|
+
|
|
779
|
+
// Default context length based on model family
|
|
780
|
+
let ctxMax = 4096;
|
|
781
|
+
if (family.includes('qwen')) ctxMax = 32768;
|
|
782
|
+
else if (family.includes('llama3')) ctxMax = 131072;
|
|
783
|
+
else if (family.includes('mistral')) ctxMax = 32768;
|
|
784
|
+
else if (family.includes('gemma')) ctxMax = 8192;
|
|
785
|
+
|
|
786
|
+
// Estimate model size in GB (rough approximation)
|
|
787
|
+
const sizeGB = paramsB * 0.6; // ~0.6GB per billion parameters for Q4_K_M
|
|
788
|
+
|
|
789
|
+
return {
|
|
790
|
+
name: ollamaModel.model_name,
|
|
791
|
+
family: family,
|
|
792
|
+
paramsB: paramsB,
|
|
793
|
+
ctxMax: ctxMax,
|
|
794
|
+
quant: 'Q4_K_M', // Default quantization
|
|
795
|
+
sizeGB: sizeGB,
|
|
796
|
+
modalities: modalities,
|
|
797
|
+
tags: tags,
|
|
798
|
+
model_identifier: ollamaModel.model_identifier,
|
|
799
|
+
installed: false, // Will be updated later by checking local models
|
|
800
|
+
pulls: ollamaModel.pulls || 0,
|
|
801
|
+
description: ollamaModel.description || ''
|
|
802
|
+
};
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
module.exports = AICheckSelector;
|