llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,806 @@
+ /**
+  * AI-Check Mode - Meta-evaluation using installed models
+  *
+  * Uses the best installed instruction model as an evaluator to rerank
+  * and refine deterministic selections.
+  */
+
+ const DeterministicModelSelector = require('./deterministic-selector');
+ const { OllamaNativeScraper } = require('../ollama/native-scraper');
+ const crypto = require('crypto');
+ const fs = require('fs');
+ const path = require('path');
+ const fetch = require('node-fetch');
+
+ class AICheckSelector {
+   constructor() {
+     this.deterministicSelector = new DeterministicModelSelector();
+     this.ollamaScraper = new OllamaNativeScraper();
+     this.cachePath = path.join(require('os').homedir(), '.llm-checker', 'ai-check-cache.json');
+
+     // Priority models for evaluation (prefer these if installed)
+     this.preferredEvaluators = [
+       'qwen2.5:7b-instruct',
+       'mistral:7b-instruct',
+       'llama3.1:8b-instruct',
+       'gemma2:9b-it',
+       'qwen2.5-coder:7b', // Good fallback
+       'llama3.2:3b'       // Smallest acceptable
+     ];
+
+     // System prompt for evaluator
+     this.systemPrompt = `You are a precise model evaluator.
+ Your task: Rank ALL provided models for the given category.
+ Important: Your ranking must include EVERY model in the list.
+ Never skip or omit any model from your ranking.
+ Respond with JSON only, no additional text.`;
+
+     // JSON schema for evaluator response
+     this.responseSchema = {
+       type: "object",
+       properties: {
+         winner: { type: "string" },
+         ranking: {
+           type: "array",
+           items: {
+             type: "object",
+             properties: {
+               name: { type: "string" },
+               aiScore: { type: "number" },
+               shortWhy: { type: "string" }
+             },
+             required: ["name", "aiScore", "shortWhy"]
+           }
+         }
+       },
+       required: ["winner", "ranking"]
+     };
+   }
+
+   /**
+    * Main AI-Check function
+    */
+   async aiCheck(options = {}) {
+     const {
+       category = 'general',
+       top = 12,
+       ctx,
+       evaluator = 'auto',
+       weight = 0.3,
+       silent = false
+     } = options;
+
+     const chalk = require('chalk');
+
+     // Phase 1: Get ALL available models from the 177-model Ollama database
+     const hardware = await this.deterministicSelector.getHardware();
+
+     // Use the same large database that the check command uses (177 models)
+     const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
+     const allOllamaModels = ollamaData.models || [];
+
+     if (!silent) {
+       console.log(chalk.cyan('│') + ` Found ${allOllamaModels.length} models in Ollama database`);
+     }
+
+     // Convert Ollama models to the deterministic selector format and evaluate them
+     const candidates = [];
+     const budget = hardware.gpu.unified ? hardware.usableMemGB :
+       (hardware.gpu.vramGB || hardware.usableMemGB);
+
+     // Filter models by category first
+     const categoryModels = this.filterOllamaModelsByCategory(allOllamaModels, category);
+
+     if (!silent) {
+       console.log(chalk.cyan('│') + ` ${categoryModels.length} models match ${category} category`);
+     }
+
+     // Evaluate each model using deterministic scoring
+     for (const ollamaModel of categoryModels) {
+       const convertedModel = this.convertOllamaModelToDeterministicFormat(ollamaModel);
+       const result = this.deterministicSelector.evaluateModel(
+         convertedModel,
+         hardware,
+         category,
+         ctx || this.deterministicSelector.targetContexts[category],
+         budget
+       );
+       if (result) {
+         candidates.push(result);
+       }
+     }
+
+     // Sort by score and get top candidates
+     candidates.sort((a, b) => b.score - a.score);
+     const allModelsResult = {
+       category,
+       hardware,
+       candidates: candidates,
+       total_evaluated: categoryModels.length,
+       timestamp: new Date().toISOString(),
+       targetCtx: ctx || this.deterministicSelector.targetContexts[category]
+     };
+
+     // Then trim to top N for the final result, but let the AI see more options
+     const detResults = {
+       ...allModelsResult,
+       candidates: allModelsResult.candidates.slice(0, top)
+     };
+
+     if (detResults.candidates.length === 0) {
+       console.log(`AI-Check Mode: ${category.toUpperCase()}`);
+       console.log(`❌ No suitable candidates found by deterministic selector.`);
+       return {
+         results: detResults,
+         note: 'No suitable candidates found by deterministic selector.'
+       };
+     }
+
+     // Show the AI-check header in app style
+     console.log('\n' + chalk.bgMagenta.white.bold(' AI-CHECK MODE '));
+     console.log(chalk.magenta('╭' + '─'.repeat(65)));
+     console.log(chalk.magenta('│') + ` Category: ${chalk.yellow(category.toUpperCase())}`);
+     console.log(chalk.magenta('│') + ` AI Weight: ${chalk.cyan(Math.round(weight * 100) + '%')} + Deterministic: ${chalk.green(Math.round((1 - weight) * 100) + '%')}`);
+     console.log(chalk.magenta('│') + ` Candidates Found: ${chalk.green(detResults.candidates.length)}`);
+     console.log(chalk.magenta('│') + ` Hardware: ${chalk.cyan(hardware.cpu.cores + ' cores')}, ${chalk.green(hardware.memory.totalGB + 'GB RAM')}, ${chalk.yellow(hardware.gpu.type)}`);
+     console.log(chalk.magenta('╰'));
+
+     // Phase 2: Pick evaluator model
+     const evaluatorModel = evaluator === 'auto' ?
+       await this.pickEvaluatorModel(hardware) :
+       evaluator;
+
+     if (!evaluatorModel) {
+       console.log('\n' + chalk.red.bold(' ❌ NO EVALUATOR AVAILABLE '));
+       console.log(chalk.red('╭' + '─'.repeat(50)));
+       console.log(chalk.red('│') + ` ${chalk.white('No suitable evaluator model found locally')}`);
+       console.log(chalk.red('│') + ` ${chalk.gray('Install a model for AI evaluation:')}`);
+       console.log(chalk.red('│') + ` ${chalk.cyan('ollama pull qwen2.5:7b-instruct')}`);
+       console.log(chalk.red('│') + ` ${chalk.cyan('ollama pull mistral:7b-instruct')}`);
+       console.log(chalk.red('│') + ` ${chalk.yellow('Showing deterministic results only')}`);
+       console.log(chalk.red('╰'));
+
+       // When no evaluator is available, just return deterministic results with final scores
+       const candidatesWithFinalScores = detResults.candidates.map(candidate => ({
+         ...candidate,
+         aiScore: null,
+         finalScore: candidate.score,
+         rationale: candidate.rationale + ` | AI: not evaluated`
+       }));
+
+       return {
+         results: {
+           ...detResults,
+           candidates: candidatesWithFinalScores,
+           aiEvaluated: false
+         },
+         note: 'No local evaluator found; install qwen2.5:7b-instruct or similar for AI-check.',
+         suggestedInstall: 'ollama pull qwen2.5:7b-instruct'
+       };
+     }
+
+     // Show evaluator status in app style
+     console.log('\n' + chalk.bgCyan.black.bold(' AI EVALUATOR STATUS '));
+     console.log(chalk.cyan('╭' + '─'.repeat(50)));
+     console.log(chalk.cyan('│') + ` Model: ${chalk.green.bold(evaluatorModel)}`);
+
+     // Phase 3: Build payload for evaluator (use a broader set for AI evaluation)
+     const aiEvaluationCandidates = {
+       ...allModelsResult,
+       candidates: allModelsResult.candidates.slice(0, Math.max(20, top * 3)) // AI evaluates more models
+     };
+     console.log(chalk.cyan('│') + ` 🔬 Evaluating: ${chalk.yellow(aiEvaluationCandidates.candidates.length)} models (showing top ${chalk.green(top)})`);
+
+     const payload = this.buildEvaluatorPayload(hardware, category, aiEvaluationCandidates);
+
+     // Phase 4: Check cache
+     const cached = await this.loadCache(payload, evaluatorModel);
+     let aiResult;
+
+     if (cached) {
+       console.log(chalk.cyan('│') + ` 📥 Status: ${chalk.yellow('Using cached evaluation')}`);
+       console.log(chalk.cyan('╰'));
+       aiResult = cached;
+     } else {
+       console.log(chalk.cyan('│') + ` 🔬 Status: ${chalk.blue('Running AI evaluation...')}`);
+       console.log(chalk.cyan('╰'));
+       // Phase 5: Call evaluator
+       try {
+         aiResult = await this.callOllamaEvaluator(evaluatorModel, payload);
+         await this.saveCache(payload, evaluatorModel, aiResult);
+       } catch (error) {
+         console.log('\n' + chalk.red.bold(' ❌ AI EVALUATION FAILED '));
+         console.log(chalk.red('╭' + '─'.repeat(50)));
+         console.log(chalk.red('│') + ` ${chalk.white('Error: ' + error.message)}`);
+         console.log(chalk.red('│') + ` ${chalk.yellow('Falling back to deterministic results')}`);
+         console.log(chalk.red('╰'));
+
+         const candidatesWithFinalScores = detResults.candidates.map(candidate => ({
+           ...candidate,
+           aiScore: null,
+           finalScore: candidate.score,
+           rationale: candidate.rationale + ` | AI: evaluation failed`
+         }));
+
+         return {
+           results: {
+             ...detResults,
+             candidates: candidatesWithFinalScores,
+             aiEvaluated: false
+           },
+           note: `AI evaluation failed (${error.message}); showing deterministic results.`,
+           evaluatorModel
+         };
+       }
+     }
+
+     // Phase 6: Merge deterministic + AI scores
+     // The AI evaluated more models, but we merge with our final candidates
+     const merged = this.mergeDetAndAI(detResults, aiResult, weight);
+
+     return {
+       results: merged,
+       evaluatorModel,
+       aiResult,
+       note: `AI-evaluated using ${evaluatorModel}`
+     };
+   }
+
+   /**
+    * Pick the best installed evaluator model
+    */
+   async pickEvaluatorModel(hardware) {
+     try {
+       const installedModels = await this.deterministicSelector.getInstalledModels();
+
+       if (installedModels.length === 0) {
+         return null;
+       }
+
+       // Filter for text-only models that can be used as evaluators
+       const candidates = installedModels.filter(model => {
+         const isTextOnly = !model.modalities.includes('vision');
+         const isReasonableSize = model.paramsB >= 0.5; // At least 0.5B
+         const notEmbedding = !model.tags.includes('embedding');
+
+         return isTextOnly && isReasonableSize && notEmbedding;
+       });
+
+       if (candidates.length === 0) {
+         return null;
+       }
+
+       // Score evaluator candidates
+       const scored = candidates.map(model => {
+         let score = 0;
+
+         // Quality prior
+         score += this.deterministicSelector.getBaseQuality(model.paramsB);
+
+         // Speed estimation (prefer faster for evaluation)
+         const estimatedSpeed = this.deterministicSelector.estimateSpeed(hardware, model, model.quant || 'Q4_K_M', 'general');
+         score += estimatedSpeed * 0.3;
+
+         // Installed bonus
+         score += 10;
+
+         // High-quality quant bonus
+         if (model.quant && ['Q8_0', 'Q6_K', 'Q5_K_M'].includes(model.quant)) {
+           score += 10;
+         }
+
+         // Preferred model bonus
+         const isPreferred = this.preferredEvaluators.some(pref =>
+           model.model_identifier.includes(pref.split(':')[0])
+         );
+         if (isPreferred) score += 15;
+
+         // Memory pressure penalty
+         const requiredMemory = this.deterministicSelector.estimateRequiredGB(model, model.quant || 'Q4_K_M', 4096);
+         if (requiredMemory > hardware.usableMemGB * 0.8) {
+           score -= 20;
+         }
+
+         return { model, score };
+       });
+
+       // Sort by score and pick the best
+       scored.sort((a, b) => b.score - a.score);
+       return scored[0].model.model_identifier;
+
+     } catch (error) {
+       console.warn(`Failed to pick evaluator: ${error.message}`);
+       return null;
+     }
+   }
+
+   /**
+    * Build payload for the evaluator LLM
+    */
+   buildEvaluatorPayload(hardware, category, detResults) {
+     const categoryWeights = this.deterministicSelector.categoryWeights[category] || [0.4, 0.3, 0.2, 0.1];
+
+     return {
+       hardware: {
+         backend: hardware.acceleration.supports_metal ? 'metal' :
+           hardware.acceleration.supports_cuda ? 'cuda' : 'cpu',
+         usableMemGB: Math.round(hardware.usableMemGB * 10) / 10,
+         vramGB: hardware.gpu.vramGB || null,
+         targetCtx: detResults.targetCtx || this.deterministicSelector.targetContexts[category],
+         category: category
+       },
+       weights: {
+         Q: categoryWeights[0],
+         S: categoryWeights[1],
+         F: categoryWeights[2],
+         C: categoryWeights[3]
+       },
+       candidates: detResults.candidates.map(candidate => ({
+         name: candidate.meta.model_identifier,
+         paramsB: candidate.meta.paramsB,
+         quant: candidate.quant,
+         ctxMax: candidate.meta.ctxMax,
+         modalities: candidate.meta.modalities,
+         tags: candidate.meta.tags,
+         requiredGB: candidate.requiredGB,
+         budgetGB: hardware.usableMemGB,
+         estTPS: candidate.estTPS,
+         measuredTPS: candidate.measuredTPS || null,
+         qualityPrior: candidate.components ? candidate.components.Q : 80,
+         fitScore: candidate.components ? candidate.components.F : 90,
+         ctxScore: candidate.components ? candidate.components.C : 100,
+         detScore: candidate.score,
+         installed: candidate.meta.installed || false
+       }))
+     };
+   }
+
+   /**
+    * Call Ollama evaluator with JSON format
+    */
+   async callOllamaEvaluator(modelId, payload) {
+     const userPrompt = `Category: ${payload.hardware.category}
+
+ Models to rank (RANK ALL ${payload.candidates.length} MODELS):
+ ${payload.candidates.map((c, i) => `${i + 1}. ${c.name} (${c.paramsB}B, ${c.quant}, ${c.requiredGB}GB required, installed: ${c.installed})`).join('\n')}
+
+ IMPORTANT: Your ranking array must contain exactly ${payload.candidates.length} models. Rank ALL models provided.
+
+ Return JSON with this structure:
+ {
+   "winner": "model_name",
+   "ranking": [
+     {"name": "model_name", "aiScore": 85, "shortWhy": "reason"},
+     {"name": "another_model_name", "aiScore": 75, "shortWhy": "reason"}
+   ]
+ }`;
+
+     const requestBody = {
+       model: modelId,
+       stream: false,
+       options: {
+         temperature: 0.1,
+         num_ctx: 4096
+       },
+       messages: [
+         { role: 'system', content: this.systemPrompt },
+         { role: 'user', content: userPrompt }
+       ]
+     };
+
+     const response = await fetch('http://localhost:11434/api/chat', {
+       method: 'POST',
+       headers: { 'Content-Type': 'application/json' },
+       body: JSON.stringify(requestBody)
+     });
+
+     if (!response.ok) {
+       throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
+     }
+
+     const data = await response.json();
+
+     if (!data.message || !data.message.content) {
+       throw new Error(`Invalid response from Ollama API: ${JSON.stringify(data)}`);
+     }
+
+     // Parse JSON response, strip markdown code blocks if present
+     let aiResult;
+     try {
+       let content = data.message.content.trim();
+
+       // Strip markdown code blocks
+       if (content.startsWith('```json') || content.startsWith('```')) {
+         content = content.replace(/```json\s*/, '').replace(/```\s*$/, '').trim();
+       }
+
+       aiResult = JSON.parse(content);
+     } catch (error) {
+       throw new Error(`Invalid JSON from evaluator: ${error.message}`);
+     }
+
+     // Validate schema
+     if (!aiResult.winner || !Array.isArray(aiResult.ranking)) {
+       throw new Error('Response does not match required schema');
+     }
+
+     // Validate that AI ranked ALL models
+     if (aiResult.ranking.length !== payload.candidates.length) {
+       console.warn(`AI only ranked ${aiResult.ranking.length}/${payload.candidates.length} models. Expected all models to be ranked.`);
+     }
+
+     return aiResult;
+   }
+
+   /**
+    * Merge deterministic and AI scores
+    */
+   mergeDetAndAI(detResults, aiResult, weight) {
+     const clampedWeight = Math.max(0, Math.min(1, weight));
+
+     // Create lookup map for AI scores
+     const aiScores = new Map();
+     aiResult.ranking.forEach(item => {
+       aiScores.set(item.name, {
+         aiScore: item.aiScore,
+         shortWhy: item.shortWhy
+       });
+     });
+
+     // Merge scores
+     const mergedCandidates = detResults.candidates.map(candidate => {
+       const aiData = aiScores.get(candidate.meta.model_identifier);
+
+       if (aiData) {
+         const finalScore = Math.round(
+           ((1 - clampedWeight) * candidate.score + clampedWeight * aiData.aiScore) * 10
+         ) / 10;
+
+         return {
+           ...candidate,
+           aiScore: aiData.aiScore,
+           finalScore: finalScore,
+           rationale: candidate.rationale + ` | AI: ${aiData.shortWhy}`
+         };
+       } else {
+         // AI didn't rank this model - assign average AI score as fallback
+         const avgAIScore = aiResult.ranking.length > 0 ?
+           Math.round(aiResult.ranking.reduce((sum, r) => sum + r.aiScore, 0) / aiResult.ranking.length) :
+           candidate.score;
+
+         const finalScore = Math.round(
+           ((1 - clampedWeight) * candidate.score + clampedWeight * avgAIScore) * 10
+         ) / 10;
+
+         return {
+           ...candidate,
+           aiScore: avgAIScore,
+           finalScore: finalScore,
+           rationale: candidate.rationale + ` | AI: estimated (${avgAIScore}) - model not ranked by evaluator`
+         };
+       }
+     });
+
+     // Sort by final score
+     mergedCandidates.sort((a, b) => b.finalScore - a.finalScore);
+
+     return {
+       ...detResults,
+       candidates: mergedCandidates,
+       winner: aiResult.winner,
+       aiEvaluated: true
+     };
+   }
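+
+   // Worked example of the merge formula above (illustrative values only):
+   // with weight = 0.3, a candidate with deterministic score 82 and AI score 90
+   // gets finalScore = (0.7 * 82) + (0.3 * 90) = 84.4, rounded to one decimal.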
+
+   /**
+    * Generate cache key for results
+    */
+   generateCacheKey(payload, evaluatorModel) {
+     const hashInput = JSON.stringify({
+       hardware: payload.hardware,
+       category: payload.hardware.category,
+       candidates: payload.candidates.map(c => ({
+         name: c.name,
+         quant: c.quant,
+         detScore: c.detScore,
+         measuredTPS: c.measuredTPS
+       })),
+       evaluator: evaluatorModel
+     });
+
+     return crypto.createHash('md5').update(hashInput).digest('hex');
+   }
+
+   /**
+    * Load cached AI result
+    */
+   async loadCache(payload, evaluatorModel) {
+     try {
+       if (!fs.existsSync(this.cachePath)) {
+         return null;
+       }
+
+       const cache = JSON.parse(fs.readFileSync(this.cachePath, 'utf8'));
+       const key = this.generateCacheKey(payload, evaluatorModel);
+       const entry = cache[key];
+
+       if (!entry) return null;
+
+       // Check if cache is still valid (7 days)
+       const maxAge = 7 * 24 * 60 * 60 * 1000;
+       if (Date.now() - entry.timestamp > maxAge) {
+         return null;
+       }
+
+       return entry.result;
+     } catch (error) {
+       console.warn(`Failed to load cache: ${error.message}`);
+       return null;
+     }
+   }
+
+   /**
+    * Save AI result to cache
+    */
+   async saveCache(payload, evaluatorModel, result) {
+     try {
+       let cache = {};
+
+       if (fs.existsSync(this.cachePath)) {
+         try {
+           cache = JSON.parse(fs.readFileSync(this.cachePath, 'utf8'));
+         } catch (error) {
+           // Invalid cache file, start fresh
+           cache = {};
+         }
+       }
+
+       const key = this.generateCacheKey(payload, evaluatorModel);
+       cache[key] = {
+         timestamp: Date.now(),
+         result: result,
+         evaluator: evaluatorModel
+       };
+
+       // Ensure directory exists
+       const dir = path.dirname(this.cachePath);
+       if (!fs.existsSync(dir)) {
+         fs.mkdirSync(dir, { recursive: true });
+       }
+
+       fs.writeFileSync(this.cachePath, JSON.stringify(cache, null, 2));
+     } catch (error) {
+       console.warn(`Failed to save cache: ${error.message}`);
+     }
+   }
+
+   /**
+    * Format results for display
+    */
+   formatResults(aiCheckResult) {
+     const { results, evaluatorModel, note } = aiCheckResult;
+     const chalk = require('chalk');
+     const { table } = require('table');
+
+     if (results.candidates.length === 0) {
+       console.log('\n' + chalk.red.bold(' ❌ AI-CHECK: No suitable models found '));
+       return aiCheckResult;
+     }
+
+     // AI-Check header
+     console.log('\n' + chalk.bgMagenta.white.bold(' AI-CHECK RESULTS '));
+     console.log(chalk.magenta('╭' + '─'.repeat(65)));
+     console.log(chalk.magenta('│') + ` Evaluator: ${chalk.cyan(evaluatorModel || 'None')}`);
+     // Uppercase before coloring: calling .toUpperCase() on the colored string
+     // would also uppercase the trailing 'm' of the ANSI escape and break it
+     console.log(chalk.magenta('│') + ` Category: ${chalk.yellow((results.category || 'general').toUpperCase())}`);
+     console.log(chalk.magenta('│') + ` Models Evaluated: ${chalk.green(results.candidates.length)}`);
+     if (note) {
+       console.log(chalk.magenta('│') + ` 📝 Note: ${chalk.gray(note)}`);
+     }
+     console.log(chalk.magenta('╰'));
+
+     // Create table data in the same style as the check command
+     const tableData = [
+       [
+         chalk.bgMagenta.white.bold(' Model '),
+         chalk.bgMagenta.white.bold(' Size '),
+         chalk.bgMagenta.white.bold(' Det Score '),
+         chalk.bgMagenta.white.bold(' AI Score '),
+         chalk.bgMagenta.white.bold(' Final '),
+         chalk.bgMagenta.white.bold(' RAM '),
+         chalk.bgMagenta.white.bold(' Speed '),
+         chalk.bgMagenta.white.bold(' Status ')
+       ]
+     ];
+
+     results.candidates.forEach(candidate => {
+       const isInstalled = candidate.meta.installed;
+       const modelName = candidate.meta.name || candidate.meta.model_identifier;
+       const size = `${candidate.meta.paramsB}B`;
+       const detScore = `${Math.round(candidate.score)}/100`;
+       const aiScore = candidate.aiScore ? `${Math.round(candidate.aiScore)}/100` : 'N/A';
+       const finalScore = `${Math.round(candidate.finalScore)}/100`;
+       const ram = `${candidate.requiredGB}/${Math.round(results.hardware.usableMemGB)}GB`;
+       const speed = `${candidate.estTPS.toFixed(0)}t/s`;
+
+       const statusDisplay = isInstalled ? chalk.green.bold('Installed') : '🌐 Available';
+
+       const row = [
+         modelName,
+         size,
+         this.getScoreColor(candidate.score)(detScore),
+         candidate.aiScore ? this.getScoreColor(candidate.aiScore)(aiScore) : chalk.gray(aiScore),
+         this.getScoreColor(candidate.finalScore)(finalScore),
+         ram,
+         speed,
+         statusDisplay
+       ];
+       tableData.push(row);
+     });
+
+     console.log(table(tableData));
+
+     // Best recommendation section
+     const best = results.candidates[0];
+     console.log('\n' + chalk.bgGreen.black.bold(' AI-POWERED RECOMMENDATION '));
+     console.log(chalk.green('╭' + '─'.repeat(50)));
+     console.log(chalk.green('│') + ` Best Model: ${chalk.cyan.bold(best.meta.name || best.meta.model_identifier)}`);
+     console.log(chalk.green('│') + ` Final Score: ${this.getScoreColor(best.finalScore)(Math.round(best.finalScore) + '/100')}`);
+     console.log(chalk.green('│') + ` ⚖️ Det: ${Math.round(best.score)} + AI: ${best.aiScore ? Math.round(best.aiScore) : 'N/A'}`);
+     console.log(chalk.green('│'));
+
+     if (best.meta.installed) {
+       console.log(chalk.green('│') + ` Ready to use:`);
+       console.log(chalk.green('│') + ` ${chalk.cyan.bold(`ollama run ${best.meta.model_identifier}`)}`);
+     } else {
+       console.log(chalk.green('│') + ` 📥 Install command:`);
+       console.log(chalk.green('│') + ` ${chalk.cyan.bold(`ollama pull ${best.meta.model_identifier}`)}`);
+     }
+
+     console.log(chalk.green('│'));
+     console.log(chalk.green('│') + ` Why this model?`);
+
+     // Parse and display reasoning nicely
+     const reasons = best.rationale.split(' | ');
+     reasons.forEach(reason => {
+       if (reason.trim()) {
+         console.log(chalk.green('│') + ` • ${chalk.yellow(reason.trim())}`);
+       }
+     });
+
+     console.log(chalk.green('╰'));
+
+     return aiCheckResult;
+   }
+
+   getScoreColor(score) {
+     const chalk = require('chalk');
+     if (score >= 85) return chalk.green.bold;
+     if (score >= 70) return chalk.cyan.bold;
+     if (score >= 55) return chalk.yellow.bold;
+     if (score >= 40) return chalk.red.bold;
+     return chalk.gray;
+   }
+
+   /**
+    * Filter Ollama models by category (same logic as deterministic selector)
+    */
+   filterOllamaModelsByCategory(models, category) {
+     return models.filter(model => {
+       const modelName = model.model_name.toLowerCase();
+       const modelId = model.model_identifier.toLowerCase();
+       const fullText = `${modelName} ${modelId}`;
+
+       switch (category) {
+         case 'coding':
+           return fullText.includes('code') || fullText.includes('coder') ||
+             fullText.includes('deepseek-coder') || fullText.includes('qwen2.5-coder');
+
+         case 'multimodal':
+           return fullText.includes('llava') || fullText.includes('vision') ||
+             fullText.includes('pixtral') || fullText.includes('moondream') ||
+             fullText.includes('qwen-vl');
+
+         case 'embeddings':
+           return fullText.includes('embed') || fullText.includes('nomic') ||
+             fullText.includes('bge') || fullText.includes('e5');
+
+         case 'reasoning':
+           return fullText.includes('deepseek-r1') || fullText.includes('reasoning') ||
+             fullText.includes('math') || model.model_identifier.includes('o1-');
+
+         case 'creative':
+           return fullText.includes('dolphin') || fullText.includes('wizard') ||
+             fullText.includes('uncensored') || fullText.includes('airoboros');
+
+         case 'reading':
+           return fullText.includes('solar') || fullText.includes('openchat') ||
+             fullText.includes('neural-chat') || fullText.includes('vicuna');
+
+         case 'talking':
+           // Most conversational models - llama, mistral, etc.
+           return (fullText.includes('llama') || fullText.includes('mistral') ||
+             fullText.includes('phi') || fullText.includes('gemma') ||
+             fullText.includes('qwen') || fullText.includes('chat') ||
+             fullText.includes('instruct')) &&
+             // Exclude specialized models
+             !fullText.includes('coder') && !fullText.includes('vl') &&
+             !fullText.includes('embed') && !fullText.includes('vision');
+
+         default: // general
+           return true; // Most models can handle general tasks
+       }
+     });
+   }
+
+   /**
+    * Convert Ollama model format to deterministic selector format
+    */
+   convertOllamaModelToDeterministicFormat(ollamaModel) {
+     // Extract size from model identifier
+     const sizeMatch = ollamaModel.model_identifier.match(/(\d+\.?\d*)[bm]/i);
+     const sizeNum = sizeMatch ? parseFloat(sizeMatch[1]) : 7; // Default 7B
+     const sizeUnit = sizeMatch ? sizeMatch[0].slice(-1).toLowerCase() : 'b';
+     const paramsB = sizeUnit === 'm' ? sizeNum / 1000 : sizeNum;
+
+     // Extract family
+     const modelId = ollamaModel.model_identifier.toLowerCase();
+     let family = 'unknown';
+     if (modelId.includes('qwen2.5')) family = 'qwen2.5';
+     else if (modelId.includes('qwen')) family = 'qwen';
+     else if (modelId.includes('llama3.2')) family = 'llama3.2';
+     else if (modelId.includes('llama3.1')) family = 'llama3.1';
+     else if (modelId.includes('llama')) family = 'llama';
+     else if (modelId.includes('mistral')) family = 'mistral';
+     else if (modelId.includes('gemma')) family = 'gemma2';
+     else if (modelId.includes('phi')) family = 'phi-3';
+     else if (modelId.includes('llava')) family = 'llava';
+
+     // Determine modalities
+     const modalities = ['text'];
+     if (modelId.includes('llava') || modelId.includes('vision') || modelId.includes('vl')) {
+       modalities.push('vision');
+     }
+
+     // Determine tags
+     const tags = [];
+     if (modelId.includes('instruct') || ollamaModel.model_name.toLowerCase().includes('instruct')) tags.push('instruct');
+     if (modelId.includes('chat') || ollamaModel.model_name.toLowerCase().includes('chat')) tags.push('chat');
+     if (modelId.includes('code') || ollamaModel.model_name.toLowerCase().includes('code')) tags.push('coder');
+     if (modalities.includes('vision')) tags.push('vision');
+     if (modelId.includes('embed')) tags.push('embedding');
+
+     // Default context length based on model family
+     let ctxMax = 4096;
+     if (family.includes('qwen')) ctxMax = 32768;
+     else if (family.includes('llama3')) ctxMax = 131072;
+     else if (family.includes('mistral')) ctxMax = 32768;
+     else if (family.includes('gemma')) ctxMax = 8192;
+
+     // Estimate model size in GB (rough approximation)
+     const sizeGB = paramsB * 0.6; // ~0.6GB per billion parameters for Q4_K_M
+
+     return {
+       name: ollamaModel.model_name,
+       family: family,
+       paramsB: paramsB,
+       ctxMax: ctxMax,
+       quant: 'Q4_K_M', // Default quantization
+       sizeGB: sizeGB,
+       modalities: modalities,
+       tags: tags,
+       model_identifier: ollamaModel.model_identifier,
+       installed: false, // Will be updated later by checking local models
+       pulls: ollamaModel.pulls || 0,
+       description: ollamaModel.description || ''
+     };
+   }
+ }
+
+ module.exports = AICheckSelector;
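For context, a minimal usage sketch of the class above. The deep require path is an assumption based on the file's location inside the package (package/src/models/ai-check-selector.js) and may not be a supported public entry point; a local Ollama server on localhost:11434 with at least one installed instruction model is also assumed.

    const AICheckSelector = require('llm-checker/src/models/ai-check-selector');

    (async () => {
      const selector = new AICheckSelector();

      // Blend 70% deterministic score with 30% AI evaluator score (weight = 0.3);
      // evaluator: 'auto' lets pickEvaluatorModel() choose the best installed model.
      const aiCheckResult = await selector.aiCheck({
        category: 'coding',
        top: 10,
        weight: 0.3,
        evaluator: 'auto'
      });

      // Print the merged ranking table and the recommendation box.
      selector.formatResults(aiCheckResult);

      // Candidates arrive sorted by finalScore; results.aiEvaluated is false when
      // no evaluator was available and the deterministic ranking was returned as-is.
      const best = aiCheckResult.results.candidates[0];
      console.log(best.meta.model_identifier, best.finalScore, aiCheckResult.results.aiEvaluated);
    })();

Note that aiCheck() never throws on evaluator failure: both the missing-evaluator and failed-call paths return the deterministic ranking with aiEvaluated: false and an explanatory note, so callers can branch on the flag rather than on error handling.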