llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
package/src/index.js
ADDED
|
@@ -0,0 +1,2289 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const HardwareDetector = require('./hardware/detector');
|
|
3
|
+
const ExpandedModelsDatabase = require('./models/expanded_database');
|
|
4
|
+
const DeterministicModelSelector = require('./models/deterministic-selector');
|
|
5
|
+
const CompatibilityAnalyzer = require(path.join(__dirname, '..', 'analyzer', 'compatibility'));
|
|
6
|
+
const PerformanceAnalyzer = require(path.join(__dirname, '..', 'analyzer', 'performance'));
|
|
7
|
+
const OllamaClient = require('./ollama/client');
|
|
8
|
+
const { getLogger } = require('./utils/logger');
|
|
9
|
+
const { getOllamaModelsIntegration, OllamaNativeScraper } = require('./ollama/native-scraper');
|
|
10
|
+
const VerboseProgress = require('./utils/verbose-progress');
|
|
11
|
+
|
|
12
|
+
class LLMChecker {
|
|
13
|
+
/**
 * Orchestrates hardware detection, model-compatibility scoring and Ollama
 * integration to recommend local LLMs for the current machine.
 *
 * @param {object} [options] - Configuration flags.
 * @param {boolean} [options.verbose] - Pass `false` to silence progress
 *   output; any other value (including omission) enables it.
 */
constructor(options = {}) {
  // Collaborators for each stage of the analysis pipeline; each is
  // constructed eagerly here and reused across analyze() calls.
  this.hardwareDetector = new HardwareDetector();
  this.expandedModelsDatabase = new ExpandedModelsDatabase();
  this.intelligentRecommender = new DeterministicModelSelector();
  this.ollamaScraper = new OllamaNativeScraper();
  this.compatibilityAnalyzer = new CompatibilityAnalyzer();
  this.performanceAnalyzer = new PerformanceAnalyzer();
  this.ollamaClient = new OllamaClient();
  // Child logger tags every entry with this component's name.
  this.logger = getLogger().createChild('LLMChecker');
  this.verbose = options.verbose !== false; // Default to verbose unless explicitly disabled
  this.progress = null; // Will be initialized when needed (lazily, in analyze())
}
|
|
25
|
+
|
|
26
|
+
async analyze(options = {}) {
|
|
27
|
+
// Initialize verbose progress if enabled
|
|
28
|
+
if (this.verbose && !this.progress) {
|
|
29
|
+
this.progress = VerboseProgress.create(true);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
try {
|
|
33
|
+
if (this.progress) {
|
|
34
|
+
this.progress.startOperation('LLM Model Analysis & Compatibility Check', 8);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Step 1: Hardware Detection
|
|
38
|
+
if (this.progress) {
|
|
39
|
+
this.progress.step('System Detection', 'Scanning hardware specifications...');
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const hardware = await this.hardwareDetector.getSystemInfo();
|
|
43
|
+
this.logger.info('Hardware detected', { hardware });
|
|
44
|
+
|
|
45
|
+
// Detect platform and route to appropriate logic (use hardware OS for simulation support)
|
|
46
|
+
const detectedPlatform = hardware.os?.platform || process.platform;
|
|
47
|
+
|
|
48
|
+
// Report hardware detection progress before platform-specific analysis
|
|
49
|
+
if (this.progress) {
|
|
50
|
+
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
51
|
+
await new Promise(resolve => setTimeout(resolve, 200)); // Small delay for demo
|
|
52
|
+
const isApple = detectedPlatform === 'darwin';
|
|
53
|
+
const memLabel = isApple ? 'unified memory' : 'RAM';
|
|
54
|
+
this.progress.substep(`Memory detected: ${hardware.memory.total}GB ${memLabel}`, true);
|
|
55
|
+
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
|
|
56
|
+
this.progress.stepComplete(summary);
|
|
57
|
+
}
|
|
58
|
+
const isAppleSilicon = detectedPlatform === 'darwin';
|
|
59
|
+
const isWindows = detectedPlatform === 'win32';
|
|
60
|
+
const isLinux = detectedPlatform === 'linux';
|
|
61
|
+
|
|
62
|
+
if (isAppleSilicon) {
|
|
63
|
+
return await this.analyzeForAppleSilicon(hardware, options);
|
|
64
|
+
} else if (isWindows) {
|
|
65
|
+
return await this.analyzeForWindows(hardware, options);
|
|
66
|
+
} else if (isLinux) {
|
|
67
|
+
return await this.analyzeForLinux(hardware, options);
|
|
68
|
+
} else {
|
|
69
|
+
// Fallback to Windows logic for unknown platforms
|
|
70
|
+
return await this.analyzeForWindows(hardware, options);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
} catch (error) {
|
|
74
|
+
if (this.progress) {
|
|
75
|
+
this.progress.fail(`Analysis failed: ${error.message}`);
|
|
76
|
+
}
|
|
77
|
+
this.logger.error('Analysis failed', { error: error.message, component: 'LLMChecker', method: 'analyze' });
|
|
78
|
+
throw new Error(`Analysis failed: ${error.message}`);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// ============================================================================
|
|
83
|
+
// PLATFORM-SPECIFIC ANALYSIS METHODS
|
|
84
|
+
// ============================================================================
|
|
85
|
+
|
|
86
|
+
async analyzeForAppleSilicon(hardware, options = {}) {
|
|
87
|
+
// Apple Silicon optimized analysis with unified memory consideration
|
|
88
|
+
if (this.progress) {
|
|
89
|
+
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
90
|
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
91
|
+
this.progress.substep(`Memory detected: ${hardware.memory.total}GB unified memory`, true);
|
|
92
|
+
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Apple Silicon GPU'}`;
|
|
93
|
+
this.progress.stepComplete(summary);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Continue with original analysis flow but with Apple Silicon specific optimizations
|
|
97
|
+
return await this.runAnalysisFlow(hardware, options, 'apple_silicon');
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
async analyzeForWindows(hardware, options = {}) {
|
|
101
|
+
// Windows-specific analysis with discrete GPU / iGPU handling
|
|
102
|
+
if (this.progress) {
|
|
103
|
+
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
104
|
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
105
|
+
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
106
|
+
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
|
|
107
|
+
this.progress.stepComplete(summary);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Continue with original analysis flow but with Windows specific optimizations
|
|
111
|
+
return await this.runAnalysisFlow(hardware, options, 'windows');
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
async analyzeForLinux(hardware, options = {}) {
|
|
115
|
+
// Linux-specific analysis (similar to Windows but with Linux considerations)
|
|
116
|
+
if (this.progress) {
|
|
117
|
+
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
118
|
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
119
|
+
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
120
|
+
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'GPU'}`;
|
|
121
|
+
this.progress.stepComplete(summary);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Continue with original analysis flow but with Linux specific optimizations
|
|
125
|
+
return await this.runAnalysisFlow(hardware, options, 'linux');
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
 * Shared analysis pipeline (steps 2-8) executed after platform dispatch:
 * database sync -> base-model load -> Ollama integration -> optional
 * filtering -> compatibility scoring -> performance estimation ->
 * recommendations -> aggregate result.
 *
 * @param {object} hardware - Detected system info from HardwareDetector.
 * @param {object} options - filter (string), includeCloud (boolean),
 *   maxSize / minSize (numbers, billions of parameters), useCase, ...
 * @param {string} platform - 'apple_silicon' | 'windows' | 'linux';
 *   routes the compatibility heuristics and is echoed in the result.
 * @returns {Promise<object>} { hardware, compatible, marginal, incompatible,
 *   recommendations, intelligentRecommendations, ollamaInfo, ollamaModels,
 *   summary, performanceEstimates, platform }.
 */
async runAnalysisFlow(hardware, options, platform) {
  // Step 2: Database Sync (using static database)
  if (this.progress) {
    this.progress.step('Database Sync', 'Loading model database...');
  }

  // Using static pre-loaded model database with classifications.
  // NOTE(review): this count is a display-only constant, not derived from
  // the actual database contents — it can drift from models.length below.
  const modelCount = 177; // Static count from pre-loaded database

  if (this.progress) {
    this.progress.found(`${modelCount} models in database`);
    this.progress.stepComplete('Database synchronized');
  }

  // Step 3: Load Base Models
  if (this.progress) {
    this.progress.step('Model Analysis', 'Loading base model definitions...');
  }

  let models = this.expandedModelsDatabase.getAllModels();

  if (this.progress) {
    this.progress.found(`Loaded ${models.length} base models`);
    this.progress.stepComplete('Base models loaded');
  }

  // Step 4: Ollama Integration
  if (this.progress) {
    this.progress.step('Ollama Integration', 'Connecting to Ollama and checking installed models...');
  }

  const ollamaIntegration = await this.integrateOllamaModels(hardware, models);

  if (this.progress) {
    if (ollamaIntegration.ollamaInfo.available) {
      const installed = ollamaIntegration.compatibleOllamaModels.length;
      this.progress.found(`Ollama connected with ${installed} locally installed models`);
    } else {
      this.progress.warn('Ollama not available - continuing with database analysis only');
    }
    this.progress.stepComplete('Ollama integration complete');
  }

  // Step 5: Filter Models (skip step if no filtering needed)
  if (options.filter || !options.includeCloud || options.maxSize || options.minSize) {
    if (this.progress) {
      this.progress.step('Model Filtering', 'Applying user-specified filters...');
    }

    const originalCount = models.length;
    if (options.filter) {
      // Delegates to this.filterModels (defined elsewhere in this class).
      models = this.filterModels(models, options.filter);
      if (this.progress) {
        this.progress.substep(`Filter applied: ${options.filter}`);
      }
    }

    if (!options.includeCloud) {
      models = models.filter(model => model.type === 'local');
      if (this.progress) {
        this.progress.substep('Cloud models excluded', true);
      }
    }

    // Apply size filters (maxSize and minSize in billions of parameters)
    if (options.maxSize || options.minSize) {
      models = models.filter(model => {
        // Extract size in B from model.size (e.g., "7B", "13B", "70B")
        const sizeMatch = (model.size || '').match(/(\d+\.?\d*)/);
        if (!sizeMatch) return true; // Keep models without size info
        const modelSizeB = parseFloat(sizeMatch[1]);

        if (options.maxSize && modelSizeB > options.maxSize) {
          return false;
        }
        if (options.minSize && modelSizeB < options.minSize) {
          return false;
        }
        return true;
      });
      if (this.progress) {
        const sizeInfo = [];
        if (options.minSize) sizeInfo.push(`min: ${options.minSize}B`);
        if (options.maxSize) sizeInfo.push(`max: ${options.maxSize}B`);
        this.progress.substep(`Size filter: ${sizeInfo.join(', ')}`, true);
      }
    }

    if (this.progress) {
      this.progress.stepComplete(`${models.length}/${originalCount} models selected`);
    }
  }

  // Step 6: Platform-specific Mathematical Analysis
  if (this.progress) {
    this.progress.step('Compatibility Analysis', 'Running mathematical heuristics and hardware matching...');
  }

  const compatibility = await this.analyzeWithPlatformSpecificHeuristics(hardware, models, ollamaIntegration, platform, options);

  if (this.progress) {
    const stats = `${compatibility.compatible.length} compatible, ${compatibility.marginal.length} marginal`;
    this.progress.stepComplete(stats);
  }

  // Step 7: Performance Estimation
  if (this.progress) {
    this.progress.step('Performance Analysis', 'Estimating model performance and speeds...');
  }

  // enrichWithPerformanceData is defined elsewhere in this class; it is
  // expected to return the same buckets plus performanceEstimates.
  const enrichedResults = await this.enrichWithPerformanceData(hardware, compatibility, platform);

  if (this.progress) {
    const perfCount = Object.keys(enrichedResults.performanceEstimates || {}).length;
    this.progress.stepComplete(`Performance data for ${perfCount} models`);
  }

  // Step 8: Generate Platform-specific Recommendations
  if (this.progress) {
    this.progress.step('Smart Recommendations', 'Generating personalized model suggestions...');
  }

  const recommendations = await this.generateIntelligentRecommendations(hardware);
  // Alias: both keys are exposed on the result for backward compatibility.
  const intelligentRecommendations = recommendations;

  if (this.progress) {
    const compatibleCount = enrichedResults.compatible.length;
    const marginalCount = enrichedResults.marginal.length;
    const recCount = Object.keys(intelligentRecommendations || {}).length;
    this.progress.substep(`Generating ${platform} recommendations...`, true);
    this.progress.stepComplete(`${recCount} recommendations generated`);

    // Final completion banner, phrased by result mix.
    if (compatibleCount > 0 && marginalCount === 0) {
      this.progress.complete(`Found ${compatibleCount} compatible models for your hardware`);
    } else if (marginalCount > 0) {
      this.progress.complete(`Found ${compatibleCount} compatible (${marginalCount} marginal) models for your hardware`);
    } else {
      this.progress.complete(`No compatible models found for your hardware`);
    }
  }

  return {
    hardware,
    compatible: enrichedResults.compatible,
    marginal: enrichedResults.marginal,
    incompatible: enrichedResults.incompatible,
    recommendations,
    intelligentRecommendations,
    ollamaInfo: ollamaIntegration.ollamaInfo,
    ollamaModels: ollamaIntegration.compatibleOllamaModels,
    summary: this.generateEnhancedSummary(hardware, enrichedResults, ollamaIntegration),
    performanceEstimates: enrichedResults.performanceEstimates,
    platform
  };
}
|
|
283
|
+
|
|
284
|
+
async analyzeWithPlatformSpecificHeuristics(hardware, staticModels, ollamaIntegration, platform, options = {}) {
|
|
285
|
+
// Use different analysis approaches based on platform
|
|
286
|
+
if (platform === 'apple_silicon') {
|
|
287
|
+
return await this.analyzeWithAppleSiliconHeuristics(hardware, staticModels, ollamaIntegration, options);
|
|
288
|
+
} else {
|
|
289
|
+
return await this.analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration);
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
async analyzeWithAppleSiliconHeuristics(hardware, staticModels, ollamaIntegration, options = {}) {
|
|
294
|
+
// Apple Silicon specific analysis - more optimistic for unified memory
|
|
295
|
+
this.logger.info('Using Apple Silicon specific heuristics');
|
|
296
|
+
|
|
297
|
+
// For Apple Silicon, we use more optimistic thresholds
|
|
298
|
+
const MultiObjectiveSelector = require('./ai/multi-objective-selector');
|
|
299
|
+
const selector = new MultiObjectiveSelector();
|
|
300
|
+
|
|
301
|
+
// Use the specified use case, default to 'general'
|
|
302
|
+
const useCase = options.useCase || 'general';
|
|
303
|
+
const results = await selector.selectBestModels(hardware, staticModels, useCase, 100);
|
|
304
|
+
|
|
305
|
+
// Apple Silicon specific post-processing - make more models compatible
|
|
306
|
+
// Lower threshold for Apple Silicon due to unified memory efficiency
|
|
307
|
+
const appleSiliconThreshold = hardware.memory_gb >= 16 ? 45 : 55;
|
|
308
|
+
const appleSiliconResults = {
|
|
309
|
+
compatible: [...results.compatible, ...results.marginal.filter(m => m.totalScore >= appleSiliconThreshold)],
|
|
310
|
+
marginal: results.marginal.filter(m => m.totalScore < appleSiliconThreshold && m.totalScore >= 35),
|
|
311
|
+
incompatible: [...results.incompatible, ...results.marginal.filter(m => m.totalScore < 35)]
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
this.logger.info('Apple Silicon heuristic results', {
|
|
315
|
+
compatible: appleSiliconResults.compatible.length,
|
|
316
|
+
marginal: appleSiliconResults.marginal.length,
|
|
317
|
+
incompatible: appleSiliconResults.incompatible.length
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
// Convert totalScore to score for consistency with other analysis paths
|
|
322
|
+
const mappedResults = {
|
|
323
|
+
compatible: appleSiliconResults.compatible.map(model => ({
|
|
324
|
+
...model,
|
|
325
|
+
score: model.totalScore,
|
|
326
|
+
confidence: model.totalScore / 100,
|
|
327
|
+
reasoning: model.reasoning,
|
|
328
|
+
mathAnalysis: {
|
|
329
|
+
qualityScore: model.components?.quality,
|
|
330
|
+
speedScore: model.components?.speed,
|
|
331
|
+
ttfbScore: model.components?.ttfb,
|
|
332
|
+
contextScore: model.components?.context,
|
|
333
|
+
hardwareMatchScore: model.components?.hardwareMatch
|
|
334
|
+
},
|
|
335
|
+
isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
|
|
336
|
+
ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
|
|
337
|
+
})),
|
|
338
|
+
marginal: appleSiliconResults.marginal.map(model => ({
|
|
339
|
+
...model,
|
|
340
|
+
score: model.totalScore,
|
|
341
|
+
confidence: model.totalScore / 100,
|
|
342
|
+
reasoning: model.reasoning,
|
|
343
|
+
mathAnalysis: {
|
|
344
|
+
qualityScore: model.components?.quality,
|
|
345
|
+
speedScore: model.components?.speed,
|
|
346
|
+
ttfbScore: model.components?.ttfb,
|
|
347
|
+
contextScore: model.components?.context,
|
|
348
|
+
hardwareMatchScore: model.components?.hardwareMatch
|
|
349
|
+
},
|
|
350
|
+
isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
|
|
351
|
+
ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
|
|
352
|
+
})),
|
|
353
|
+
incompatible: appleSiliconResults.incompatible.map(model => ({
|
|
354
|
+
...model,
|
|
355
|
+
score: model.totalScore,
|
|
356
|
+
confidence: model.totalScore / 100,
|
|
357
|
+
reasoning: model.reasoning,
|
|
358
|
+
mathAnalysis: {
|
|
359
|
+
qualityScore: model.components?.quality,
|
|
360
|
+
speedScore: model.components?.speed,
|
|
361
|
+
ttfbScore: model.components?.ttfb,
|
|
362
|
+
contextScore: model.components?.context,
|
|
363
|
+
hardwareMatchScore: model.components?.hardwareMatch
|
|
364
|
+
},
|
|
365
|
+
isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
|
|
366
|
+
ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
|
|
367
|
+
}))
|
|
368
|
+
};
|
|
369
|
+
|
|
370
|
+
return mappedResults;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/**
 * Queries the local Ollama daemon and reconciles its installed models with
 * the candidate model list. Always resolves — every failure path degrades
 * gracefully and is logged rather than thrown.
 *
 * @param {object} hardware - Detected system info for compatibility scoring.
 * @param {Array<object>} availableModels - Candidate models from the database.
 * @returns {Promise<object>} { ollamaInfo, compatibleOllamaModels,
 *   recommendedPulls, currentlyRunning }; ollamaInfo.available is false when
 *   the daemon cannot be reached.
 */
async integrateOllamaModels(hardware, availableModels) {
  // Default shape returned on any failure path.
  const integration = {
    ollamaInfo: { available: false },
    compatibleOllamaModels: [],
    recommendedPulls: [],
    currentlyRunning: []
  };

  try {
    const ollamaStatus = await this.ollamaClient.checkOllamaAvailability();
    integration.ollamaInfo = ollamaStatus;

    // No daemon: return the empty default rather than erroring out.
    if (!ollamaStatus.available) {
      this.logger.warn('Ollama not available', { error: ollamaStatus.error });
      return integration;
    }

    // Fetch installed and running models in parallel; each falls back to
    // an empty list independently so one failure doesn't sink the other.
    const [localModels, runningModels] = await Promise.all([
      this.ollamaClient.getLocalModels().catch(error => {
        this.logger.warn('Failed to get local Ollama models', { error: error.message });
        return [];
      }),
      this.ollamaClient.getRunningModels().catch(error => {
        this.logger.warn('Failed to get running Ollama models', { error: error.message });
        return [];
      })
    ]);

    integration.currentlyRunning = runningModels;

    try {
      this.logger.info('Using enhanced model database for compatibility...');

      // Matches local models against the scraped cloud catalog
      // (native-scraper); each match pairs { local, cloud, match_type }.
      const enhancedCompatibility = await getOllamaModelsIntegration(localModels);

      if (enhancedCompatibility.compatible_models && enhancedCompatibility.compatible_models.length > 0) {
        // Sequential on purpose: each iteration awaits a performance
        // estimate before pushing its result.
        for (const compatibleMatch of enhancedCompatibility.compatible_models) {
          const ollamaModel = compatibleMatch.local;
          const cloudModel = compatibleMatch.cloud;

          let matchedModel = this.findMatchingModelInDatabase(cloudModel, availableModels);

          // Synthesize a model entry when the database has no match.
          if (!matchedModel) {
            matchedModel = this.createModelFromCloudData(cloudModel);
          }

          const compatibility = this.compatibilityAnalyzer.calculateModelCompatibility(hardware, matchedModel);

          // An installed model is known to run, so floor its score:
          // 75 for exact name matches, 65 otherwise.
          let finalScore = compatibility.score;
          if (compatibleMatch.match_type === 'exact') {
            finalScore = Math.max(finalScore, 75);
          } else {
            finalScore = Math.max(finalScore, 65);
          }

          const enrichedOllamaModel = {
            ...ollamaModel,
            matchedModel,
            compatibilityScore: finalScore,
            issues: compatibility.issues || [],
            notes: compatibility.notes || [],
            isRunning: runningModels.some(r => r.name === ollamaModel.name),
            canRun: finalScore >= 60,
            performanceEstimate: await this.performanceAnalyzer.estimateModelPerformance(matchedModel, hardware),
            cloudInfo: {
              pulls: cloudModel.pulls,
              url: cloudModel.url,
              match_type: compatibleMatch.match_type,
              model_type: cloudModel.model_type
            }
          };

          integration.compatibleOllamaModels.push(enrichedOllamaModel);
        }

        this.logger.info('Enhanced Ollama integration completed', {
          data: {
            localModels: localModels.length,
            compatibleModels: integration.compatibleOllamaModels.length,
            runningModels: runningModels.length,
            totalAvailable: enhancedCompatibility.all_available,
            enhancedMatching: true
          }
        });
      } else {
        // No enhanced matches: fall back to the simpler matching path
        // (processFallbackModels is defined elsewhere in this class).
        this.logger.warn('No enhanced compatible models found, using fallback');
        await this.processFallbackModels(localModels, runningModels, availableModels, hardware, integration);
      }

    } catch (enhancedError) {
      // Enhanced matching itself failed: same fallback path.
      this.logger.warn('Enhanced matching failed, using fallback method', { error: enhancedError.message });
      await this.processFallbackModels(localModels, runningModels, availableModels, hardware, integration);
    }

    integration.recommendedPulls = await this.generateOllamaRecommendations(hardware, availableModels, localModels);

  } catch (error) {
    // Deliberate best-effort: integration failures never abort the
    // analysis; the caller receives the (possibly empty) default shape.
    this.logger.error('Ollama integration failed', { error: error.message, component: 'LLMChecker', method: 'integrateOllamaModels' });
  }

  return integration;
}
|
|
475
|
+
|
|
476
|
+
/**
 * Generic (non-Apple-Silicon) compatibility analysis: merges the static
 * database with the scraped Ollama catalog, scores everything with the
 * multi-objective selector, and classifies the results. On any failure it
 * falls back to the basic CompatibilityAnalyzer plus installed-model boosts.
 *
 * @param {object} hardware - Detected system info.
 * @param {Array<object>} staticModels - Candidate models from the database.
 * @param {object} ollamaIntegration - Result of integrateOllamaModels.
 * @returns {Promise<object>} { compatible, marginal, incompatible }.
 */
async analyzeWithMathematicalHeuristics(hardware, staticModels, ollamaIntegration) {
  this.logger.info('Using mathematical heuristics combining database + local models');

  try {
    // 1. Fetch ALL models from the Ollama catalog (false = allow cache;
    //    assumed from the call shape — TODO confirm against native-scraper).
    const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
    const allOllamaModels = ollamaData.models || [];
    this.logger.info(`Found ${allOllamaModels.length} models in Ollama database`);

    // 2. Build a combined map of all unique models, keyed by model name.
    const allModelsMap = new Map();

    // Seed with the static database models.
    staticModels.forEach(model => {
      allModelsMap.set(model.name, {
        ...model,
        source: 'static_database',
        isOllamaInstalled: false
      });
    });

    // Merge in catalog models: enrich an existing entry when a match is
    // found, otherwise add a brand-new entry from the catalog data.
    allOllamaModels.forEach(ollamaModel => {
      const modelKey = this.findBestMatchingKey(ollamaModel, allModelsMap);

      if (modelKey) {
        // Enhance the existing model with Ollama catalog data.
        const existing = allModelsMap.get(modelKey);
        allModelsMap.set(modelKey, {
          ...existing,
          ...this.createEnhancedModelFromOllama(ollamaModel, existing),
          source: 'enhanced_with_ollama'
        });
      } else {
        // Create a new model from the Ollama catalog data.
        const newModel = this.createModelFromOllamaData(ollamaModel);
        allModelsMap.set(newModel.name, {
          ...newModel,
          source: 'ollama_database'
        });
      }
    });

    const allUniqueModels = Array.from(allModelsMap.values());
    this.logger.info(`Combined total: ${allUniqueModels.length} unique models`);

    // 3. Score everything with the multi-objective selector.
    const MultiObjectiveSelector = require('./ai/multi-objective-selector');
    const multiObjectiveSelector = new MultiObjectiveSelector();

    const multiObjectiveResult = await multiObjectiveSelector.selectBestModels(
      hardware,
      allUniqueModels,
      'general',
      50 // Top 50 models
    );

    this.logger.info(`Multi-objective analysis completed: ${multiObjectiveResult.compatible.length} compatible, ${multiObjectiveResult.marginal.length} marginal`);

    // 4. Results arrive pre-classified; map each bucket onto the
    //    score/confidence shape consumed downstream. (Note: compatible and
    //    marginal use model.components without optional chaining — entries
    //    from this selector are presumed to always carry components.)
    const compatibility = {
      compatible: multiObjectiveResult.compatible.map(model => ({
        ...model,
        score: model.totalScore,
        confidence: model.totalScore / 100,
        reasoning: model.reasoning,
        mathAnalysis: {
          qualityScore: model.components.quality,
          speedScore: model.components.speed,
          ttfbScore: model.components.ttfb,
          contextScore: model.components.context,
          hardwareMatchScore: model.components.hardwareMatch
        },
        isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
        ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
      })),
      marginal: multiObjectiveResult.marginal.map(model => ({
        ...model,
        score: model.totalScore,
        confidence: model.totalScore / 100,
        reasoning: model.reasoning,
        mathAnalysis: {
          qualityScore: model.components.quality,
          speedScore: model.components.speed,
          ttfbScore: model.components.ttfb,
          contextScore: model.components.context,
          hardwareMatchScore: model.components.hardwareMatch
        },
        isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
        ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
      })),
      incompatible: multiObjectiveResult.incompatible.map(model => ({
        ...model,
        score: model.totalScore,
        confidence: model.totalScore / 100,
        reasoning: model.reasoning,
        isOllamaInstalled: this.checkIfModelInstalled(model, ollamaIntegration),
        ollamaInfo: this.getOllamaModelInfo(model, ollamaIntegration)
      }))
    };

    // Any model the selector did not score goes to incompatible.
    // Build a set of already included model names for O(1) lookup (instead of O(n) .some())
    const includedModelNames = new Set();
    compatibility.compatible.forEach(m => includedModelNames.add(m.name));
    compatibility.marginal.forEach(m => includedModelNames.add(m.name));
    compatibility.incompatible.forEach(m => includedModelNames.add(m.name));

    allUniqueModels.forEach(model => {
      if (!includedModelNames.has(model.name)) {
        compatibility.incompatible.push({
          ...model,
          score: 0,
          issues: ['Low compatibility score with current hardware'],
          mathAnalysis: { reason: 'Below threshold in mathematical analysis' }
        });
      }
    });

    this.logger.info(`Mathematical heuristic results: ${compatibility.compatible.length} compatible, ${compatibility.marginal.length} marginal, ${compatibility.incompatible.length} incompatible`);

    return compatibility;

  } catch (error) {
    this.logger.error('Mathematical heuristic analysis failed, using fallback', { error: error.message });

    if (this.progress) {
      this.progress.warn('Advanced analysis failed, falling back to basic compatibility check');
    }

    // Fallback to the original analysis method.
    const compatibility = this.compatibilityAnalyzer.analyzeCompatibility(hardware, staticModels);

    // Boost models that are actually installed in Ollama: they are known
    // to run, so slot them into compatible (>=75) or marginal (>=60).
    if (ollamaIntegration.compatibleOllamaModels && ollamaIntegration.compatibleOllamaModels.length > 0) {
      for (const ollamaModel of ollamaIntegration.compatibleOllamaModels) {
        if (ollamaModel.matchedModel && ollamaModel.canRun) {
          const enhancedModel = {
            ...ollamaModel.matchedModel,
            score: ollamaModel.compatibilityScore,
            issues: ollamaModel.issues || [],
            notes: [...(ollamaModel.notes || []), 'Installed in Ollama'],
            performanceEstimate: ollamaModel.performanceEstimate,
            isOllamaInstalled: true,
            ollamaInfo: {
              localName: ollamaModel.name,
              isRunning: ollamaModel.isRunning,
              cloudInfo: ollamaModel.cloudInfo
            }
          };

          if (ollamaModel.compatibilityScore >= 75) {
            compatibility.compatible.push(enhancedModel);
          } else if (ollamaModel.compatibilityScore >= 60) {
            compatibility.marginal.push(enhancedModel);
          }
        }
      }

      // Keep both buckets ordered best-first after the insertions.
      compatibility.compatible.sort((a, b) => b.score - a.score);
      compatibility.marginal.sort((a, b) => b.score - a.score);
    }

    return compatibility;
  }
}
|
|
642
|
+
|
|
643
|
+
findBestMatchingKey(ollamaModel, modelsMap) {
|
|
644
|
+
const ollamaName = ollamaModel.model_name.toLowerCase();
|
|
645
|
+
const ollamaId = ollamaModel.model_identifier.toLowerCase();
|
|
646
|
+
|
|
647
|
+
// Buscar coincidencia exacta por nombre
|
|
648
|
+
for (const [key, model] of modelsMap) {
|
|
649
|
+
if (key.toLowerCase() === ollamaName ||
|
|
650
|
+
model.name.toLowerCase() === ollamaName) {
|
|
651
|
+
return key;
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
// Buscar por palabras clave del identificador, priorizando matches más exactos
|
|
656
|
+
const keywords = ollamaId.split(/[:\-_]/);
|
|
657
|
+
let bestMatch = null;
|
|
658
|
+
let bestScore = 0;
|
|
659
|
+
|
|
660
|
+
for (const [key, model] of modelsMap) {
|
|
661
|
+
const modelName = model.name.toLowerCase();
|
|
662
|
+
let score = 0;
|
|
663
|
+
|
|
664
|
+
// Calcular score de coincidencia
|
|
665
|
+
for (const keyword of keywords) {
|
|
666
|
+
if (keyword.length > 2 && modelName.includes(keyword)) {
|
|
667
|
+
if (keyword === 'codellama' && ollamaId === 'codellama') {
|
|
668
|
+
score += 10; // Priorizar codellama exacto sobre phind-codellama
|
|
669
|
+
} else if (keyword === 'codellama') {
|
|
670
|
+
score += 5;
|
|
671
|
+
} else {
|
|
672
|
+
score += 1;
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
if (score > bestScore) {
|
|
678
|
+
bestScore = score;
|
|
679
|
+
bestMatch = key;
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
return bestMatch;
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
createEnhancedModelFromOllama(ollamaModel, existingModel) {
|
|
687
|
+
// Extract real file size from variants if available
|
|
688
|
+
let realStorageSize = null;
|
|
689
|
+
if (ollamaModel.variants && ollamaModel.variants.length > 0) {
|
|
690
|
+
// Try to match based on the existing model size
|
|
691
|
+
let mainVariant = null;
|
|
692
|
+
|
|
693
|
+
if (existingModel.size) {
|
|
694
|
+
// Extract size from existing model (e.g., "7B" -> "7b")
|
|
695
|
+
const existingSize = existingModel.size.toLowerCase().replace('b', '');
|
|
696
|
+
// Look for matching variant (e.g., "codellama:7b")
|
|
697
|
+
mainVariant = ollamaModel.variants.find(v =>
|
|
698
|
+
v.tag.includes(`:${existingSize}b`) && !v.tag.includes('-instruct') && !v.tag.includes('-code')
|
|
699
|
+
);
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
// Fallback to exact match or latest
|
|
703
|
+
if (!mainVariant) {
|
|
704
|
+
mainVariant = ollamaModel.variants.find(v =>
|
|
705
|
+
v.tag === ollamaModel.model_identifier ||
|
|
706
|
+
v.tag === `${ollamaModel.model_identifier}:latest`
|
|
707
|
+
) || ollamaModel.variants[0];
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
if (mainVariant && mainVariant.real_size_gb) {
|
|
711
|
+
realStorageSize = mainVariant.real_size_gb;
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
return {
|
|
716
|
+
...existingModel,
|
|
717
|
+
ollamaId: ollamaModel.model_identifier,
|
|
718
|
+
pulls: ollamaModel.pulls,
|
|
719
|
+
lastUpdated: ollamaModel.last_updated,
|
|
720
|
+
description: ollamaModel.description || existingModel.description,
|
|
721
|
+
ollamaAvailable: true,
|
|
722
|
+
requirements: {
|
|
723
|
+
...existingModel.requirements,
|
|
724
|
+
// Update storage with real size if available
|
|
725
|
+
storage: realStorageSize || existingModel.requirements?.storage
|
|
726
|
+
},
|
|
727
|
+
installation: {
|
|
728
|
+
...existingModel.installation,
|
|
729
|
+
ollama: `ollama pull ${ollamaModel.model_identifier}`
|
|
730
|
+
}
|
|
731
|
+
};
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
createModelFromOllamaData(ollamaModel) {
|
|
735
|
+
// Improved size detection with multiple patterns and fallbacks
|
|
736
|
+
let sizeMatch = ollamaModel.model_identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
737
|
+
|
|
738
|
+
// Try alternative patterns if first doesn't work
|
|
739
|
+
if (!sizeMatch) {
|
|
740
|
+
// Try patterns like "llama3.1" -> estimate as 8B, "qwen2.5" -> 7B
|
|
741
|
+
if (/llama3\.?[12]?/i.test(ollamaModel.model_identifier)) {
|
|
742
|
+
sizeMatch = ['8b', '8'];
|
|
743
|
+
} else if (/qwen2\.?5?/i.test(ollamaModel.model_identifier)) {
|
|
744
|
+
sizeMatch = ['7b', '7'];
|
|
745
|
+
} else if (/mistral/i.test(ollamaModel.model_identifier)) {
|
|
746
|
+
sizeMatch = ['7b', '7'];
|
|
747
|
+
} else if (/gemma/i.test(ollamaModel.model_identifier)) {
|
|
748
|
+
sizeMatch = ['7b', '7'];
|
|
749
|
+
} else if (/phi/i.test(ollamaModel.model_identifier)) {
|
|
750
|
+
sizeMatch = ['3b', '3'];
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
const size = sizeMatch ? sizeMatch[1] + 'B' : '7B'; // Default to 7B instead of Unknown
|
|
755
|
+
const sizeNum = sizeMatch ? parseFloat(sizeMatch[1]) : 7; // Default to 7B
|
|
756
|
+
|
|
757
|
+
// Extract real file size from variants if available
|
|
758
|
+
let realStorageSize = null;
|
|
759
|
+
if (ollamaModel.variants && ollamaModel.variants.length > 0) {
|
|
760
|
+
// Find the main variant (usually the first one or one matching the base model name)
|
|
761
|
+
const mainVariant = ollamaModel.variants.find(v =>
|
|
762
|
+
v.tag === ollamaModel.model_identifier ||
|
|
763
|
+
v.tag === `${ollamaModel.model_identifier}:latest`
|
|
764
|
+
) || ollamaModel.variants[0];
|
|
765
|
+
|
|
766
|
+
if (mainVariant && mainVariant.real_size_gb) {
|
|
767
|
+
realStorageSize = mainVariant.real_size_gb;
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
let category = 'medium';
|
|
772
|
+
if (sizeNum < 1) category = 'ultra_small';
|
|
773
|
+
else if (sizeNum <= 4) category = 'small';
|
|
774
|
+
else if (sizeNum <= 15) category = 'medium';
|
|
775
|
+
else category = 'large';
|
|
776
|
+
|
|
777
|
+
let specialization = 'general';
|
|
778
|
+
const id = ollamaModel.model_identifier.toLowerCase();
|
|
779
|
+
if (id.includes('code')) specialization = 'code';
|
|
780
|
+
else if (id.includes('embed')) specialization = 'embeddings';
|
|
781
|
+
|
|
782
|
+
return {
|
|
783
|
+
name: ollamaModel.model_name,
|
|
784
|
+
ollamaId: ollamaModel.model_identifier,
|
|
785
|
+
size: size,
|
|
786
|
+
type: 'local',
|
|
787
|
+
category: category,
|
|
788
|
+
specialization: specialization,
|
|
789
|
+
frameworks: ['ollama'],
|
|
790
|
+
requirements: {
|
|
791
|
+
ram: Math.ceil(sizeNum * 0.6) || 2,
|
|
792
|
+
vram: Math.ceil(sizeNum * 0.4) || 0,
|
|
793
|
+
cpu_cores: Math.min(8, Math.max(2, Math.ceil(sizeNum / 2))),
|
|
794
|
+
storage: realStorageSize || Math.ceil(sizeNum * 0.7) || 1
|
|
795
|
+
},
|
|
796
|
+
installation: {
|
|
797
|
+
ollama: `ollama pull ${ollamaModel.model_identifier}`,
|
|
798
|
+
description: ollamaModel.description || 'Available in Ollama library'
|
|
799
|
+
},
|
|
800
|
+
description: ollamaModel.description || `${ollamaModel.model_name} from Ollama`,
|
|
801
|
+
pulls: ollamaModel.pulls,
|
|
802
|
+
lastUpdated: ollamaModel.last_updated,
|
|
803
|
+
year: 2024,
|
|
804
|
+
ollamaAvailable: true
|
|
805
|
+
};
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
checkIfModelInstalled(model, ollamaIntegration) {
|
|
809
|
+
if (!ollamaIntegration.compatibleOllamaModels) return false;
|
|
810
|
+
|
|
811
|
+
return ollamaIntegration.compatibleOllamaModels.some(installed => {
|
|
812
|
+
return installed.name.toLowerCase().includes(model.ollamaId?.toLowerCase() || model.name.toLowerCase()) ||
|
|
813
|
+
(model.ollamaId?.toLowerCase() || model.name.toLowerCase()).includes(installed.name.toLowerCase());
|
|
814
|
+
});
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
getOllamaModelInfo(model, ollamaIntegration) {
|
|
818
|
+
if (!ollamaIntegration.compatibleOllamaModels) return null;
|
|
819
|
+
|
|
820
|
+
const installedModel = ollamaIntegration.compatibleOllamaModels.find(installed => {
|
|
821
|
+
return installed.name.toLowerCase().includes(model.ollamaId?.toLowerCase() || model.name.toLowerCase()) ||
|
|
822
|
+
(model.ollamaId?.toLowerCase() || model.name.toLowerCase()).includes(installed.name.toLowerCase());
|
|
823
|
+
});
|
|
824
|
+
|
|
825
|
+
return installedModel ? {
|
|
826
|
+
localName: installedModel.name,
|
|
827
|
+
isRunning: installedModel.isRunning,
|
|
828
|
+
cloudInfo: installedModel.cloudInfo
|
|
829
|
+
} : null;
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
async processFallbackModels(localModels, runningModels, availableModels, hardware, integration) {
|
|
833
|
+
for (const ollamaModel of localModels) {
|
|
834
|
+
const matchedModel = this.findMatchingModel(ollamaModel, availableModels);
|
|
835
|
+
|
|
836
|
+
if (matchedModel) {
|
|
837
|
+
const compatibility = this.compatibilityAnalyzer.calculateModelCompatibility(hardware, matchedModel);
|
|
838
|
+
|
|
839
|
+
const enrichedOllamaModel = {
|
|
840
|
+
...ollamaModel,
|
|
841
|
+
matchedModel,
|
|
842
|
+
compatibilityScore: compatibility.score,
|
|
843
|
+
issues: compatibility.issues,
|
|
844
|
+
notes: compatibility.notes,
|
|
845
|
+
isRunning: runningModels.some(r => r.name === ollamaModel.name),
|
|
846
|
+
canRun: compatibility.score >= 60,
|
|
847
|
+
performanceEstimate: await this.performanceAnalyzer.estimateModelPerformance(matchedModel, hardware)
|
|
848
|
+
};
|
|
849
|
+
|
|
850
|
+
integration.compatibleOllamaModels.push(enrichedOllamaModel);
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
findMatchingModelInDatabase(cloudModel, availableModels) {
|
|
856
|
+
const cloudName = cloudModel.model_name.toLowerCase();
|
|
857
|
+
const cloudId = cloudModel.model_identifier.toLowerCase();
|
|
858
|
+
|
|
859
|
+
let match = availableModels.find(m =>
|
|
860
|
+
m.name.toLowerCase() === cloudName ||
|
|
861
|
+
m.name.toLowerCase().includes(cloudId)
|
|
862
|
+
);
|
|
863
|
+
|
|
864
|
+
if (match) return match;
|
|
865
|
+
|
|
866
|
+
const keywords = cloudId.split('-');
|
|
867
|
+
match = availableModels.find(model => {
|
|
868
|
+
const modelName = model.name.toLowerCase();
|
|
869
|
+
return keywords.some(keyword =>
|
|
870
|
+
keyword.length > 2 && modelName.includes(keyword)
|
|
871
|
+
);
|
|
872
|
+
});
|
|
873
|
+
|
|
874
|
+
return match;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
createModelFromCloudData(cloudModel) {
|
|
878
|
+
// Improved size detection for cloud models too
|
|
879
|
+
let sizeMatch = cloudModel.model_identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
880
|
+
|
|
881
|
+
// Try alternative patterns if first doesn't work
|
|
882
|
+
if (!sizeMatch) {
|
|
883
|
+
if (/llama3\.?[12]?/i.test(cloudModel.model_identifier)) {
|
|
884
|
+
sizeMatch = ['8b', '8'];
|
|
885
|
+
} else if (/qwen2\.?5?/i.test(cloudModel.model_identifier)) {
|
|
886
|
+
sizeMatch = ['7b', '7'];
|
|
887
|
+
} else if (/mistral/i.test(cloudModel.model_identifier)) {
|
|
888
|
+
sizeMatch = ['7b', '7'];
|
|
889
|
+
} else if (/gemma/i.test(cloudModel.model_identifier)) {
|
|
890
|
+
sizeMatch = ['7b', '7'];
|
|
891
|
+
} else if (/phi/i.test(cloudModel.model_identifier)) {
|
|
892
|
+
sizeMatch = ['3b', '3'];
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
const size = sizeMatch ? sizeMatch[1] + 'B' : '7B'; // Default to 7B instead of Unknown
|
|
897
|
+
|
|
898
|
+
let category = 'medium';
|
|
899
|
+
if (size !== '7B') { // Changed from 'Unknown' check
|
|
900
|
+
const sizeNum = parseFloat(size);
|
|
901
|
+
const unit = size.slice(-1);
|
|
902
|
+
const sizeInB = unit === 'M' ? sizeNum / 1000 : sizeNum;
|
|
903
|
+
|
|
904
|
+
if (sizeInB < 1) category = 'ultra_small';
|
|
905
|
+
else if (sizeInB <= 4) category = 'small';
|
|
906
|
+
else if (sizeInB <= 15) category = 'medium';
|
|
907
|
+
else category = 'large';
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
let specialization = 'general';
|
|
911
|
+
const id = cloudModel.model_identifier.toLowerCase();
|
|
912
|
+
if (id.includes('code')) specialization = 'code';
|
|
913
|
+
else if (id.includes('chat')) specialization = 'chat';
|
|
914
|
+
else if (id.includes('embed')) specialization = 'embeddings';
|
|
915
|
+
|
|
916
|
+
return {
|
|
917
|
+
name: cloudModel.model_name,
|
|
918
|
+
size: size,
|
|
919
|
+
type: 'local',
|
|
920
|
+
category: category,
|
|
921
|
+
specialization: specialization,
|
|
922
|
+
frameworks: ['ollama'],
|
|
923
|
+
requirements: {
|
|
924
|
+
ram: Math.ceil((parseFloat(size) || 4) * 0.6),
|
|
925
|
+
vram: Math.ceil((parseFloat(size) || 4) * 0.4),
|
|
926
|
+
cpu_cores: 4,
|
|
927
|
+
storage: Math.ceil((parseFloat(size) || 4) * 0.7)
|
|
928
|
+
},
|
|
929
|
+
installation: {
|
|
930
|
+
ollama: `ollama pull ${cloudModel.model_identifier}`,
|
|
931
|
+
description: cloudModel.description || 'Model from Ollama library'
|
|
932
|
+
},
|
|
933
|
+
year: 2024,
|
|
934
|
+
description: cloudModel.description || `${cloudModel.model_name} model`,
|
|
935
|
+
cloudData: {
|
|
936
|
+
pulls: cloudModel.pulls,
|
|
937
|
+
url: cloudModel.url,
|
|
938
|
+
model_type: cloudModel.model_type,
|
|
939
|
+
identifier: cloudModel.model_identifier
|
|
940
|
+
}
|
|
941
|
+
};
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
findMatchingModel(ollamaModel, availableModels) {
|
|
945
|
+
const ollamaName = ollamaModel.name.toLowerCase();
|
|
946
|
+
|
|
947
|
+
const nameMapping = {
|
|
948
|
+
'llama3.2:3b': 'Llama 3.2 3B',
|
|
949
|
+
'llama3.1:8b': 'Llama 3.1 8B',
|
|
950
|
+
'mistral:7b': 'Mistral 7B v0.3',
|
|
951
|
+
'mistral:latest': 'Mistral 7B v0.3',
|
|
952
|
+
'codellama:7b': 'CodeLlama 7B',
|
|
953
|
+
'phi3:mini': 'Phi-3 Mini 3.8B',
|
|
954
|
+
'gemma2:2b': 'Gemma 2B',
|
|
955
|
+
'tinyllama:1.1b': 'TinyLlama 1.1B',
|
|
956
|
+
'qwen2.5:7b': 'Qwen 2.5 7B'
|
|
957
|
+
};
|
|
958
|
+
|
|
959
|
+
if (nameMapping[ollamaName]) {
|
|
960
|
+
return availableModels.find(m => m.name === nameMapping[ollamaName]);
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
const modelKeywords = ollamaName.split(':')[0].split('-');
|
|
964
|
+
|
|
965
|
+
return availableModels.find(model => {
|
|
966
|
+
const modelName = model.name.toLowerCase();
|
|
967
|
+
return modelKeywords.some(keyword =>
|
|
968
|
+
keyword.length > 2 && modelName.includes(keyword)
|
|
969
|
+
);
|
|
970
|
+
});
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
async generateOllamaRecommendations(hardware, availableModels, installedModels) {
|
|
974
|
+
const recommendations = [];
|
|
975
|
+
const installedNames = new Set(installedModels.map(m => m.name.toLowerCase()));
|
|
976
|
+
|
|
977
|
+
const compatibleModels = availableModels.filter(model => {
|
|
978
|
+
const compatibility = this.compatibilityAnalyzer.calculateModelCompatibility(hardware, model);
|
|
979
|
+
return compatibility.score >= 75 && model.frameworks?.includes('ollama');
|
|
980
|
+
});
|
|
981
|
+
|
|
982
|
+
for (const model of compatibleModels.slice(0, 5)) {
|
|
983
|
+
const ollamaCommand = this.getOllamaCommand(model);
|
|
984
|
+
|
|
985
|
+
if (ollamaCommand && !installedNames.has(ollamaCommand.split(' ')[2])) {
|
|
986
|
+
const performance = await this.performanceAnalyzer.estimateModelPerformance(model, hardware);
|
|
987
|
+
|
|
988
|
+
recommendations.push({
|
|
989
|
+
model,
|
|
990
|
+
command: ollamaCommand,
|
|
991
|
+
reason: this.getRecommendationReason(model, hardware),
|
|
992
|
+
estimatedPerformance: performance,
|
|
993
|
+
priority: this.calculatePriority(model, hardware)
|
|
994
|
+
});
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
return recommendations.sort((a, b) => b.priority - a.priority);
|
|
999
|
+
}
|
|
1000
|
+
|
|
1001
|
+
async enrichWithPerformanceData(hardware, compatibility) {
|
|
1002
|
+
const performanceEstimates = new Map();
|
|
1003
|
+
|
|
1004
|
+
for (const model of [...compatibility.compatible, ...compatibility.marginal]) {
|
|
1005
|
+
try {
|
|
1006
|
+
const estimate = await this.performanceAnalyzer.estimateModelPerformance(model, hardware);
|
|
1007
|
+
performanceEstimates.set(model.name, estimate);
|
|
1008
|
+
|
|
1009
|
+
model.performanceEstimate = estimate;
|
|
1010
|
+
model.tokensPerSecond = estimate.estimatedTokensPerSecond;
|
|
1011
|
+
model.loadTime = estimate.loadTimeEstimate;
|
|
1012
|
+
} catch (error) {
|
|
1013
|
+
this.logger.warn(`Failed to estimate performance for ${model.name}`, { error });
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
return {
|
|
1018
|
+
...compatibility,
|
|
1019
|
+
performanceEstimates: Object.fromEntries(performanceEstimates)
|
|
1020
|
+
};
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
  async generateEnhancedRecommendations(hardware, results, ollamaIntegration, useCase) {
    // Assemble the four user-facing recommendation lists shown by the CLI:
    // general advice, installed-model summaries, cloud pull suggestions, and
    // copy-pasteable quick commands. Order of pushes is display order.
    const recommendations = {
      general: [],
      installedModels: [],
      cloudSuggestions: [],
      quickCommands: []
    };

    // Baseline hardware/compatibility advice from the analyzer.
    const generalRecs = this.compatibilityAnalyzer.generateRecommendations(hardware, results);
    recommendations.general.push(...generalRecs);

    if (ollamaIntegration.ollamaInfo.available) {
      if (ollamaIntegration.compatibleOllamaModels.length === 0) {
        recommendations.general.push('No compatible models installed in Ollama');
      } else {
        recommendations.installedModels.push(`${ollamaIntegration.compatibleOllamaModels.length} compatible models found in Ollama:`);

        ollamaIntegration.compatibleOllamaModels.forEach((model, index) => {
          const runningStatus = model.isRunning ? ' (running)' : '';
          const score = model.compatibilityScore || 'N/A';
          recommendations.installedModels.push(`${index + 1}. ${model.name} (Score: ${score}/100)${runningStatus}`);
        });

        // NOTE(review): .sort() mutates compatibleOllamaModels in place, so
        // the array is reordered by score for all later consumers — confirm
        // this is intended before changing.
        const bestModel = ollamaIntegration.compatibleOllamaModels
          .sort((a, b) => (b.compatibilityScore || 0) - (a.compatibilityScore || 0))[0];

        if (bestModel) {
          recommendations.quickCommands.push(`ollama run ${bestModel.name}`);
        }
      }

      // Cloud suggestions: best-effort; on empty results or error we fall
      // back to the curated static list.
      this.logger.info('Searching for cloud recommendations...');
      try {
        const cloudRecommendations = await this.searchOllamaCloudRecommendations(hardware, ollamaIntegration.compatibleOllamaModels);

        if (cloudRecommendations.length > 0) {
          this.logger.info(`Found ${cloudRecommendations.length} cloud recommendations`);
          recommendations.cloudSuggestions.push('Recommended models from Ollama library for your hardware:');
          cloudRecommendations.forEach((model, index) => {
            recommendations.cloudSuggestions.push(`${index + 1}. ollama pull ${model.identifier} - ${model.reason} (${model.pulls.toLocaleString()} pulls)`);
            recommendations.quickCommands.push(`ollama pull ${model.identifier}`);
          });
        } else {
          this.logger.warn('No cloud recommendations found, using fallback');
          this.addFallbackSuggestions(recommendations, ollamaIntegration.compatibleOllamaModels);
        }
      } catch (error) {
        this.logger.error('Failed to get cloud recommendations:', error);
        this.addFallbackSuggestions(recommendations, ollamaIntegration.compatibleOllamaModels);
      }

    } else {
      recommendations.general.push('Install Ollama for local LLM management: https://ollama.ai');
    }

    // Append use-case-specific advice last so general advice reads first.
    const useCaseRecs = this.getUseCaseRecommendations(results, useCase);
    recommendations.general.push(...useCaseRecs);

    return recommendations;
  }
|
|
1083
|
+
|
|
1084
|
+
addFallbackSuggestions(recommendations, installedModels) {
|
|
1085
|
+
const installedNames = new Set(installedModels.map(m => m.name.toLowerCase()));
|
|
1086
|
+
|
|
1087
|
+
const allSuggestions = [
|
|
1088
|
+
{ name: 'qwen:0.5b', reason: 'Ultra-fast 0.5B model, runs on any hardware', minRAM: 1, tier: 'any' },
|
|
1089
|
+
{ name: 'tinyllama:1.1b', reason: 'Tiny but capable, perfect for testing', minRAM: 2, tier: 'any' },
|
|
1090
|
+
{ name: 'phi3:mini', reason: 'Microsoft\'s efficient 3.8B model with excellent reasoning', minRAM: 4, tier: 'low' },
|
|
1091
|
+
{ name: 'llama3.2:1b', reason: 'Meta\'s latest compact 1B model', minRAM: 2, tier: 'any' },
|
|
1092
|
+
{ name: 'llama3.2:3b', reason: 'Meta\'s balanced 3B model', minRAM: 4, tier: 'low' },
|
|
1093
|
+
{ name: 'gemma2:2b', reason: 'Google\'s optimized 2B model', minRAM: 3, tier: 'any' },
|
|
1094
|
+
{ name: 'mistral:7b', reason: 'High-quality European 7B model', minRAM: 8, tier: 'medium' },
|
|
1095
|
+
{ name: 'llama3.1:8b', reason: 'Meta\'s flagship 8B model', minRAM: 10, tier: 'medium' },
|
|
1096
|
+
{ name: 'qwen2.5:7b', reason: 'Advanced Chinese 7B model', minRAM: 8, tier: 'medium' },
|
|
1097
|
+
{ name: 'codellama:7b', reason: 'Specialized for coding tasks', minRAM: 8, tier: 'medium', specialty: 'code' },
|
|
1098
|
+
{ name: 'nomic-embed-text', reason: 'Best for text embeddings', minRAM: 2, tier: 'any', specialty: 'embeddings' }
|
|
1099
|
+
];
|
|
1100
|
+
|
|
1101
|
+
const availableSuggestions = allSuggestions.filter(model =>
|
|
1102
|
+
!installedNames.has(model.name) && !installedNames.has(model.name.split(':')[0])
|
|
1103
|
+
);
|
|
1104
|
+
|
|
1105
|
+
if (availableSuggestions.length > 0) {
|
|
1106
|
+
recommendations.cloudSuggestions.push('Curated model suggestions for your hardware:');
|
|
1107
|
+
availableSuggestions.slice(0, 5).forEach((model, index) => {
|
|
1108
|
+
recommendations.cloudSuggestions.push(`${index + 1}. ollama pull ${model.name} - ${model.reason}`);
|
|
1109
|
+
recommendations.quickCommands.push(`ollama pull ${model.name}`);
|
|
1110
|
+
});
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
  async searchOllamaCloudRecommendations(hardware, installedModels) {
    // Query the Ollama library (via the native scraper) for models suited to
    // this hardware, excluding anything already installed. Returns up to five
    // entries sorted by compatibility score then popularity; [] on any error.
    try {
      this.logger.info('Searching Ollama cloud for compatible models...');
      const { getOllamaModelsIntegration } = require('./ollama/native-scraper');

      const allModelsData = await getOllamaModelsIntegration([]);

      if (!allModelsData.recommendations || allModelsData.recommendations.length === 0) {
        this.logger.warn('No recommendations found from cloud search');
        return [];
      }

      this.logger.info(`Found ${allModelsData.recommendations.length} total models from cloud`);

      // Base identifiers (before ':') of locally installed models, for O(1)
      // exclusion lookups below.
      const installedIdentifiers = new Set(
        installedModels.map(m => {
          const name = m.name.toLowerCase();
          return name.split(':')[0];
        })
      );

      this.logger.info(`Installed models identifiers: ${Array.from(installedIdentifiers).join(', ')}`);

      // NOTE(review): hardwareTier is computed and logged here but not used
      // in the filtering below — scoring happens per model via
      // calculateCloudModelCompatibility. Confirm whether tier gating was
      // intended.
      const hardwareTier = this.getHardwareTier(hardware);
      this.logger.info(`Hardware tier: ${hardwareTier}`);

      const compatibleModels = allModelsData.recommendations
        // Drop models that are already installed (base id or full id).
        .filter(model => {
          const baseIdentifier = model.model_identifier.split(':')[0].toLowerCase();
          const isNotInstalled = !installedIdentifiers.has(baseIdentifier) &&
                                !installedIdentifiers.has(model.model_identifier.toLowerCase());

          if (!isNotInstalled) {
            this.logger.debug(`Skipping already installed model: ${model.model_identifier}`);
          }
          return isNotInstalled;
        })
        // Score each remaining model against the current hardware.
        .map(model => {
          const score = this.calculateCloudModelCompatibility(model, hardware);
          return {
            ...model,
            compatibilityScore: score,
            reason: this.getCloudModelReason(model, hardware)
          };
        })
        // Keep only models that clear the minimum usable score.
        .filter(model => {
          const isCompatible = model.compatibilityScore >= 60;
          if (!isCompatible) {
            this.logger.debug(`Model ${model.model_identifier} has low compatibility score: ${model.compatibilityScore}`);
          }
          return isCompatible;
        })
        // Rank by score, breaking ties with download count.
        .sort((a, b) => {
          if (b.compatibilityScore !== a.compatibilityScore) {
            return b.compatibilityScore - a.compatibilityScore;
          }
          return (b.pulls || 0) - (a.pulls || 0);
        })
        .slice(0, 5);

      this.logger.info(`Final compatible models for recommendations: ${compatibleModels.length}`);
      compatibleModels.forEach(model => {
        this.logger.debug(`Recommending: ${model.model_identifier} (score: ${model.compatibilityScore}, pulls: ${model.pulls})`);
      });

      // Project onto the shape expected by generateEnhancedRecommendations.
      return compatibleModels.map(model => ({
        identifier: model.model_identifier,
        name: model.model_name,
        pulls: model.pulls || 0,
        reason: model.reason,
        score: model.compatibilityScore,
        size: this.extractModelSize(model.model_identifier),
        description: model.description || ''
      }));

    } catch (error) {
      this.logger.error('Error searching Ollama cloud recommendations:', error);
      return [];
    }
  }
|
|
1194
|
+
|
|
1195
|
+
getHardwareTier(hardware) {
|
|
1196
|
+
return this.calculateHardwareScore(hardware).tier;
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
calculateHardwareScore(hardware) {
|
|
1200
|
+
const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
|
|
1201
|
+
|
|
1202
|
+
// Extract hardware info
|
|
1203
|
+
const ramGB = hardware.memory.total || 0;
|
|
1204
|
+
const vramGB = hardware.gpu?.vram || 0;
|
|
1205
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
1206
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
1207
|
+
const architecture = hardware.cpu?.architecture || hardware.cpu?.brand || '';
|
|
1208
|
+
const cpuCoresPhys = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
1209
|
+
const cpuGHzBase = hardware.cpu?.speed || 2.0;
|
|
1210
|
+
|
|
1211
|
+
// Enhanced Apple Silicon detection
|
|
1212
|
+
const isAppleSilicon = architecture.toLowerCase().includes('apple') ||
|
|
1213
|
+
architecture.toLowerCase().includes('m1') ||
|
|
1214
|
+
architecture.toLowerCase().includes('m2') ||
|
|
1215
|
+
architecture.toLowerCase().includes('m3') ||
|
|
1216
|
+
architecture.toLowerCase().includes('m4') ||
|
|
1217
|
+
cpuModel.toLowerCase().includes('apple') ||
|
|
1218
|
+
gpuModel.toLowerCase().includes('apple');
|
|
1219
|
+
const unified = isAppleSilicon;
|
|
1220
|
+
|
|
1221
|
+
// Detect PC platform (Windows/Linux)
|
|
1222
|
+
const isPC = !isAppleSilicon && (process.platform === 'win32' || process.platform === 'linux');
|
|
1223
|
+
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
1224
|
+
(cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
|
|
1225
|
+
const hasAVX2 = cpuModel.toLowerCase().includes('intel') ||
|
|
1226
|
+
cpuModel.toLowerCase().includes('amd');
|
|
1227
|
+
|
|
1228
|
+
// 1) Capacidad efectiva para pesos del modelo (45%)
|
|
1229
|
+
let effMem;
|
|
1230
|
+
|
|
1231
|
+
if (vramGB > 0 && !unified) {
|
|
1232
|
+
// Dedicated GPU path (Windows/Linux with discrete GPU)
|
|
1233
|
+
if (isPC) {
|
|
1234
|
+
// PC-specific GPU memory calculation with offload support
|
|
1235
|
+
const PCOptimizer = require('./hardware/pc-optimizer');
|
|
1236
|
+
const pcOpt = new PCOptimizer();
|
|
1237
|
+
const pcSpecs = this.getPCGPUSpecs(hardware, vramGB, ramGB);
|
|
1238
|
+
|
|
1239
|
+
// For PC discrete GPU: VRAM + strategic offload potential
|
|
1240
|
+
effMem = vramGB + pcSpecs.offloadCapacity;
|
|
1241
|
+
} else {
|
|
1242
|
+
// Generic discrete GPU calculation
|
|
1243
|
+
effMem = vramGB + Math.min(0.25 * ramGB, 8); // GPU + small CPU offload
|
|
1244
|
+
}
|
|
1245
|
+
} else if (unified && isAppleSilicon) {
|
|
1246
|
+
// Apple Silicon unified memory optimization
|
|
1247
|
+
const appleSiliconInfo = this.getAppleSiliconSpecs(cpuModel, gpuModel, ramGB);
|
|
1248
|
+
|
|
1249
|
+
// For Apple Silicon, use higher efficiency ratio due to:
|
|
1250
|
+
// 1. Unified memory architecture (no GPU<->RAM transfers)
|
|
1251
|
+
// 2. High memory bandwidth (200-800 GB/s)
|
|
1252
|
+
// 3. Optimized quantization-aware memory allocation
|
|
1253
|
+
// 4. Metal backend optimizations
|
|
1254
|
+
effMem = appleSiliconInfo.effectiveMemoryRatio * ramGB;
|
|
1255
|
+
|
|
1256
|
+
// Apply model size bonus for larger unified memory pools
|
|
1257
|
+
if (ramGB >= 32) {
|
|
1258
|
+
effMem += appleSiliconInfo.largeMemoryBonus;
|
|
1259
|
+
}
|
|
1260
|
+
} else {
|
|
1261
|
+
// Traditional CPU-only path or integrated GPU
|
|
1262
|
+
if (isPC) {
|
|
1263
|
+
// PC CPU-only with potential iGPU assist
|
|
1264
|
+
const pcSpecs = this.getPCCPUSpecs(hardware, ramGB);
|
|
1265
|
+
effMem = pcSpecs.effectiveMemoryRatio * ramGB;
|
|
1266
|
+
} else {
|
|
1267
|
+
// Generic CPU-only calculation
|
|
1268
|
+
effMem = 0.6 * ramGB; // Conservative for CPU inference
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
|
|
1272
|
+
const mem_cap = clamp(effMem / 32); // Normalizado contra 32GB para Q4-Q6 (más realista)
|
|
1273
|
+
|
|
1274
|
+
// 2) Ancho de banda de memoria (20%)
|
|
1275
|
+
let memBandwidthGBs = this.estimateMemoryBandwidth(hardware);
|
|
1276
|
+
const mem_bw = clamp(memBandwidthGBs / 500); // Normalizado contra 500 GB/s (más realista)
|
|
1277
|
+
|
|
1278
|
+
// 3) Cómputo (20%)
|
|
1279
|
+
let compute;
|
|
1280
|
+
const tflopsFP16 = this.estimateComputeTFLOPs(hardware);
|
|
1281
|
+
|
|
1282
|
+
if (tflopsFP16 > 0) {
|
|
1283
|
+
compute = clamp(tflopsFP16 / 80); // GPU: normalizado contra 80 TFLOPs (más realista)
|
|
1284
|
+
|
|
1285
|
+
// Cap iGPU compute
|
|
1286
|
+
if (/iris xe|uhd|vega.*integrated|radeon.*graphics/i.test(gpuModel)) {
|
|
1287
|
+
compute = Math.min(compute, 0.15);
|
|
1288
|
+
}
|
|
1289
|
+
} else {
|
|
1290
|
+
// CPU path
|
|
1291
|
+
compute = clamp((cpuCoresPhys * cpuGHzBase) / 60);
|
|
1292
|
+
if (hasAVX512) compute = Math.min(1, compute + 0.1);
|
|
1293
|
+
else if (hasAVX2) compute = Math.min(1, compute + 0.05);
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
// 4) RAM del sistema para KV-cache (10%)
|
|
1297
|
+
const sys_ram = clamp(ramGB / 64);
|
|
1298
|
+
|
|
1299
|
+
// 5) Almacenamiento (5%)
|
|
1300
|
+
const storageClass = this.detectStorageClass(hardware);
|
|
1301
|
+
const storage = storageClass === 'NVME' ? 1.0 :
|
|
1302
|
+
storageClass === 'SSD' ? 0.4 : 0.1;
|
|
1303
|
+
|
|
1304
|
+
// Score final (0-100)
|
|
1305
|
+
let score = 100 * (
|
|
1306
|
+
0.45 * mem_cap +
|
|
1307
|
+
0.20 * mem_bw +
|
|
1308
|
+
0.20 * compute +
|
|
1309
|
+
0.10 * sys_ram +
|
|
1310
|
+
0.05 * storage
|
|
1311
|
+
);
|
|
1312
|
+
|
|
1313
|
+
// Mapear score → tier (final adjusted thresholds)
|
|
1314
|
+
let tier = score >= 75 ? 'ultra_high' : // 75+ for highest-end systems (RTX 4090, etc)
|
|
1315
|
+
score >= 55 ? 'high' : // 55-74 for high-end systems like M4 Pro
|
|
1316
|
+
score >= 35 ? 'medium' : // 35-54 for mid-range systems
|
|
1317
|
+
score >= 20 ? 'low' : 'ultra_low'; // 20-34 for budget systems
|
|
1318
|
+
|
|
1319
|
+
// Detect if system has dedicated GPU (not integrated) - improved detection
|
|
1320
|
+
const hasIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(gpuModel);
|
|
1321
|
+
const hasDedicatedGPU = vramGB > 0 && !hasIntegratedGPU && !unified;
|
|
1322
|
+
|
|
1323
|
+
// Debug logging for tier calculation
|
|
1324
|
+
if (process.env.DEBUG_TIER) {
|
|
1325
|
+
console.log(`GPU Model: "${gpuModel}"`);
|
|
1326
|
+
console.log(`Has Integrated GPU: ${hasIntegratedGPU}`);
|
|
1327
|
+
console.log(`Has Dedicated GPU: ${hasDedicatedGPU}`);
|
|
1328
|
+
console.log(`VRAM: ${vramGB}GB`);
|
|
1329
|
+
console.log(`Unified: ${unified}`);
|
|
1330
|
+
console.log(`Initial Tier: ${tier}`);
|
|
1331
|
+
}
|
|
1332
|
+
|
|
1333
|
+
// Cap tier for systems without dedicated GPU to avoid overselling capabilities
|
|
1334
|
+
if (!hasDedicatedGPU && !unified) {
|
|
1335
|
+
// Cap iGPU and CPU-only systems at 'high' tier maximum
|
|
1336
|
+
const maxTier = 'high';
|
|
1337
|
+
const tierValues = { 'ultra_low': 0, 'low': 1, 'medium': 2, 'high': 3, 'ultra_high': 4 };
|
|
1338
|
+
const currentTierValue = tierValues[tier] || 0;
|
|
1339
|
+
const maxTierValue = tierValues[maxTier];
|
|
1340
|
+
if (currentTierValue > maxTierValue) {
|
|
1341
|
+
tier = maxTier;
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
// Ajustes realistas basados en capacidades reales de LLM inference
|
|
1346
|
+
if (vramGB >= 24 && memBandwidthGBs >= 400) {
|
|
1347
|
+
// High-end dedicated GPU boost (RTX 4090, etc.)
|
|
1348
|
+
tier = this.bumpTier(tier, +1);
|
|
1349
|
+
} else if (!vramGB && !unified) {
|
|
1350
|
+
// Windows/Linux CPU-only - significativa limitación pero no extrema
|
|
1351
|
+
tier = this.bumpTier(tier, -1);
|
|
1352
|
+
} else if (hasIntegratedGPU) {
|
|
1353
|
+
// iGPU - limitada pero algo mejor que CPU puro
|
|
1354
|
+
tier = this.bumpTier(tier, -1);
|
|
1355
|
+
} else if (vramGB > 0 && vramGB < 6) {
|
|
1356
|
+
// GPU dedicada con poca VRAM (GTX 1060, etc.)
|
|
1357
|
+
tier = this.bumpTier(tier, -1);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
return {
|
|
1361
|
+
score: Math.round(score),
|
|
1362
|
+
tier: tier,
|
|
1363
|
+
breakdown: {
|
|
1364
|
+
memory_capacity: Math.round(mem_cap * 45),
|
|
1365
|
+
memory_bandwidth: Math.round(mem_bw * 20),
|
|
1366
|
+
compute: Math.round(compute * 20),
|
|
1367
|
+
system_ram: Math.round(sys_ram * 10),
|
|
1368
|
+
storage: Math.round(storage * 5),
|
|
1369
|
+
effective_memory_gb: Math.round(effMem * 10) / 10,
|
|
1370
|
+
bandwidth_gbs: Math.round(memBandwidthGBs),
|
|
1371
|
+
tflops_fp16: tflopsFP16 > 0 ? Math.round(tflopsFP16 * 10) / 10 : 0,
|
|
1372
|
+
apple_silicon_optimized: isAppleSilicon,
|
|
1373
|
+
pc_optimized: isPC
|
|
1374
|
+
}
|
|
1375
|
+
};
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
/**
|
|
1379
|
+
* Apple Silicon-specific specifications and optimization parameters
|
|
1380
|
+
* Based on unified memory architecture and quantization-aware allocation
|
|
1381
|
+
*/
|
|
1382
|
+
getAppleSiliconSpecs(cpuModel, gpuModel, ramGB) {
|
|
1383
|
+
const cpu = cpuModel.toLowerCase();
|
|
1384
|
+
const gpu = gpuModel.toLowerCase();
|
|
1385
|
+
|
|
1386
|
+
// Base specs for different Apple Silicon generations
|
|
1387
|
+
let baseSpecs = {
|
|
1388
|
+
effectiveMemoryRatio: 0.85, // Default unified memory efficiency
|
|
1389
|
+
largeMemoryBonus: 0, // Bonus for large memory configs
|
|
1390
|
+
memoryBandwidth: 100, // GB/s
|
|
1391
|
+
quantizationEfficiency: 1.0, // Quantization optimization factor
|
|
1392
|
+
metalOptimization: 1.2 // Metal backend boost
|
|
1393
|
+
};
|
|
1394
|
+
|
|
1395
|
+
// M4 Pro/Max optimizations
|
|
1396
|
+
if (cpu.includes('m4 pro') || gpu.includes('m4 pro')) {
|
|
1397
|
+
baseSpecs = {
|
|
1398
|
+
effectiveMemoryRatio: 0.90, // Higher efficiency due to newer architecture
|
|
1399
|
+
largeMemoryBonus: 4, // 4GB bonus for 32GB+ configs
|
|
1400
|
+
memoryBandwidth: 273, // 273 GB/s memory bandwidth
|
|
1401
|
+
quantizationEfficiency: 1.15, // Better quantization support
|
|
1402
|
+
metalOptimization: 1.3 // Enhanced Metal backend
|
|
1403
|
+
};
|
|
1404
|
+
} else if (cpu.includes('m4') || gpu.includes('m4')) {
|
|
1405
|
+
baseSpecs = {
|
|
1406
|
+
effectiveMemoryRatio: 0.88,
|
|
1407
|
+
largeMemoryBonus: 2,
|
|
1408
|
+
memoryBandwidth: 120,
|
|
1409
|
+
quantizationEfficiency: 1.10,
|
|
1410
|
+
metalOptimization: 1.25
|
|
1411
|
+
};
|
|
1412
|
+
}
|
|
1413
|
+
// M3 optimizations
|
|
1414
|
+
else if (cpu.includes('m3 max') || gpu.includes('m3 max')) {
|
|
1415
|
+
baseSpecs = {
|
|
1416
|
+
effectiveMemoryRatio: 0.87,
|
|
1417
|
+
largeMemoryBonus: 3,
|
|
1418
|
+
memoryBandwidth: 400,
|
|
1419
|
+
quantizationEfficiency: 1.08,
|
|
1420
|
+
metalOptimization: 1.2
|
|
1421
|
+
};
|
|
1422
|
+
} else if (cpu.includes('m3 pro') || gpu.includes('m3 pro')) {
|
|
1423
|
+
baseSpecs = {
|
|
1424
|
+
effectiveMemoryRatio: 0.86,
|
|
1425
|
+
largeMemoryBonus: 2,
|
|
1426
|
+
memoryBandwidth: 150,
|
|
1427
|
+
quantizationEfficiency: 1.05,
|
|
1428
|
+
metalOptimization: 1.15
|
|
1429
|
+
};
|
|
1430
|
+
} else if (cpu.includes('m3') || gpu.includes('m3')) {
|
|
1431
|
+
baseSpecs = {
|
|
1432
|
+
effectiveMemoryRatio: 0.85,
|
|
1433
|
+
largeMemoryBonus: 1,
|
|
1434
|
+
memoryBandwidth: 100,
|
|
1435
|
+
quantizationEfficiency: 1.03,
|
|
1436
|
+
metalOptimization: 1.1
|
|
1437
|
+
};
|
|
1438
|
+
}
|
|
1439
|
+
// M2 optimizations
|
|
1440
|
+
else if (cpu.includes('m2 max') || gpu.includes('m2 max')) {
|
|
1441
|
+
baseSpecs = {
|
|
1442
|
+
effectiveMemoryRatio: 0.84,
|
|
1443
|
+
largeMemoryBonus: 2,
|
|
1444
|
+
memoryBandwidth: 400,
|
|
1445
|
+
quantizationEfficiency: 1.02,
|
|
1446
|
+
metalOptimization: 1.1
|
|
1447
|
+
};
|
|
1448
|
+
} else if (cpu.includes('m2 pro') || gpu.includes('m2 pro')) {
|
|
1449
|
+
baseSpecs = {
|
|
1450
|
+
effectiveMemoryRatio: 0.83,
|
|
1451
|
+
largeMemoryBonus: 1,
|
|
1452
|
+
memoryBandwidth: 200,
|
|
1453
|
+
quantizationEfficiency: 1.0,
|
|
1454
|
+
metalOptimization: 1.05
|
|
1455
|
+
};
|
|
1456
|
+
} else if (cpu.includes('m2') || gpu.includes('m2')) {
|
|
1457
|
+
baseSpecs = {
|
|
1458
|
+
effectiveMemoryRatio: 0.82,
|
|
1459
|
+
largeMemoryBonus: 0,
|
|
1460
|
+
memoryBandwidth: 100,
|
|
1461
|
+
quantizationEfficiency: 1.0,
|
|
1462
|
+
metalOptimization: 1.0
|
|
1463
|
+
};
|
|
1464
|
+
}
|
|
1465
|
+
// M1 optimizations (legacy but still supported)
|
|
1466
|
+
else if (cpu.includes('m1 max') || gpu.includes('m1 max')) {
|
|
1467
|
+
baseSpecs = {
|
|
1468
|
+
effectiveMemoryRatio: 0.80,
|
|
1469
|
+
largeMemoryBonus: 1,
|
|
1470
|
+
memoryBandwidth: 400,
|
|
1471
|
+
quantizationEfficiency: 0.95,
|
|
1472
|
+
metalOptimization: 1.0
|
|
1473
|
+
};
|
|
1474
|
+
} else if (cpu.includes('m1 pro') || gpu.includes('m1 pro')) {
|
|
1475
|
+
baseSpecs = {
|
|
1476
|
+
effectiveMemoryRatio: 0.78,
|
|
1477
|
+
largeMemoryBonus: 0,
|
|
1478
|
+
memoryBandwidth: 200,
|
|
1479
|
+
quantizationEfficiency: 0.95,
|
|
1480
|
+
metalOptimization: 0.95
|
|
1481
|
+
};
|
|
1482
|
+
} else if (cpu.includes('m1') || gpu.includes('m1')) {
|
|
1483
|
+
baseSpecs = {
|
|
1484
|
+
effectiveMemoryRatio: 0.75,
|
|
1485
|
+
largeMemoryBonus: 0,
|
|
1486
|
+
memoryBandwidth: 68.25,
|
|
1487
|
+
quantizationEfficiency: 0.90,
|
|
1488
|
+
metalOptimization: 0.90
|
|
1489
|
+
};
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1492
|
+
// Apply memory configuration scaling
|
|
1493
|
+
if (ramGB >= 64) {
|
|
1494
|
+
baseSpecs.effectiveMemoryRatio += 0.03; // Bonus for very large memory
|
|
1495
|
+
baseSpecs.largeMemoryBonus += 2;
|
|
1496
|
+
} else if (ramGB >= 32) {
|
|
1497
|
+
baseSpecs.effectiveMemoryRatio += 0.02; // Bonus for large memory
|
|
1498
|
+
} else if (ramGB <= 8) {
|
|
1499
|
+
baseSpecs.effectiveMemoryRatio -= 0.05; // Penalty for small memory
|
|
1500
|
+
}
|
|
1501
|
+
|
|
1502
|
+
return baseSpecs;
|
|
1503
|
+
}
|
|
1504
|
+
|
|
1505
|
+
/**
|
|
1506
|
+
* PC GPU-specific specifications for Windows/Linux discrete GPU systems
|
|
1507
|
+
*/
|
|
1508
|
+
getPCGPUSpecs(hardware, vramGB, ramGB) {
|
|
1509
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
1510
|
+
const gpu = gpuModel.toLowerCase();
|
|
1511
|
+
|
|
1512
|
+
let specs = {
|
|
1513
|
+
offloadCapacity: 0, // Additional effective memory from RAM offload
|
|
1514
|
+
memoryEfficiency: 0.85, // VRAM utilization efficiency
|
|
1515
|
+
backendOptimization: 1.0, // Backend-specific optimization
|
|
1516
|
+
quantizationSupport: 1.0 // Quantization efficiency
|
|
1517
|
+
};
|
|
1518
|
+
|
|
1519
|
+
// NVIDIA GPU optimizations
|
|
1520
|
+
if (gpu.includes('nvidia') || gpu.includes('geforce') || gpu.includes('rtx') || gpu.includes('gtx')) {
|
|
1521
|
+
if (gpu.includes('rtx 50')) {
|
|
1522
|
+
specs = {
|
|
1523
|
+
offloadCapacity: Math.min(ramGB * 0.3, 12), // Up to 12GB offload for RTX 50 series
|
|
1524
|
+
memoryEfficiency: 0.92,
|
|
1525
|
+
backendOptimization: 1.2, // Excellent CUDA optimization
|
|
1526
|
+
quantizationSupport: 1.15 // Great quantization support
|
|
1527
|
+
};
|
|
1528
|
+
} else if (gpu.includes('rtx 40')) {
|
|
1529
|
+
specs = {
|
|
1530
|
+
offloadCapacity: Math.min(ramGB * 0.25, 10), // Up to 10GB offload for RTX 40 series
|
|
1531
|
+
memoryEfficiency: 0.90,
|
|
1532
|
+
backendOptimization: 1.15,
|
|
1533
|
+
quantizationSupport: 1.10
|
|
1534
|
+
};
|
|
1535
|
+
} else if (gpu.includes('rtx 30')) {
|
|
1536
|
+
specs = {
|
|
1537
|
+
offloadCapacity: Math.min(ramGB * 0.2, 8), // Up to 8GB offload for RTX 30 series
|
|
1538
|
+
memoryEfficiency: 0.88,
|
|
1539
|
+
backendOptimization: 1.10,
|
|
1540
|
+
quantizationSupport: 1.05
|
|
1541
|
+
};
|
|
1542
|
+
} else if (gpu.includes('rtx 20') || gpu.includes('gtx 16')) {
|
|
1543
|
+
specs = {
|
|
1544
|
+
offloadCapacity: Math.min(ramGB * 0.15, 6), // Up to 6GB offload for older cards
|
|
1545
|
+
memoryEfficiency: 0.85,
|
|
1546
|
+
backendOptimization: 1.05,
|
|
1547
|
+
quantizationSupport: 1.0
|
|
1548
|
+
};
|
|
1549
|
+
}
|
|
1550
|
+
}
|
|
1551
|
+
// AMD GPU optimizations
|
|
1552
|
+
else if (gpu.includes('amd') || gpu.includes('radeon') || gpu.includes('rx ')) {
|
|
1553
|
+
if (gpu.includes('rx 7000') || gpu.includes('rx 7900') || gpu.includes('rx 7800')) {
|
|
1554
|
+
specs = {
|
|
1555
|
+
offloadCapacity: Math.min(ramGB * 0.2, 8), // Good offload for RDNA3
|
|
1556
|
+
memoryEfficiency: 0.85,
|
|
1557
|
+
backendOptimization: 0.95, // ROCm slightly behind CUDA
|
|
1558
|
+
quantizationSupport: 1.0
|
|
1559
|
+
};
|
|
1560
|
+
} else if (gpu.includes('rx 6000')) {
|
|
1561
|
+
specs = {
|
|
1562
|
+
offloadCapacity: Math.min(ramGB * 0.15, 6), // Moderate offload for RDNA2
|
|
1563
|
+
memoryEfficiency: 0.82,
|
|
1564
|
+
backendOptimization: 0.90,
|
|
1565
|
+
quantizationSupport: 0.95
|
|
1566
|
+
};
|
|
1567
|
+
}
|
|
1568
|
+
}
|
|
1569
|
+
// Intel GPU optimizations
|
|
1570
|
+
else if (gpu.includes('intel') || gpu.includes('arc')) {
|
|
1571
|
+
if (gpu.includes('arc a7') || gpu.includes('arc a5')) {
|
|
1572
|
+
specs = {
|
|
1573
|
+
offloadCapacity: Math.min(ramGB * 0.2, 6), // Decent offload for Arc discrete
|
|
1574
|
+
memoryEfficiency: 0.80,
|
|
1575
|
+
backendOptimization: 0.85, // Intel drivers still maturing
|
|
1576
|
+
quantizationSupport: 0.90
|
|
1577
|
+
};
|
|
1578
|
+
}
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
// Apply memory scaling bonuses
|
|
1582
|
+
if (ramGB >= 32) {
|
|
1583
|
+
specs.offloadCapacity += 2; // Extra offload potential with large RAM
|
|
1584
|
+
}
|
|
1585
|
+
if (vramGB >= 16) {
|
|
1586
|
+
specs.memoryEfficiency += 0.02; // High VRAM efficiency bonus
|
|
1587
|
+
}
|
|
1588
|
+
|
|
1589
|
+
return specs;
|
|
1590
|
+
}
|
|
1591
|
+
|
|
1592
|
+
/**
|
|
1593
|
+
* PC CPU-specific specifications for Windows/Linux CPU-only or iGPU systems
|
|
1594
|
+
*/
|
|
1595
|
+
getPCCPUSpecs(hardware, ramGB) {
|
|
1596
|
+
const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
|
|
1597
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
1598
|
+
const cpu = cpuModel.toLowerCase();
|
|
1599
|
+
const gpu = gpuModel.toLowerCase();
|
|
1600
|
+
const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
|
|
1601
|
+
|
|
1602
|
+
let specs = {
|
|
1603
|
+
effectiveMemoryRatio: 0.6, // Default CPU memory efficiency
|
|
1604
|
+
instructionOptimization: 1.0, // CPU instruction set bonus
|
|
1605
|
+
iGPUAssist: 0, // Integrated GPU assistance
|
|
1606
|
+
thermalHeadroom: 1.0 // Thermal performance factor
|
|
1607
|
+
};
|
|
1608
|
+
|
|
1609
|
+
// Intel CPU optimizations
|
|
1610
|
+
if (cpu.includes('intel')) {
|
|
1611
|
+
if (cpu.includes('i9') || cpu.includes('13th gen') || cpu.includes('14th gen')) {
|
|
1612
|
+
specs.effectiveMemoryRatio = 0.75; // High-end Intel efficiency
|
|
1613
|
+
specs.instructionOptimization = 1.15; // AVX512 + optimization
|
|
1614
|
+
} else if (cpu.includes('i7') || cpu.includes('12th gen')) {
|
|
1615
|
+
specs.effectiveMemoryRatio = 0.70;
|
|
1616
|
+
specs.instructionOptimization = 1.10; // AVX2 + some AVX512
|
|
1617
|
+
} else if (cpu.includes('i5')) {
|
|
1618
|
+
specs.effectiveMemoryRatio = 0.65;
|
|
1619
|
+
specs.instructionOptimization = 1.05;
|
|
1620
|
+
}
|
|
1621
|
+
|
|
1622
|
+
// Intel iGPU assistance
|
|
1623
|
+
if (gpu.includes('iris xe')) {
|
|
1624
|
+
specs.iGPUAssist = 0.05; // 5% effective memory boost from iGPU
|
|
1625
|
+
specs.effectiveMemoryRatio += 0.05;
|
|
1626
|
+
} else if (gpu.includes('uhd')) {
|
|
1627
|
+
specs.iGPUAssist = 0.02; // Minimal iGPU assistance
|
|
1628
|
+
specs.effectiveMemoryRatio += 0.02;
|
|
1629
|
+
}
|
|
1630
|
+
}
|
|
1631
|
+
// AMD CPU optimizations
|
|
1632
|
+
else if (cpu.includes('amd') || cpu.includes('ryzen')) {
|
|
1633
|
+
if (cpu.includes('ryzen 9') || cpu.includes('7000') || cpu.includes('9000')) {
|
|
1634
|
+
specs.effectiveMemoryRatio = 0.72; // High-end AMD efficiency
|
|
1635
|
+
specs.instructionOptimization = 1.12; // Strong AVX2 performance
|
|
1636
|
+
} else if (cpu.includes('ryzen 7') || cpu.includes('5000') || cpu.includes('6000')) {
|
|
1637
|
+
specs.effectiveMemoryRatio = 0.68;
|
|
1638
|
+
specs.instructionOptimization = 1.08;
|
|
1639
|
+
} else if (cpu.includes('ryzen 5')) {
|
|
1640
|
+
specs.effectiveMemoryRatio = 0.65;
|
|
1641
|
+
specs.instructionOptimization = 1.05;
|
|
1642
|
+
}
|
|
1643
|
+
|
|
1644
|
+
// AMD iGPU assistance (RDNA2/3 in APUs)
|
|
1645
|
+
if (gpu.includes('radeon') && gpu.includes('graphics')) {
|
|
1646
|
+
if (gpu.includes('780m') || gpu.includes('880m')) {
|
|
1647
|
+
specs.iGPUAssist = 0.08; // Strong RDNA3 iGPU
|
|
1648
|
+
specs.effectiveMemoryRatio += 0.08;
|
|
1649
|
+
} else if (gpu.includes('680m') || gpu.includes('660m')) {
|
|
1650
|
+
specs.iGPUAssist = 0.06; // Good RDNA2 iGPU
|
|
1651
|
+
specs.effectiveMemoryRatio += 0.06;
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
}
|
|
1655
|
+
|
|
1656
|
+
// Multi-core efficiency scaling
|
|
1657
|
+
if (cores >= 16) {
|
|
1658
|
+
specs.effectiveMemoryRatio += 0.05; // High core count bonus
|
|
1659
|
+
} else if (cores >= 8) {
|
|
1660
|
+
specs.effectiveMemoryRatio += 0.03;
|
|
1661
|
+
}
|
|
1662
|
+
|
|
1663
|
+
// Memory configuration scaling
|
|
1664
|
+
if (ramGB >= 64) {
|
|
1665
|
+
specs.effectiveMemoryRatio += 0.05; // Large memory pool bonus
|
|
1666
|
+
} else if (ramGB >= 32) {
|
|
1667
|
+
specs.effectiveMemoryRatio += 0.03;
|
|
1668
|
+
} else if (ramGB <= 8) {
|
|
1669
|
+
specs.effectiveMemoryRatio -= 0.05; // Small memory penalty
|
|
1670
|
+
}
|
|
1671
|
+
|
|
1672
|
+
return specs;
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
/**
|
|
1676
|
+
* Generate PC-specific recommendations with backend and offload strategies
|
|
1677
|
+
*/
|
|
1678
|
+
async generatePCRecommendations(hardware) {
|
|
1679
|
+
if (!hardware || hardware.cpu?.architecture?.toLowerCase().includes('apple')) {
|
|
1680
|
+
return null; // Not a PC system
|
|
1681
|
+
}
|
|
1682
|
+
|
|
1683
|
+
try {
|
|
1684
|
+
const PCOptimizer = require('./hardware/pc-optimizer');
|
|
1685
|
+
const pcOptimizer = new PCOptimizer();
|
|
1686
|
+
|
|
1687
|
+
// Get detailed PC capabilities
|
|
1688
|
+
const pcCapabilities = await pcOptimizer.detectPCCapabilities();
|
|
1689
|
+
|
|
1690
|
+
// Generate hardware-specific recommendations
|
|
1691
|
+
const recommendations = pcOptimizer.generateRecommendations(pcCapabilities);
|
|
1692
|
+
|
|
1693
|
+
return {
|
|
1694
|
+
platform: 'PC (Windows/Linux)',
|
|
1695
|
+
backend: recommendations.backend,
|
|
1696
|
+
capability: recommendations.capability,
|
|
1697
|
+
recommendations: recommendations.recommendations,
|
|
1698
|
+
hardwareProfile: {
|
|
1699
|
+
gpu: pcCapabilities.gpu,
|
|
1700
|
+
cpu: pcCapabilities.cpu,
|
|
1701
|
+
memory: pcCapabilities.memory,
|
|
1702
|
+
availableBackends: pcCapabilities.backends
|
|
1703
|
+
}
|
|
1704
|
+
};
|
|
1705
|
+
} catch (error) {
|
|
1706
|
+
this.logger.warn('PC optimization failed', { error: error.message });
|
|
1707
|
+
return null;
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1711
|
+
bumpTier(tier, direction) {
|
|
1712
|
+
const clamp = (x, a, b) => Math.max(a, Math.min(b, x));
|
|
1713
|
+
const tiers = ['ultra_low', 'low', 'medium', 'high', 'ultra_high'];
|
|
1714
|
+
const index = tiers.indexOf(tier);
|
|
1715
|
+
const newIndex = clamp(index + direction, 0, tiers.length - 1);
|
|
1716
|
+
return tiers[newIndex];
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
estimateMemoryBandwidth(hardware) {
|
|
1720
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
1721
|
+
const ramType = hardware.memory?.type || 'DDR4';
|
|
1722
|
+
const ramSpeed = hardware.memory?.clockSpeed || 3200;
|
|
1723
|
+
|
|
1724
|
+
// GPU bandwidth (known values)
|
|
1725
|
+
const gpu = gpuModel.toLowerCase();
|
|
1726
|
+
if (gpu.includes('rtx 4090')) return 1008;
|
|
1727
|
+
if (gpu.includes('rtx 4080')) return 716;
|
|
1728
|
+
if (gpu.includes('rtx 4070 ti')) return 504;
|
|
1729
|
+
if (gpu.includes('rtx 4070')) return 448;
|
|
1730
|
+
if (gpu.includes('rtx 4060 ti')) return 288;
|
|
1731
|
+
if (gpu.includes('rtx 4060')) return 272;
|
|
1732
|
+
if (gpu.includes('rtx 3090')) return 936;
|
|
1733
|
+
if (gpu.includes('rtx 3080')) return 760;
|
|
1734
|
+
if (gpu.includes('rtx 3070')) return 448;
|
|
1735
|
+
if (gpu.includes('rx 7900 xtx')) return 960;
|
|
1736
|
+
if (gpu.includes('rx 7900 xt')) return 800;
|
|
1737
|
+
if (gpu.includes('rx 6800 xt')) return 512;
|
|
1738
|
+
if (gpu.includes('m4 pro')) return 273; // Apple M4 Pro
|
|
1739
|
+
if (gpu.includes('m4')) return 120; // Apple M4
|
|
1740
|
+
if (gpu.includes('m3 max')) return 400;
|
|
1741
|
+
if (gpu.includes('m3 pro')) return 150;
|
|
1742
|
+
if (gpu.includes('m3')) return 100;
|
|
1743
|
+
|
|
1744
|
+
// Intel iGPU bandwidth (limited)
|
|
1745
|
+
if (gpu.includes('iris xe')) return 68;
|
|
1746
|
+
if (gpu.includes('uhd')) return 47;
|
|
1747
|
+
|
|
1748
|
+
// Fallback to system RAM bandwidth
|
|
1749
|
+
const channels = 2; // Most common
|
|
1750
|
+
if (ramType.includes('DDR5')) {
|
|
1751
|
+
return (ramSpeed * channels * 8) / 1000; // MT/s to GB/s
|
|
1752
|
+
} else if (ramType.includes('DDR4')) {
|
|
1753
|
+
return (ramSpeed * channels * 8) / 1000;
|
|
1754
|
+
}
|
|
1755
|
+
|
|
1756
|
+
return 50; // Conservative fallback
|
|
1757
|
+
}
|
|
1758
|
+
|
|
1759
|
+
estimateComputeTFLOPs(hardware) {
|
|
1760
|
+
const gpuModel = hardware.gpu?.model || '';
|
|
1761
|
+
const gpu = gpuModel.toLowerCase();
|
|
1762
|
+
|
|
1763
|
+
// Known GPU TFLOPs FP16
|
|
1764
|
+
if (gpu.includes('rtx 4090')) return 165;
|
|
1765
|
+
if (gpu.includes('rtx 4080')) return 121;
|
|
1766
|
+
if (gpu.includes('rtx 4070 ti')) return 83;
|
|
1767
|
+
if (gpu.includes('rtx 4070')) return 64;
|
|
1768
|
+
if (gpu.includes('rtx 4060 ti')) return 44;
|
|
1769
|
+
if (gpu.includes('rtx 4060')) return 32;
|
|
1770
|
+
if (gpu.includes('rtx 3090')) return 142;
|
|
1771
|
+
if (gpu.includes('rtx 3080')) return 116;
|
|
1772
|
+
if (gpu.includes('rtx 3070')) return 82;
|
|
1773
|
+
if (gpu.includes('rx 7900 xtx')) return 123;
|
|
1774
|
+
if (gpu.includes('rx 7900 xt')) return 103;
|
|
1775
|
+
if (gpu.includes('rx 6800 xt')) return 65;
|
|
1776
|
+
if (gpu.includes('m4 pro')) return 28; // Apple M4 Pro GPU
|
|
1777
|
+
if (gpu.includes('m4')) return 15; // Apple M4 GPU
|
|
1778
|
+
if (gpu.includes('m3 max')) return 40;
|
|
1779
|
+
if (gpu.includes('m3 pro')) return 20;
|
|
1780
|
+
if (gpu.includes('m3')) return 10;
|
|
1781
|
+
|
|
1782
|
+
// Intel iGPU (very limited)
|
|
1783
|
+
if (gpu.includes('iris xe')) return 2;
|
|
1784
|
+
if (gpu.includes('uhd')) return 0.5;
|
|
1785
|
+
|
|
1786
|
+
return 0; // Use CPU path
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
detectStorageClass(hardware) {
|
|
1790
|
+
// This would need to be enhanced with actual storage detection
|
|
1791
|
+
// For now, assume NVMe for modern systems
|
|
1792
|
+
const architecture = hardware.cpu?.architecture || hardware.cpu?.brand || '';
|
|
1793
|
+
if (architecture.toLowerCase().includes('apple') ||
|
|
1794
|
+
architecture.toLowerCase().includes('m1') ||
|
|
1795
|
+
architecture.toLowerCase().includes('m2') ||
|
|
1796
|
+
architecture.toLowerCase().includes('m3') ||
|
|
1797
|
+
architecture.toLowerCase().includes('m4')) {
|
|
1798
|
+
return 'NVME'; // Apple Silicon typically has fast storage
|
|
1799
|
+
}
|
|
1800
|
+
|
|
1801
|
+
return 'NVME'; // Conservative assumption for modern systems
|
|
1802
|
+
}
|
|
1803
|
+
|
|
1804
|
+
calculateCloudModelCompatibility(model, hardware) {
|
|
1805
|
+
let score = 50;
|
|
1806
|
+
|
|
1807
|
+
const sizeMatch = model.model_identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
1808
|
+
let modelSizeB = 1;
|
|
1809
|
+
|
|
1810
|
+
if (sizeMatch) {
|
|
1811
|
+
const num = parseFloat(sizeMatch[1]);
|
|
1812
|
+
const unit = sizeMatch[0].slice(-1).toLowerCase();
|
|
1813
|
+
modelSizeB = unit === 'm' ? num / 1000 : num;
|
|
1814
|
+
}
|
|
1815
|
+
|
|
1816
|
+
const estimatedRAM = modelSizeB * 1.2;
|
|
1817
|
+
const ramRatio = hardware.memory.total / estimatedRAM;
|
|
1818
|
+
|
|
1819
|
+
if (ramRatio >= 3) {
|
|
1820
|
+
score += 40;
|
|
1821
|
+
} else if (ramRatio >= 2) {
|
|
1822
|
+
score += 30;
|
|
1823
|
+
} else if (ramRatio >= 1.5) {
|
|
1824
|
+
score += 20;
|
|
1825
|
+
} else if (ramRatio >= 1.2) {
|
|
1826
|
+
score += 10;
|
|
1827
|
+
} else {
|
|
1828
|
+
score -= 20;
|
|
1829
|
+
}
|
|
1830
|
+
|
|
1831
|
+
if (modelSizeB <= 0.5) {
|
|
1832
|
+
score += 25;
|
|
1833
|
+
} else if (modelSizeB <= 1) {
|
|
1834
|
+
score += 20;
|
|
1835
|
+
} else if (modelSizeB <= 3) {
|
|
1836
|
+
score += 15;
|
|
1837
|
+
} else if (modelSizeB <= 7) {
|
|
1838
|
+
score += 10;
|
|
1839
|
+
} else if (modelSizeB <= 13) {
|
|
1840
|
+
score += 5;
|
|
1841
|
+
} else {
|
|
1842
|
+
score -= 15;
|
|
1843
|
+
}
|
|
1844
|
+
|
|
1845
|
+
const hardwareTier = this.getHardwareTier(hardware);
|
|
1846
|
+
switch (hardwareTier) {
|
|
1847
|
+
case 'ultra_high':
|
|
1848
|
+
score += 15;
|
|
1849
|
+
break;
|
|
1850
|
+
case 'high':
|
|
1851
|
+
score += 10;
|
|
1852
|
+
break;
|
|
1853
|
+
case 'medium':
|
|
1854
|
+
score += 5;
|
|
1855
|
+
break;
|
|
1856
|
+
case 'low':
|
|
1857
|
+
if (modelSizeB <= 3) score += 5;
|
|
1858
|
+
break;
|
|
1859
|
+
case 'ultra_low':
|
|
1860
|
+
if (modelSizeB <= 1) score += 10;
|
|
1861
|
+
else score -= 10;
|
|
1862
|
+
break;
|
|
1863
|
+
}
|
|
1864
|
+
|
|
1865
|
+
if (hardware.cpu.cores >= 8) {
|
|
1866
|
+
score += 10;
|
|
1867
|
+
} else if (hardware.cpu.cores >= 4) {
|
|
1868
|
+
score += 5;
|
|
1869
|
+
} else if (hardware.cpu.cores < 4) {
|
|
1870
|
+
score -= 5;
|
|
1871
|
+
}
|
|
1872
|
+
|
|
1873
|
+
const pulls = model.pulls || 0;
|
|
1874
|
+
if (pulls > 10000000) {
|
|
1875
|
+
score += 15;
|
|
1876
|
+
} else if (pulls > 1000000) {
|
|
1877
|
+
score += 10;
|
|
1878
|
+
} else if (pulls > 100000) {
|
|
1879
|
+
score += 5;
|
|
1880
|
+
}
|
|
1881
|
+
|
|
1882
|
+
if (model.model_type === 'official') {
|
|
1883
|
+
score += 8;
|
|
1884
|
+
}
|
|
1885
|
+
|
|
1886
|
+
const identifier = model.model_identifier.toLowerCase();
|
|
1887
|
+
if (identifier.includes('tinyllama') || identifier.includes('phi3') || identifier.includes('qwen')) {
|
|
1888
|
+
score += 5;
|
|
1889
|
+
}
|
|
1890
|
+
|
|
1891
|
+
if (identifier.includes('code') && hardware.cpu.cores >= 6) {
|
|
1892
|
+
score += 5;
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
if (identifier.includes('mini') || identifier.includes('tiny')) {
|
|
1896
|
+
score += 8;
|
|
1897
|
+
}
|
|
1898
|
+
|
|
1899
|
+
if (hardware.cpu.architecture === 'Apple Silicon') {
|
|
1900
|
+
score += 5;
|
|
1901
|
+
}
|
|
1902
|
+
|
|
1903
|
+
this.logger.debug(`Model ${model.model_identifier}: size=${modelSizeB}B, RAM ratio=${ramRatio.toFixed(2)}, score=${score}`);
|
|
1904
|
+
|
|
1905
|
+
return Math.max(0, Math.min(100, Math.round(score)));
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
getCloudModelReason(model, hardware) {
|
|
1909
|
+
const identifier = model.model_identifier.toLowerCase();
|
|
1910
|
+
const sizeMatch = model.model_identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
1911
|
+
const modelSizeB = sizeMatch ?
|
|
1912
|
+
(sizeMatch[0].slice(-1).toLowerCase() === 'm' ? parseFloat(sizeMatch[1]) / 1000 : parseFloat(sizeMatch[1])) : 1;
|
|
1913
|
+
|
|
1914
|
+
if (identifier.includes('qwen') && modelSizeB <= 1) {
|
|
1915
|
+
return 'Ultra-efficient Chinese model, great for limited hardware';
|
|
1916
|
+
}
|
|
1917
|
+
if (identifier.includes('tinyllama')) {
|
|
1918
|
+
return 'Tiny but capable, perfect for testing and light tasks';
|
|
1919
|
+
}
|
|
1920
|
+
if (identifier.includes('phi3') && identifier.includes('mini')) {
|
|
1921
|
+
return 'Microsoft\'s efficient model with excellent reasoning';
|
|
1922
|
+
}
|
|
1923
|
+
if (identifier.includes('gemma') && modelSizeB <= 2) {
|
|
1924
|
+
return 'Google\'s compact model, well-optimized';
|
|
1925
|
+
}
|
|
1926
|
+
if (identifier.includes('mistral') && modelSizeB <= 7) {
|
|
1927
|
+
return 'High-quality European model, excellent performance';
|
|
1928
|
+
}
|
|
1929
|
+
if (identifier.includes('llama3.2') && modelSizeB <= 3) {
|
|
1930
|
+
return 'Meta\'s latest compact model, state-of-the-art';
|
|
1931
|
+
}
|
|
1932
|
+
if (identifier.includes('code')) {
|
|
1933
|
+
return 'Specialized for coding tasks';
|
|
1934
|
+
}
|
|
1935
|
+
|
|
1936
|
+
const ramRatio = hardware.memory.total / (modelSizeB * 0.6);
|
|
1937
|
+
|
|
1938
|
+
if (modelSizeB <= 1) {
|
|
1939
|
+
return 'Ultra-small model, runs very fast on your hardware';
|
|
1940
|
+
} else if (modelSizeB <= 3 && ramRatio >= 2) {
|
|
1941
|
+
return 'Small model with good performance balance';
|
|
1942
|
+
} else if (modelSizeB <= 7 && ramRatio >= 1.5) {
|
|
1943
|
+
return 'Medium-sized model, good capabilities';
|
|
1944
|
+
} else if (ramRatio >= 1.2) {
|
|
1945
|
+
return 'Should run well on your system';
|
|
1946
|
+
} else {
|
|
1947
|
+
return 'Recommended with quantization for your hardware';
|
|
1948
|
+
}
|
|
1949
|
+
}
|
|
1950
|
+
|
|
1951
|
+
extractModelSize(identifier) {
|
|
1952
|
+
const sizeMatch = identifier.match(/(\d+\.?\d*)[bm]/i);
|
|
1953
|
+
if (sizeMatch) {
|
|
1954
|
+
const num = parseFloat(sizeMatch[1]);
|
|
1955
|
+
const unit = sizeMatch[0].slice(-1).toUpperCase();
|
|
1956
|
+
return `${num}${unit}`;
|
|
1957
|
+
}
|
|
1958
|
+
return 'Unknown';
|
|
1959
|
+
}
|
|
1960
|
+
|
|
1961
|
+
getOllamaCommand(model) {
|
|
1962
|
+
const mapping = {
|
|
1963
|
+
'TinyLlama 1.1B': 'ollama pull tinyllama:1.1b',
|
|
1964
|
+
'Qwen 0.5B': 'ollama pull qwen:0.5b',
|
|
1965
|
+
'Gemma 2B': 'ollama pull gemma2:2b',
|
|
1966
|
+
'Phi-3 Mini 3.8B': 'ollama pull phi3:mini',
|
|
1967
|
+
'Llama 3.2 3B': 'ollama pull llama3.2:3b',
|
|
1968
|
+
'Llama 3.1 8B': 'ollama pull llama3.1:8b',
|
|
1969
|
+
'Mistral 7B v0.3': 'ollama pull mistral:7b',
|
|
1970
|
+
'CodeLlama 7B': 'ollama pull codellama:7b',
|
|
1971
|
+
'Qwen 2.5 7B': 'ollama pull qwen2.5:7b'
|
|
1972
|
+
};
|
|
1973
|
+
|
|
1974
|
+
return mapping[model.name] || null;
|
|
1975
|
+
}
|
|
1976
|
+
|
|
1977
|
+
getRecommendationReason(model, hardware) {
|
|
1978
|
+
if (model.specialization === 'code') {
|
|
1979
|
+
return 'Excellent for coding tasks';
|
|
1980
|
+
}
|
|
1981
|
+
if (hardware.memory.total >= 16 && model.size.includes('8B')) {
|
|
1982
|
+
return 'Perfect size for your RAM capacity';
|
|
1983
|
+
}
|
|
1984
|
+
if (model.category === 'small' && hardware.memory.total < 16) {
|
|
1985
|
+
return 'Optimized for systems with limited RAM';
|
|
1986
|
+
}
|
|
1987
|
+
return 'Great balance of performance and efficiency';
|
|
1988
|
+
}
|
|
1989
|
+
|
|
1990
|
+
calculatePriority(model, hardware) {
|
|
1991
|
+
let priority = 50;
|
|
1992
|
+
const modelSize = this.parseModelSize(model.size);
|
|
1993
|
+
const requiredRAM = model.requirements?.ram || 4;
|
|
1994
|
+
const ramRatio = hardware.memory.total / requiredRAM;
|
|
1995
|
+
|
|
1996
|
+
if (ramRatio >= 2) priority += 20;
|
|
1997
|
+
else if (ramRatio >= 1.5) priority += 10;
|
|
1998
|
+
else if (ramRatio < 1) priority -= 20;
|
|
1999
|
+
|
|
2000
|
+
if (modelSize <= 1) priority += 15;
|
|
2001
|
+
else if (modelSize <= 3) priority += 10;
|
|
2002
|
+
else if (modelSize <= 7) priority += 5;
|
|
2003
|
+
else if (modelSize > 30) priority -= 15;
|
|
2004
|
+
|
|
2005
|
+
if (model.specialization === 'code') priority += 15;
|
|
2006
|
+
else if (model.specialization === 'chat') priority += 10;
|
|
2007
|
+
else if (model.specialization === 'embeddings') priority += 5;
|
|
2008
|
+
|
|
2009
|
+
if (model.year >= 2024) priority += 10;
|
|
2010
|
+
else if (model.year >= 2023) priority += 5;
|
|
2011
|
+
|
|
2012
|
+
if (hardware.gpu.dedicated && model.requirements?.vram > 0) {
|
|
2013
|
+
if (hardware.gpu.vram >= model.requirements.vram) {
|
|
2014
|
+
priority += 10;
|
|
2015
|
+
} else {
|
|
2016
|
+
priority -= 5;
|
|
2017
|
+
}
|
|
2018
|
+
}
|
|
2019
|
+
|
|
2020
|
+
if (hardware.cpu.architecture === 'Apple Silicon' &&
|
|
2021
|
+
model.frameworks?.includes('llama.cpp')) {
|
|
2022
|
+
priority += 8;
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
return Math.max(0, priority);
|
|
2026
|
+
}
|
|
2027
|
+
|
|
2028
|
+
parseModelSize(sizeString) {
|
|
2029
|
+
const match = sizeString.match(/(\d+\.?\d*)[BM]/i);
|
|
2030
|
+
if (!match) return 1;
|
|
2031
|
+
|
|
2032
|
+
const num = parseFloat(match[1]);
|
|
2033
|
+
const unit = match[0].slice(-1).toUpperCase();
|
|
2034
|
+
|
|
2035
|
+
return unit === 'B' ? num : num / 1000;
|
|
2036
|
+
}
|
|
2037
|
+
|
|
2038
|
+
getUseCaseRecommendations(results, useCase) {
|
|
2039
|
+
const recommendations = [];
|
|
2040
|
+
|
|
2041
|
+
switch (useCase) {
|
|
2042
|
+
case 'code':
|
|
2043
|
+
const codeModels = results.compatible.filter(m => m.specialization === 'code');
|
|
2044
|
+
if (codeModels.length > 0) {
|
|
2045
|
+
recommendations.push(`Top coding model: ${codeModels[0].name}`);
|
|
2046
|
+
}
|
|
2047
|
+
break;
|
|
2048
|
+
|
|
2049
|
+
case 'chat':
|
|
2050
|
+
const chatModels = results.compatible.filter(m =>
|
|
2051
|
+
m.specialization === 'chat' || m.specialization === 'general'
|
|
2052
|
+
);
|
|
2053
|
+
if (chatModels.length > 0) {
|
|
2054
|
+
recommendations.push(`Best chat model: ${chatModels[0].name}`);
|
|
2055
|
+
}
|
|
2056
|
+
break;
|
|
2057
|
+
|
|
2058
|
+
case 'multimodal':
|
|
2059
|
+
const multiModels = results.compatible.filter(m => m.multimodal);
|
|
2060
|
+
if (multiModels.length > 0) {
|
|
2061
|
+
recommendations.push(`Multimodal option: ${multiModels[0].name}`);
|
|
2062
|
+
}
|
|
2063
|
+
break;
|
|
2064
|
+
}
|
|
2065
|
+
|
|
2066
|
+
return recommendations;
|
|
2067
|
+
}
|
|
2068
|
+
|
|
2069
|
+
generateEnhancedSummary(hardware, results, ollamaIntegration) {
|
|
2070
|
+
const baseSummary = this.generateSummary(hardware, results);
|
|
2071
|
+
|
|
2072
|
+
return {
|
|
2073
|
+
...baseSummary,
|
|
2074
|
+
ollama: {
|
|
2075
|
+
available: ollamaIntegration.ollamaInfo.available,
|
|
2076
|
+
installedModels: ollamaIntegration.compatibleOllamaModels.length,
|
|
2077
|
+
runningModels: ollamaIntegration.currentlyRunning.length,
|
|
2078
|
+
recommendedInstalls: ollamaIntegration.recommendedPulls.length
|
|
2079
|
+
},
|
|
2080
|
+
hardwareTier: this.getHardwareTier(hardware),
|
|
2081
|
+
topPerformanceModel: this.getTopPerformanceModel(results)
|
|
2082
|
+
};
|
|
2083
|
+
}
|
|
2084
|
+
|
|
2085
|
+
getTopPerformanceModel(results) {
|
|
2086
|
+
if (results.compatible.length === 0) return null;
|
|
2087
|
+
|
|
2088
|
+
const sorted = results.compatible
|
|
2089
|
+
.filter(m => m.performanceEstimate)
|
|
2090
|
+
.sort((a, b) => (b.performanceEstimate.estimatedTokensPerSecond || 0) -
|
|
2091
|
+
(a.performanceEstimate.estimatedTokensPerSecond || 0));
|
|
2092
|
+
|
|
2093
|
+
return sorted[0] || results.compatible[0];
|
|
2094
|
+
}
|
|
2095
|
+
|
|
2096
|
+
async analyzeOllamaModel(modelName) {
|
|
2097
|
+
try {
|
|
2098
|
+
const [hardware, model] = await Promise.all([
|
|
2099
|
+
this.getSystemInfo(),
|
|
2100
|
+
Promise.resolve(this.findModel(modelName))
|
|
2101
|
+
]);
|
|
2102
|
+
|
|
2103
|
+
if (!model) {
|
|
2104
|
+
throw new Error(`Model "${modelName}" not found in database`);
|
|
2105
|
+
}
|
|
2106
|
+
|
|
2107
|
+
const [localModels, runningModels] = await Promise.all([
|
|
2108
|
+
this.ollamaClient.getLocalModels().catch(error => {
|
|
2109
|
+
this.logger.warn('Failed to get local Ollama models for analysis', { error: error.message });
|
|
2110
|
+
return [];
|
|
2111
|
+
}),
|
|
2112
|
+
this.ollamaClient.getRunningModels().catch(error => {
|
|
2113
|
+
this.logger.warn('Failed to get running Ollama models for analysis', { error: error.message });
|
|
2114
|
+
return [];
|
|
2115
|
+
})
|
|
2116
|
+
]);
|
|
2117
|
+
|
|
2118
|
+
const isInstalled = localModels.some(m => m.name.toLowerCase().includes(modelName.toLowerCase()));
|
|
2119
|
+
const isRunning = runningModels.some(m => m.name.toLowerCase().includes(modelName.toLowerCase()));
|
|
2120
|
+
|
|
2121
|
+
const [compatibility, performance] = await Promise.all([
|
|
2122
|
+
Promise.resolve(this.compatibilityAnalyzer.calculateModelCompatibility(hardware, model)),
|
|
2123
|
+
this.performanceAnalyzer.estimateModelPerformance(model, hardware)
|
|
2124
|
+
]);
|
|
2125
|
+
|
|
2126
|
+
let benchmarkResults = null;
|
|
2127
|
+
if (isInstalled) {
|
|
2128
|
+
try {
|
|
2129
|
+
benchmarkResults = await this.performanceAnalyzer.benchmarkInferenceSpeed(
|
|
2130
|
+
modelName, hardware, this.ollamaClient
|
|
2131
|
+
);
|
|
2132
|
+
} catch (error) {
|
|
2133
|
+
this.logger.warn(`Benchmark failed for ${modelName}`, { error });
|
|
2134
|
+
}
|
|
2135
|
+
}
|
|
2136
|
+
|
|
2137
|
+
return {
|
|
2138
|
+
model,
|
|
2139
|
+
hardware,
|
|
2140
|
+
status: {
|
|
2141
|
+
installed: isInstalled,
|
|
2142
|
+
running: isRunning,
|
|
2143
|
+
canRun: compatibility.score >= 60
|
|
2144
|
+
},
|
|
2145
|
+
compatibility,
|
|
2146
|
+
performance,
|
|
2147
|
+
benchmarkResults,
|
|
2148
|
+
recommendations: this.generateModelSpecificRecommendations(model, hardware, compatibility)
|
|
2149
|
+
};
|
|
2150
|
+
|
|
2151
|
+
} catch (error) {
|
|
2152
|
+
this.logger.error('Model analysis failed', { error: error.message, component: 'LLMChecker', method: 'analyzeOllamaModel' });
|
|
2153
|
+
throw error;
|
|
2154
|
+
}
|
|
2155
|
+
}
|
|
2156
|
+
|
|
2157
|
+
generateModelSpecificRecommendations(model, hardware, compatibility) {
|
|
2158
|
+
const recommendations = [];
|
|
2159
|
+
|
|
2160
|
+
if (compatibility.score < 60) {
|
|
2161
|
+
recommendations.push('Model may not run well on this hardware');
|
|
2162
|
+
recommendations.push('Consider using heavy quantization (Q2_K, Q3_K_M)');
|
|
2163
|
+
} else if (compatibility.score < 75) {
|
|
2164
|
+
recommendations.push('✅ Model should run with some optimizations');
|
|
2165
|
+
recommendations.push('Use Q4_K_M quantization for best balance');
|
|
2166
|
+
} else {
|
|
2167
|
+
recommendations.push('Model should run excellently on this hardware');
|
|
2168
|
+
if (hardware.memory.total >= 32) {
|
|
2169
|
+
recommendations.push('You can use higher quality quantization (Q5_K_M, Q6_K)');
|
|
2170
|
+
}
|
|
2171
|
+
}
|
|
2172
|
+
|
|
2173
|
+
if (hardware.gpu.dedicated && hardware.gpu.vram >= (model.requirements?.vram || 0)) {
|
|
2174
|
+
recommendations.push('Enable GPU acceleration for faster inference');
|
|
2175
|
+
}
|
|
2176
|
+
|
|
2177
|
+
return recommendations;
|
|
2178
|
+
}
|
|
2179
|
+
|
|
2180
|
+
filterModels(models, filter) {
|
|
2181
|
+
switch (filter.toLowerCase()) {
|
|
2182
|
+
case 'local':
|
|
2183
|
+
return models.filter(m => m.type === 'local');
|
|
2184
|
+
case 'cloud':
|
|
2185
|
+
return models.filter(m => m.type === 'cloud');
|
|
2186
|
+
case 'ultra_small':
|
|
2187
|
+
return models.filter(m => m.category === 'ultra_small');
|
|
2188
|
+
case 'small':
|
|
2189
|
+
return models.filter(m => m.category === 'small');
|
|
2190
|
+
case 'medium':
|
|
2191
|
+
return models.filter(m => m.category === 'medium');
|
|
2192
|
+
case 'large':
|
|
2193
|
+
return models.filter(m => m.category === 'large');
|
|
2194
|
+
case 'code':
|
|
2195
|
+
return models.filter(m => m.specialization === 'code');
|
|
2196
|
+
case 'chat':
|
|
2197
|
+
return models.filter(m => m.specialization === 'chat' || !m.specialization);
|
|
2198
|
+
case 'multimodal':
|
|
2199
|
+
return models.filter(m => m.specialization === 'multimodal' || m.multimodal);
|
|
2200
|
+
case 'embeddings':
|
|
2201
|
+
return models.filter(m => m.specialization === 'embeddings');
|
|
2202
|
+
default:
|
|
2203
|
+
return models;
|
|
2204
|
+
}
|
|
2205
|
+
}
|
|
2206
|
+
|
|
2207
|
+
generateSummary(hardware, compatibility) {
|
|
2208
|
+
return {
|
|
2209
|
+
grade: this.calculateGrade(compatibility),
|
|
2210
|
+
systemClass: this.getSystemClass(hardware),
|
|
2211
|
+
compatibleCount: compatibility.compatible.length,
|
|
2212
|
+
marginalCount: compatibility.marginal.length,
|
|
2213
|
+
incompatibleCount: compatibility.incompatible.length,
|
|
2214
|
+
totalModels: compatibility.compatible.length + compatibility.marginal.length + compatibility.incompatible.length
|
|
2215
|
+
};
|
|
2216
|
+
}
|
|
2217
|
+
|
|
2218
|
+
calculateGrade(compatibility) {
|
|
2219
|
+
const total = compatibility.compatible.length + compatibility.marginal.length + compatibility.incompatible.length;
|
|
2220
|
+
const compatiblePercent = total > 0 ? (compatibility.compatible.length / total) * 100 : 0;
|
|
2221
|
+
|
|
2222
|
+
if (compatiblePercent >= 80) return 'A';
|
|
2223
|
+
if (compatiblePercent >= 60) return 'B';
|
|
2224
|
+
if (compatiblePercent >= 40) return 'C';
|
|
2225
|
+
if (compatiblePercent >= 20) return 'D';
|
|
2226
|
+
return 'F';
|
|
2227
|
+
}
|
|
2228
|
+
|
|
2229
|
+
getSystemClass(hardware) {
|
|
2230
|
+
if (hardware.memory.total >= 32 && hardware.gpu.vram >= 16) return 'High End';
|
|
2231
|
+
if (hardware.memory.total >= 16 && hardware.gpu.vram >= 8) return 'Mid Range';
|
|
2232
|
+
if (hardware.memory.total >= 8) return 'Budget';
|
|
2233
|
+
return 'Entry Level';
|
|
2234
|
+
}
|
|
2235
|
+
|
|
2236
|
+
async getOllamaInfo() {
|
|
2237
|
+
return await this.integrateOllamaModels(await this.getSystemInfo(), []);
|
|
2238
|
+
}
|
|
2239
|
+
|
|
2240
|
+
async getSystemInfo() {
|
|
2241
|
+
return await this.hardwareDetector.getSystemInfo();
|
|
2242
|
+
}
|
|
2243
|
+
|
|
2244
|
+
  // Expose the full static model catalogue from the expanded database.
  getAllModels() {
    return this.expandedModelsDatabase.getAllModels();
  }
|
|
2247
|
+
|
|
2248
|
+
findModel(name) {
|
|
2249
|
+
return this.expandedModelsDatabase.findModel ?
|
|
2250
|
+
this.expandedModelsDatabase.findModel(name) :
|
|
2251
|
+
this.getAllModels().find(m => m.name.toLowerCase().includes(name.toLowerCase()));
|
|
2252
|
+
}
|
|
2253
|
+
|
|
2254
|
+
|
|
2255
|
+
async generateIntelligentRecommendations(hardware) {
|
|
2256
|
+
try {
|
|
2257
|
+
this.logger.info('Generating intelligent recommendations...');
|
|
2258
|
+
|
|
2259
|
+
// Obtener todos los modelos de Ollama
|
|
2260
|
+
const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
|
|
2261
|
+
const allModels = ollamaData.models || [];
|
|
2262
|
+
|
|
2263
|
+
if (allModels.length === 0) {
|
|
2264
|
+
this.logger.warn('No Ollama models available for recommendations');
|
|
2265
|
+
return null;
|
|
2266
|
+
}
|
|
2267
|
+
|
|
2268
|
+
// Generar recomendaciones inteligentes
|
|
2269
|
+
const recommendations = await this.intelligentRecommender.getBestModelsForHardware(hardware, allModels);
|
|
2270
|
+
const summary = this.intelligentRecommender.generateRecommendationSummary(recommendations, hardware);
|
|
2271
|
+
|
|
2272
|
+
this.logger.info(`Generated recommendations for ${Object.keys(recommendations).length} categories`);
|
|
2273
|
+
|
|
2274
|
+
return {
|
|
2275
|
+
recommendations,
|
|
2276
|
+
summary,
|
|
2277
|
+
totalModelsAnalyzed: allModels.length,
|
|
2278
|
+
generatedAt: new Date().toISOString()
|
|
2279
|
+
};
|
|
2280
|
+
|
|
2281
|
+
} catch (error) {
|
|
2282
|
+
this.logger.error('Failed to generate intelligent recommendations', { error: error.message });
|
|
2283
|
+
return null;
|
|
2284
|
+
}
|
|
2285
|
+
}
|
|
2286
|
+
|
|
2287
|
+
}
|
|
2288
|
+
|
|
2289
|
+
// CommonJS entry point: the LLMChecker class is this module's sole export.
module.exports = LLMChecker;
|