llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
/**
 * Analyzes local hardware (CPU, memory, GPU, storage) and estimates how well
 * LLM inference will run on it.
 *
 * Full system analyses are cached in memory for 24 hours, keyed by a simple
 * hardware fingerprint (see generateCacheKey).
 */
class PerformanceAnalyzer {
  constructor() {
    // fingerprint string -> { data, timestamp }
    this.benchmarkCache = new Map();
    this.cacheExpiry = 24 * 60 * 60 * 1000; // 24 hours
  }

  /**
   * Runs the full system analysis and combines component scores into a
   * weighted overall score. Results are served from cache when fresh.
   *
   * @param {object} hardware - Detected hardware ({ cpu, memory, gpu }).
   * @returns {Promise<object>} { cpu, memory, gpu, storage, overall }
   */
  async analyzeSystemPerformance(hardware) {
    const cacheKey = this.generateCacheKey(hardware);
    const cached = this.benchmarkCache.get(cacheKey);

    if (cached && Date.now() - cached.timestamp < this.cacheExpiry) {
      return cached.data;
    }

    const performance = {
      cpu: await this.analyzeCPUPerformance(hardware.cpu),
      memory: await this.analyzeMemoryPerformance(hardware.memory),
      gpu: await this.analyzeGPUPerformance(hardware.gpu),
      storage: await this.analyzeStoragePerformance(),
      overall: 0
    };

    // Weighted overall score: GPU weighs most for LLM inference, storage least.
    performance.overall = Math.round(
      performance.cpu.score * 0.3 +
      performance.memory.score * 0.25 +
      performance.gpu.score * 0.35 +
      performance.storage.score * 0.1
    );

    this.benchmarkCache.set(cacheKey, {
      data: performance,
      timestamp: Date.now()
    });

    return performance;
  }

  /**
   * Analyzes CPU details and runs lightweight micro-benchmarks.
   *
   * @param {object} cpu - Detected CPU info (cores, speed, architecture, ...).
   * @returns {Promise<object>} { score, details, benchmarks, strengths, weaknesses, recommendations }
   */
  async analyzeCPUPerformance(cpu) {
    const analysis = {
      score: cpu.score || 50, // neutral default when no detector score exists
      details: {
        cores: cpu.cores,
        // Assumes SMT/hyperthreading (2 threads per physical core); guarded
        // so a missing physicalCores no longer produces NaN.
        threads: (cpu.physicalCores ?? cpu.cores ?? 0) * 2,
        speed: cpu.speedMax || cpu.speed,
        architecture: cpu.architecture,
        cache: cpu.cache
      },
      benchmarks: {},
      strengths: [],
      weaknesses: [],
      recommendations: []
    };

    // Benchmarks are best-effort: record the failure instead of aborting the
    // whole analysis.
    try {
      analysis.benchmarks = await this.runCPUBenchmarks();
    } catch (error) {
      analysis.benchmarks = { error: error.message };
    }

    this.analyzeCPUCharacteristics(analysis);

    return analysis;
  }

  /**
   * Runs three quick micro-benchmarks: single-thread math, "multi-thread"
   * math (Promise-based approximation — still one JS thread), and a memory
   * fill+sort pass. Scores are (cap - elapsed_ms), floored at 0.
   *
   * @returns {Promise<object>} { singleThread, multiThread, memoryBandwidth }
   */
  async runCPUBenchmarks() {
    const results = {};

    // Single-threaded performance test. `result` exists so the loop is not
    // eliminated as dead code.
    const singleThreadStart = process.hrtime.bigint();
    let result = 0;
    for (let i = 0; i < 1000000; i++) {
      result += Math.sqrt(i) * Math.sin(i);
    }
    const singleThreadEnd = process.hrtime.bigint();
    results.singleThread = {
      duration: Number(singleThreadEnd - singleThreadStart) / 1000000, // ms
      score: Math.max(0, 1000 - (Number(singleThreadEnd - singleThreadStart) / 1000000))
    };

    // "Multi-threaded" test: Promise.all does NOT use extra OS threads; this
    // only approximates scheduling overhead with 4 parallel tasks.
    const multiThreadStart = process.hrtime.bigint();
    const workers = Array.from({ length: 4 }, () =>
      Promise.resolve().then(() => {
        let acc = 0;
        for (let i = 0; i < 250000; i++) {
          acc += Math.sqrt(i) * Math.sin(i);
        }
        return acc;
      })
    );
    await Promise.all(workers);
    const multiThreadEnd = process.hrtime.bigint();
    results.multiThread = {
      duration: Number(multiThreadEnd - multiThreadStart) / 1000000, // ms
      score: Math.max(0, 1000 - (Number(multiThreadEnd - multiThreadStart) / 1000000))
    };

    // Memory bandwidth test: fill 1M doubles, then sort them.
    const memBandwidthStart = process.hrtime.bigint();
    const largeArray = new Array(1000000).fill(0);
    for (let i = 0; i < largeArray.length; i++) {
      largeArray[i] = Math.random();
    }
    // Numeric comparator required: the default sort compares as strings and
    // would mis-order numbers (e.g. [1, 10, 2]).
    largeArray.sort((a, b) => a - b);
    const memBandwidthEnd = process.hrtime.bigint();
    results.memoryBandwidth = {
      duration: Number(memBandwidthEnd - memBandwidthStart) / 1000000, // ms
      score: Math.max(0, 2000 - (Number(memBandwidthEnd - memBandwidthStart) / 1000000))
    };

    return results;
  }

  /**
   * Populates strengths/weaknesses/recommendations on a CPU analysis object
   * (mutates `analysis` in place).
   */
  analyzeCPUCharacteristics(analysis) {
    const { details, benchmarks } = analysis;

    // Strengths
    if (details.cores >= 8) {
      analysis.strengths.push('High core count suitable for parallel processing');
    }
    if (details.speed >= 3.5) {
      analysis.strengths.push('High clock speed for single-threaded performance');
    }
    if (details.architecture === 'Apple Silicon') {
      analysis.strengths.push('Unified memory architecture with excellent efficiency');
    }
    // Optional chaining: cache info is not reported by every detector.
    if (details.cache?.l3 >= 16) {
      analysis.strengths.push('Large L3 cache improves model loading performance');
    }

    // Weaknesses
    if (details.cores < 4) {
      analysis.weaknesses.push('Low core count may limit concurrent model execution');
    }
    if (details.speed < 2.5) {
      analysis.weaknesses.push('Low clock speed may impact inference speed');
    }
    if (benchmarks.singleThread?.score < 500) {
      analysis.weaknesses.push('Below-average single-threaded performance');
    }

    // Recommendations
    if (details.cores >= 8) {
      analysis.recommendations.push('Consider running multiple small models simultaneously');
    }
    if (details.architecture === 'Apple Silicon') {
      analysis.recommendations.push('Use llama.cpp with Metal acceleration for optimal performance');
    }
    if (details.cores < 6) {
      analysis.recommendations.push('Focus on smaller models (1B-7B parameters)');
    }
  }

  /**
   * Analyzes RAM capacity/usage and derives model-size adequacy flags.
   *
   * @param {object} memory - { total, free, usagePercent, score } (GB / %).
   * @returns {Promise<object>} { score, details, characteristics, recommendations }
   */
  async analyzeMemoryPerformance(memory) {
    const analysis = {
      score: memory.score || 50,
      details: {
        total: memory.total,
        available: memory.free,
        usage: memory.usagePercent,
        type: 'Unknown' // RAM type (DDR4/DDR5) would need extra detection
      },
      characteristics: {},
      recommendations: []
    };

    this.analyzeMemoryCharacteristics(analysis);

    return analysis;
  }

  /**
   * Populates adequacy flags and recommendations on a memory analysis object
   * (mutates `analysis` in place). Thresholds are total RAM in GB.
   */
  analyzeMemoryCharacteristics(analysis) {
    const { details } = analysis;

    // Which model-size tiers this amount of RAM can plausibly host.
    analysis.characteristics = {
      ultraSmall: details.total >= 2,
      small: details.total >= 8,
      medium: details.total >= 16,
      large: details.total >= 32,
      ultraLarge: details.total >= 64
    };

    if (details.total < 8) {
      analysis.recommendations.push('Upgrade to 16GB+ RAM for better model compatibility');
      analysis.recommendations.push('Use aggressive quantization (Q2_K, Q3_K_M)');
    } else if (details.total < 16) {
      analysis.recommendations.push('Current RAM suitable for small-medium models');
      analysis.recommendations.push('Consider 32GB for large model flexibility');
    } else if (details.total >= 32) {
      analysis.recommendations.push('Excellent RAM capacity for most models');
      analysis.recommendations.push('Can run multiple models simultaneously');
    }

    if (details.usage > 80) {
      analysis.recommendations.push('High memory usage - close unnecessary applications');
      analysis.recommendations.push('Consider memory optimization tools');
    }
  }

  /**
   * Analyzes GPU details and derives acceleration capabilities.
   *
   * @param {object} gpu - { model, vram, dedicated, vendor, score }.
   * @returns {Promise<object>} { score, details, capabilities, recommendations }
   */
  async analyzeGPUPerformance(gpu) {
    const analysis = {
      score: gpu.score || 0,
      details: {
        model: gpu.model,
        vram: gpu.vram,
        dedicated: gpu.dedicated,
        vendor: gpu.vendor
      },
      capabilities: {},
      recommendations: []
    };

    this.analyzeGPUCapabilities(analysis);

    return analysis;
  }

  /**
   * Populates capability flags and recommendations on a GPU analysis object
   * (mutates `analysis` in place). VRAM thresholds are in GB.
   */
  analyzeGPUCapabilities(analysis) {
    const { details } = analysis;

    // Only dedicated VRAM counts toward acceleration tiers.
    analysis.capabilities = {
      acceleratesSmall: details.vram >= 4 && details.dedicated,
      acceleratesMedium: details.vram >= 8 && details.dedicated,
      acceleratesLarge: details.vram >= 16 && details.dedicated,
      acceleratesUltraLarge: details.vram >= 24 && details.dedicated
    };

    if (!details.dedicated) {
      analysis.recommendations.push('Integrated GPU detected - CPU inference recommended');
      analysis.recommendations.push('Consider dedicated GPU for significant speedup');
    } else if (details.vram < 4) {
      analysis.recommendations.push('Limited VRAM - focus on CPU inference or small models');
    } else if (details.vram >= 8) {
      analysis.recommendations.push('Good VRAM capacity for GPU-accelerated inference');
      analysis.recommendations.push('Enable GPU acceleration in llama.cpp or Ollama');
    }

    if (details.vendor === 'NVIDIA' && details.dedicated) {
      analysis.recommendations.push('NVIDIA GPU detected - CUDA acceleration available');
    } else if (details.vendor === 'AMD' && details.dedicated) {
      analysis.recommendations.push('AMD GPU detected - ROCm acceleration may be available');
    }
  }

  /**
   * Storage analysis. No actual disk probing is performed; values are
   * assumptions (SSD-class storage) plus generic recommendations.
   *
   * @returns {Promise<object>} { score, details, impact, recommendations }
   */
  async analyzeStoragePerformance() {
    const analysis = {
      score: 70, // default assumption of SSD-class storage
      details: {
        type: 'Unknown',
        estimatedSpeed: 'Unknown'
      },
      impact: {},
      recommendations: []
    };

    analysis.impact = {
      modelLoadTime: 'Moderate', // would be faster with NVMe
      swapPerformance: 'Adequate',
      tempFileAccess: 'Good'
    };

    analysis.recommendations.push('SSD storage recommended for faster model loading');
    analysis.recommendations.push('NVMe storage provides best performance for large models');
    analysis.recommendations.push('Ensure sufficient free space for model downloads');

    return analysis;
  }

  /**
   * Estimates inference performance for a model on the given hardware.
   * Thin async wrapper kept for API compatibility.
   */
  async estimateModelPerformance(model, hardware) {
    return this.calculateRealisticPerformance(model, hardware);
  }

  /**
   * Heuristic tokens/sec estimate for a model on the given hardware,
   * branching on hardware class: Apple Silicon, dedicated GPU, or CPU-only.
   * All baselines/caps are hand-tuned heuristics, not measurements.
   *
   * @param {object} model - Must expose `size` (e.g. "7B", "700M").
   * @param {object} hardware - { cpu, gpu, memory } as produced by detection.
   * @returns {object} { estimatedTokensPerSecond, confidence, factors, category, loadTimeEstimate }
   */
  calculateRealisticPerformance(model, hardware) {
    const modelSizeB = this.parseModelSize(model.size); // billions of params

    const cpuModel = hardware.cpu?.brand || hardware.cpu?.model || '';
    const gpuModel = hardware.gpu?.model || '';
    const gpuLower = gpuModel.toLowerCase(); // hoisted: used in many checks below
    const cores = hardware.cpu?.physicalCores || hardware.cpu?.cores || 1;
    const baseSpeed = hardware.cpu?.speed || 2.4;
    const vramGB = hardware.gpu?.vram || 0;
    const memoryTotal = hardware.memory?.total || 8;

    // Hardware-class detection.
    const isAppleSilicon = hardware.cpu?.architecture === 'Apple Silicon' || (
      process.platform === 'darwin' && (
        gpuLower.includes('apple') ||
        gpuLower.includes('m1') ||
        gpuLower.includes('m2') ||
        gpuLower.includes('m3') ||
        gpuLower.includes('m4')
      )
    );
    const isIntegratedGPU = /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(gpuModel);
    const hasDedicatedGPU = vramGB > 0 && !isIntegratedGPU && !isAppleSilicon;

    let tokensPerSecond;

    if (isAppleSilicon) {
      // Baseline scaled by chip generation/tier; "Pro" variants checked first
      // so plain-chip substrings don't shadow them.
      let baseTPS = 20;
      if (gpuLower.includes('m4 pro')) baseTPS = 30;
      else if (gpuLower.includes('m4')) baseTPS = 25;
      else if (gpuLower.includes('m3 pro')) baseTPS = 28;
      else if (gpuLower.includes('m3')) baseTPS = 22;
      else if (gpuLower.includes('m2 pro')) baseTPS = 25;
      else if (gpuLower.includes('m2')) baseTPS = 20;
      else if (gpuLower.includes('m1 pro')) baseTPS = 22;
      else if (gpuLower.includes('m1')) baseTPS = 18;

      // More unified memory lets larger working sets stay resident.
      if (memoryTotal >= 64) baseTPS *= 1.2;
      else if (memoryTotal >= 32) baseTPS *= 1.1;

      tokensPerSecond = Math.max(6, Math.round(baseTPS / Math.max(0.7, modelSizeB)));

    } else if (hasDedicatedGPU) {
      // Dedicated GPU baseline by generation, falling back to VRAM tiers.
      let gpuTPS = 25;
      if (gpuLower.includes('rtx 50')) gpuTPS = 60;
      else if (gpuLower.includes('rtx 40')) gpuTPS = 45;
      else if (gpuLower.includes('rtx 30')) gpuTPS = 35;
      else if (gpuLower.includes('rtx 20')) gpuTPS = 28;
      else if (vramGB >= 16) gpuTPS = 40;
      else if (vramGB >= 8) gpuTPS = 30;
      else if (vramGB >= 4) gpuTPS = 25;

      tokensPerSecond = Math.max(8, Math.round(gpuTPS / Math.max(0.4, modelSizeB)));

    } else {
      // CPU-only / integrated-GPU path: most conservative.
      const cpuLower = cpuModel.toLowerCase();
      // Crude ISA heuristics from the brand string; 12th-14th gen Intel is
      // treated as AVX-512-class here — TODO confirm against real detection.
      const hasAVX512 = cpuLower.includes('intel') &&
        (cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
      const hasAVX2 = cpuLower.includes('intel') || cpuLower.includes('amd');

      let cpuK = 1.2; // base per-core throughput constant
      if (hasAVX512) cpuK = 2.0;
      else if (hasAVX2) cpuK = 1.6;

      // CPU inference scales sub-linearly with threads; cap at 6.
      const effectiveThreads = Math.min(cores, 6);

      // Small boost when an integrated GPU can offload some work.
      const iGpuMultiplier = isIntegratedGPU ? 1.2 : 1.0;

      // Penalize when total RAM is tight relative to the model's footprint.
      const memoryPressure = Math.min(1.0, Math.max(0.6, memoryTotal / (modelSizeB * 2)));

      const baseTPS = (cpuK * baseSpeed * effectiveThreads * iGpuMultiplier * memoryPressure) / Math.max(2.0, modelSizeB);

      // Hard caps keep CPU estimates realistic regardless of the formula.
      const maxCPUTPS = hasAVX512 ? 18 : (isIntegratedGPU ? 12 : 8);
      tokensPerSecond = Math.max(1, Math.min(maxCPUTPS, Math.round(baseTPS)));
    }

    return {
      estimatedTokensPerSecond: Math.round(tokensPerSecond),
      confidence: this.calculateConfidence(hardware, model),
      factors: {
        cpu: cpuModel,
        memory: memoryTotal,
        gpu: hasDedicatedGPU ? 'dedicated' : (isIntegratedGPU ? 'integrated' : 'cpu_only'),
        modelSize: modelSizeB,
        architecture: isAppleSilicon ? 'Apple Silicon' : 'x86'
      },
      category: this.categorizePerformance(Math.round(tokensPerSecond)),
      loadTimeEstimate: this.estimateLoadTime(model, hardware)
    };
  }

  /**
   * Parses a model size string into billions of parameters.
   * "7B" -> 7, "700M" -> 0.7; unparseable or non-string input -> 1.
   *
   * @param {string} sizeString
   * @returns {number} Size in billions of parameters.
   */
  parseModelSize(sizeString) {
    if (typeof sizeString !== 'string') return 1;

    const match = sizeString.match(/(\d+\.?\d*)([BM])/i);
    if (!match) return 1;

    const num = parseFloat(match[1]);
    return match[2].toUpperCase() === 'B' ? num : num / 1000; // M -> B
  }

  /**
   * Confidence (10-90) in a performance estimate, raised for well-specified
   * hardware and lowered for edge cases. Tolerates partially-detected
   * hardware via optional chaining.
   */
  calculateConfidence(hardware, model) {
    let confidence = 50; // base confidence

    if ((hardware.cpu?.score ?? 0) > 70) confidence += 20;
    if ((hardware.memory?.total ?? 0) >= 16) confidence += 15;
    if (hardware.gpu?.dedicated) confidence += 10;

    // Edge cases: very little (or undetected) RAM, unknown model requirements.
    if ((hardware.memory?.total ?? 0) < 4) confidence -= 30;
    if (!model.requirements) confidence -= 20;

    return Math.max(10, Math.min(90, confidence));
  }

  /**
   * Maps tokens/sec to a coarse category label.
   * @returns {'excellent'|'good'|'moderate'|'slow'|'very_slow'}
   */
  categorizePerformance(tokensPerSecond) {
    if (tokensPerSecond >= 50) return 'excellent';
    if (tokensPerSecond >= 25) return 'good';
    if (tokensPerSecond >= 10) return 'moderate';
    if (tokensPerSecond >= 5) return 'slow';
    return 'very_slow';
  }

  /**
   * Rough model load-time estimate in seconds.
   *
   * @returns {object} { estimated, confidence, factors }
   */
  estimateLoadTime(model, hardware) {
    // ~2 GB on disk per billion parameters (fp16-class weights).
    const modelSizeGB = this.parseModelSize(model.size) * 2;

    // Base: ~2 s/GB, then a 0.7 factor assuming SSD-class storage.
    let loadTimeSeconds = modelSizeGB * 2;
    loadTimeSeconds *= 0.7;

    // Faster CPUs decompress/initialize faster (clamped factor around 2.5 GHz).
    const cpuSpeedFactor = Math.max(0.5, Math.min(1.5, (hardware.cpu?.speed || 2.5) / 2.5));
    loadTimeSeconds /= cpuSpeedFactor;

    // Load time depends on total RAM vs. model size, not current free memory
    // (the OS can evict caches on demand).
    if ((hardware.memory?.total ?? 0) < modelSizeGB * 1.5) {
      loadTimeSeconds *= 1.5; // slower when total memory is tight
    }

    return {
      estimated: Math.round(loadTimeSeconds),
      confidence: this.calculateConfidence(hardware, model),
      factors: ['storage_speed', 'cpu_performance', 'available_memory']
    };
  }

  /**
   * Builds the cache fingerprint for a hardware configuration. Missing
   * fields render as "undefined" rather than throwing.
   */
  generateCacheKey(hardware) {
    return `${hardware.cpu?.brand}-${hardware.memory?.total}-${hardware.gpu?.model}`;
  }

  /**
   * Benchmarks real inference speed by running a fixed prompt set through an
   * Ollama client. Per-prompt failures are recorded, not thrown.
   *
   * @param {string} modelName
   * @param {object} hardware
   * @param {object} ollamaClient - Must expose testModelPerformance(name, prompt).
   * @returns {Promise<object>} Aggregate stats plus per-prompt results.
   * @throws {Error} If no client is supplied.
   */
  async benchmarkInferenceSpeed(modelName, hardware, ollamaClient) {
    if (!ollamaClient) {
      throw new Error('Ollama client required for inference benchmarking');
    }

    const testPrompts = [
      "Hello, how are you today?",
      "Explain the concept of artificial intelligence in one sentence.",
      "What is 2 + 2?",
      "Write a haiku about programming.",
      "List three benefits of renewable energy."
    ];

    const results = [];

    for (const prompt of testPrompts) {
      try {
        const result = await ollamaClient.testModelPerformance(modelName, prompt);
        results.push({
          prompt: prompt.substring(0, 30) + '...',
          tokensPerSecond: result.tokensPerSecond,
          responseTime: result.responseTime,
          success: result.success
        });
      } catch (error) {
        results.push({
          prompt: prompt.substring(0, 30) + '...',
          tokensPerSecond: 0,
          responseTime: 0,
          success: false,
          error: error.message
        });
      }
    }

    const successful = results.filter(r => r.success);
    const avgTokensPerSecond = successful.length > 0 ?
      successful.reduce((sum, r) => sum + r.tokensPerSecond, 0) / successful.length : 0;

    return {
      model: modelName,
      hardware: {
        cpu: hardware.cpu?.brand,
        ram: hardware.memory?.total,
        gpu: hardware.gpu?.model
      },
      averageTokensPerSecond: Math.round(avgTokensPerSecond * 10) / 10,
      successRate: (successful.length / results.length) * 100,
      detailedResults: results,
      timestamp: new Date().toISOString()
    };
  }

  /** Drops all cached system analyses. */
  clearCache() {
    this.benchmarkCache.clear();
  }
}
|
|
504
|
+
|
|
505
|
+
// CommonJS export: consumers instantiate their own PerformanceAnalyzer.
module.exports = PerformanceAnalyzer;
|
package/bin/CLAUDE.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<claude-mem-context>
|
|
2
|
+
# Recent Activity
|
|
3
|
+
|
|
4
|
+
<!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
|
|
5
|
+
|
|
6
|
+
### Feb 12, 2026
|
|
7
|
+
|
|
8
|
+
| ID | Time | T | Title | Read |
|
|
9
|
+
|----|------|---|-------|------|
|
|
10
|
+
| #3492 | 10:24 PM | 🔵 | Enhanced CLI Structure - Lazy Loading with ASCII Art Branding | ~456 |
|
|
11
|
+
| #3436 | 9:57 PM | 🔵 | Enhanced CLI Implementation - Command-Line Interface with ASCII Art and Ollama Integration | ~575 |
|
|
12
|
+
</claude-mem-context>
|