llm-checker 3.5.6 → 3.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/enhanced_cli.js +7 -1
- package/bin/mcp-server.mjs +1 -1
- package/package.json +1 -1
- package/src/ai/multi-objective-selector.js +10 -4
- package/src/hardware/backends/cpu-detector.js +39 -9
- package/src/hardware/backends/cuda-detector.js +13 -8
- package/src/hardware/detector.js +111 -12
- package/src/hardware/unified-detector.js +122 -3
- package/src/index.js +12 -13
- package/src/models/deterministic-selector.js +145 -31
- package/src/models/intelligent-selector.js +3 -2
- package/src/ollama/client.js +105 -32
package/bin/enhanced_cli.js
CHANGED
|
@@ -5059,10 +5059,16 @@ program
|
|
|
5059
5059
|
console.log(` Tier: ${chalk.cyan(detector.getHardwareTier().replace('_', ' ').toUpperCase())}`);
|
|
5060
5060
|
console.log(` Max model size: ${chalk.green(detector.getMaxModelSize() + 'GB')}`);
|
|
5061
5061
|
console.log(` Best backend: ${chalk.cyan(hardware.summary.bestBackend)}`);
|
|
5062
|
+
if (hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend) {
|
|
5063
|
+
console.log(` Runtime assist: ${chalk.green(hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend)}`);
|
|
5064
|
+
}
|
|
5062
5065
|
console.log(` Dedicated GPUs: ${chalk.green(formatGpuInventoryList(hardware.summary.dedicatedGpuModels))}`);
|
|
5063
5066
|
console.log(` Integrated GPUs: ${chalk.hex('#FFA500')(formatGpuInventoryList(hardware.summary.integratedGpuModels))}`);
|
|
5064
5067
|
if (hardware.summary.hasIntegratedGPU && hardware.summary.bestBackend === 'cpu') {
|
|
5065
|
-
|
|
5068
|
+
const assistMessage = hardware.summary.runtimeBackend && hardware.summary.runtimeBackend !== hardware.summary.bestBackend
|
|
5069
|
+
? `Integrated/shared-memory GPU detected, runtime may use ${hardware.summary.runtimeBackendName || hardware.summary.runtimeBackend} acceleration`
|
|
5070
|
+
: 'Integrated/shared-memory GPU detected, runtime remains CPU';
|
|
5071
|
+
console.log(` Assist path: ${chalk.yellow(assistMessage)}`);
|
|
5066
5072
|
}
|
|
5067
5073
|
|
|
5068
5074
|
// CPU
|
package/bin/mcp-server.mjs
CHANGED
package/package.json
CHANGED
|
package/src/ai/multi-objective-selector.js
CHANGED
|
@@ -353,13 +353,16 @@ class MultiObjectiveSelector {
|
|
|
353
353
|
const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
|
|
354
354
|
? hardware.summary.hasIntegratedGPU
|
|
355
355
|
: /iris xe|uhd.*graphics|vega.*integrated|radeon.*graphics/i.test(`${gpuModel} ${integratedGpuInventory}`);
|
|
356
|
+
const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
|
|
357
|
+
? hardware.summary.hasDedicatedGPU
|
|
358
|
+
: Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
|
|
356
359
|
const platform = normalizePlatform(hardware?.os?.platform || process.platform);
|
|
357
360
|
const isPC = !isAppleSilicon && (platform === 'win32' || platform === 'linux');
|
|
358
361
|
|
|
359
362
|
// 1) Effective memory for model weights (45%) - Apple Silicon & PC optimized
|
|
360
363
|
let effMem;
|
|
361
364
|
|
|
362
|
-
if (vramGB > 0 && !unified) {
|
|
365
|
+
if (hasDedicatedGPU && vramGB > 0 && !unified) {
|
|
363
366
|
// Dedicated GPU path (Windows/Linux with discrete GPU)
|
|
364
367
|
if (isPC) {
|
|
365
368
|
// PC-specific GPU memory calculation with offload support
|
|
@@ -454,9 +457,9 @@ class MultiObjectiveSelector {
|
|
|
454
457
|
tier = bumpTier(tier, +1); // High-end GPU boost
|
|
455
458
|
} else if (!vramGB && !unified) {
|
|
456
459
|
tier = bumpTier(tier, -1); // CPU-only penalty (moderate)
|
|
457
|
-
} else if (hasIntegratedGPU) {
|
|
460
|
+
} else if (hasIntegratedGPU && !hasDedicatedGPU) {
|
|
458
461
|
tier = bumpTier(tier, -1); // iGPU penalty
|
|
459
|
-
} else if (vramGB > 0 && vramGB < 6) {
|
|
462
|
+
} else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
|
|
460
463
|
tier = bumpTier(tier, -1); // Low VRAM penalty
|
|
461
464
|
}
|
|
462
465
|
|
|
@@ -752,13 +755,16 @@ class MultiObjectiveSelector {
|
|
|
752
755
|
const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
|
|
753
756
|
? hardware.summary.hasIntegratedGPU
|
|
754
757
|
: false;
|
|
758
|
+
const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
|
|
759
|
+
? hardware.summary.hasDedicatedGPU
|
|
760
|
+
: Boolean(hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU));
|
|
755
761
|
|
|
756
762
|
// Use improved CPU estimation function for more realistic and varying speeds
|
|
757
763
|
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
758
764
|
(cpuModel.includes('13th') || cpuModel.includes('14th') || cpuModel.includes('12th'));
|
|
759
765
|
|
|
760
766
|
// GPU-based calculation (dedicated GPU only)
|
|
761
|
-
if (vramGB > 0
|
|
767
|
+
if (hasDedicatedGPU && vramGB > 0) {
|
|
762
768
|
let gpuTPS = 20; // Conservative GPU baseline
|
|
763
769
|
if (gpuModel.toLowerCase().includes('gb10') ||
|
|
764
770
|
gpuModel.toLowerCase().includes('grace blackwell') ||
|
|
package/src/hardware/backends/cpu-detector.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Focuses on AVX2, AVX512, AMX, and other SIMD extensions
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
const
|
|
7
|
+
const childProcess = require('child_process');
|
|
8
8
|
const os = require('os');
|
|
9
9
|
const fs = require('fs');
|
|
10
10
|
const { normalizePlatform } = require('../../utils/platform');
|
|
@@ -92,7 +92,7 @@ class CPUDetector {
|
|
|
92
92
|
|
|
93
93
|
try {
|
|
94
94
|
if (platform === 'darwin') {
|
|
95
|
-
return parseInt(execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
|
|
95
|
+
return parseInt(childProcess.execSync('sysctl -n hw.physicalcpu', { encoding: 'utf8', timeout: 5000 }).trim());
|
|
96
96
|
} else if (platform === 'linux') {
|
|
97
97
|
const cpuInfo = fs.readFileSync('/proc/cpuinfo', 'utf8');
|
|
98
98
|
const coreIds = new Set();
|
|
@@ -142,7 +142,37 @@ class CPUDetector {
|
|
|
142
142
|
* Execute shell command with consistent options.
|
|
143
143
|
*/
|
|
144
144
|
runCommand(command) {
|
|
145
|
-
|
|
145
|
+
const baseOptions = {
|
|
146
|
+
encoding: 'utf8',
|
|
147
|
+
timeout: 5000
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
if (normalizePlatform() === 'win32') {
|
|
151
|
+
const result = childProcess.spawnSync(command, {
|
|
152
|
+
...baseOptions,
|
|
153
|
+
shell: true,
|
|
154
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
155
|
+
windowsHide: true
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
if (result.error) {
|
|
159
|
+
throw result.error;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (result.status !== 0) {
|
|
163
|
+
const stderr = String(result.stderr || '').trim();
|
|
164
|
+
const stdout = String(result.stdout || '').trim();
|
|
165
|
+
const error = new Error(stderr || stdout || `Command failed: ${command}`);
|
|
166
|
+
error.status = result.status;
|
|
167
|
+
error.stdout = result.stdout;
|
|
168
|
+
error.stderr = result.stderr;
|
|
169
|
+
throw error;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
return result.stdout;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
return childProcess.execSync(command, baseOptions);
|
|
146
176
|
}
|
|
147
177
|
|
|
148
178
|
/**
|
|
@@ -215,10 +245,10 @@ class CPUDetector {
|
|
|
215
245
|
|
|
216
246
|
try {
|
|
217
247
|
if (platform === 'darwin') {
|
|
218
|
-
cache.l1d = parseInt(execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
|
|
219
|
-
cache.l1i = parseInt(execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
|
|
220
|
-
cache.l2 = parseInt(execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
|
|
221
|
-
cache.l3 = parseInt(execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
|
|
248
|
+
cache.l1d = parseInt(childProcess.execSync('sysctl -n hw.l1dcachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
|
|
249
|
+
cache.l1i = parseInt(childProcess.execSync('sysctl -n hw.l1icachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 || 0;
|
|
250
|
+
cache.l2 = parseInt(childProcess.execSync('sysctl -n hw.l2cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
|
|
251
|
+
cache.l3 = parseInt(childProcess.execSync('sysctl -n hw.l3cachesize', { encoding: 'utf8', timeout: 5000 })) / 1024 / 1024 || 0;
|
|
222
252
|
} else if (platform === 'linux') {
|
|
223
253
|
// Parse from /sys/devices/system/cpu/cpu0/cache/
|
|
224
254
|
const cachePath = '/sys/devices/system/cpu/cpu0/cache';
|
|
@@ -288,12 +318,12 @@ class CPUDetector {
|
|
|
288
318
|
} else {
|
|
289
319
|
// Intel Mac - check via sysctl
|
|
290
320
|
try {
|
|
291
|
-
const features = execSync('sysctl -n machdep.cpu.features', {
|
|
321
|
+
const features = childProcess.execSync('sysctl -n machdep.cpu.features', {
|
|
292
322
|
encoding: 'utf8',
|
|
293
323
|
timeout: 5000
|
|
294
324
|
}).toLowerCase();
|
|
295
325
|
|
|
296
|
-
const leafFeatures = execSync('sysctl -n machdep.cpu.leaf7_features', {
|
|
326
|
+
const leafFeatures = childProcess.execSync('sysctl -n machdep.cpu.leaf7_features', {
|
|
297
327
|
encoding: 'utf8',
|
|
298
328
|
timeout: 5000
|
|
299
329
|
}).toLowerCase();
|
|
package/src/hardware/backends/cuda-detector.js
CHANGED
|
@@ -15,6 +15,10 @@ class CUDADetector {
|
|
|
15
15
|
this.detectionMode = null;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
execCommand(command, options = {}) {
|
|
19
|
+
return execSync(command, options);
|
|
20
|
+
}
|
|
21
|
+
|
|
18
22
|
/**
|
|
19
23
|
* Check if CUDA is available
|
|
20
24
|
*/
|
|
@@ -43,7 +47,7 @@ class CUDADetector {
|
|
|
43
47
|
|
|
44
48
|
hasNvidiaSMI() {
|
|
45
49
|
try {
|
|
46
|
-
|
|
50
|
+
this.execCommand('nvidia-smi --version', {
|
|
47
51
|
encoding: 'utf8',
|
|
48
52
|
timeout: 5000,
|
|
49
53
|
stdio: ['pipe', 'pipe', 'pipe']
|
|
@@ -142,7 +146,7 @@ class CUDADetector {
|
|
|
142
146
|
}
|
|
143
147
|
|
|
144
148
|
try {
|
|
145
|
-
|
|
149
|
+
this.execCommand('nvcc --version', {
|
|
146
150
|
encoding: 'utf8',
|
|
147
151
|
timeout: 5000,
|
|
148
152
|
stdio: ['pipe', 'pipe', 'pipe']
|
|
@@ -197,17 +201,18 @@ class CUDADetector {
|
|
|
197
201
|
|
|
198
202
|
try {
|
|
199
203
|
// Get driver and CUDA version
|
|
200
|
-
const versionInfo =
|
|
204
|
+
const versionInfo = this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits', {
|
|
201
205
|
encoding: 'utf8',
|
|
202
206
|
timeout: 5000
|
|
203
207
|
}).trim().split('\n')[0];
|
|
204
208
|
result.driver = versionInfo;
|
|
205
209
|
|
|
206
|
-
//
|
|
207
|
-
const
|
|
210
|
+
// Parse the nvidia-smi banner in JS so Windows does not require shell-only tools like `head`.
|
|
211
|
+
const banner = this.execCommand('nvidia-smi', {
|
|
208
212
|
encoding: 'utf8',
|
|
209
213
|
timeout: 5000
|
|
210
214
|
});
|
|
215
|
+
const header = banner.split('\n').slice(0, 3).join('\n');
|
|
211
216
|
const cudaMatch = header.match(/CUDA Version:\s*([\d.]+)/);
|
|
212
217
|
if (cudaMatch) {
|
|
213
218
|
result.cuda = cudaMatch[1];
|
|
@@ -237,7 +242,7 @@ class CUDADetector {
|
|
|
237
242
|
'clocks.max.sm'
|
|
238
243
|
].join(',');
|
|
239
244
|
|
|
240
|
-
const gpuData =
|
|
245
|
+
const gpuData = this.execCommand(
|
|
241
246
|
`nvidia-smi --query-gpu=${query} --format=csv,noheader,nounits`,
|
|
242
247
|
{ encoding: 'utf8', timeout: 10000 }
|
|
243
248
|
).trim();
|
|
@@ -286,7 +291,7 @@ class CUDADetector {
|
|
|
286
291
|
} catch (e) {
|
|
287
292
|
// Fallback to simpler query
|
|
288
293
|
try {
|
|
289
|
-
const simpleQuery =
|
|
294
|
+
const simpleQuery = this.execCommand(
|
|
290
295
|
'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits',
|
|
291
296
|
{ encoding: 'utf8', timeout: 5000 }
|
|
292
297
|
).trim();
|
|
@@ -408,7 +413,7 @@ class CUDADetector {
|
|
|
408
413
|
}
|
|
409
414
|
|
|
410
415
|
try {
|
|
411
|
-
const nvccVersion =
|
|
416
|
+
const nvccVersion = this.execCommand('nvcc --version', {
|
|
412
417
|
encoding: 'utf8',
|
|
413
418
|
timeout: 5000,
|
|
414
419
|
stdio: ['pipe', 'pipe', 'pipe']
|
package/src/hardware/detector.js
CHANGED
|
@@ -41,7 +41,7 @@ class HardwareDetector {
|
|
|
41
41
|
const systemInfo = {
|
|
42
42
|
cpu: this.processCPUInfo(cpu),
|
|
43
43
|
memory: this.processMemoryInfo(memory),
|
|
44
|
-
gpu: this.processGPUInfo(graphics),
|
|
44
|
+
gpu: this.processGPUInfo(graphics, memory),
|
|
45
45
|
system: this.processSystemInfo(system),
|
|
46
46
|
os: this.processOSInfo(osInfo),
|
|
47
47
|
timestamp: Date.now()
|
|
@@ -97,7 +97,48 @@ class HardwareDetector {
|
|
|
97
97
|
};
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
getSystemMemoryGB(memoryInfo) {
|
|
101
|
+
const totalBytes = Number(memoryInfo?.total || 0);
|
|
102
|
+
if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
|
|
103
|
+
return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
estimateIntegratedSharedMemory(gpu, memoryInfo) {
|
|
107
|
+
const dedicatedAperture = this.normalizeVRAM(gpu?.vram || 0);
|
|
108
|
+
const explicitSharedCandidates = [
|
|
109
|
+
gpu?.memoryTotal,
|
|
110
|
+
gpu?.memory,
|
|
111
|
+
gpu?.sharedMemory,
|
|
112
|
+
gpu?.memoryShared,
|
|
113
|
+
gpu?.memory?.shared,
|
|
114
|
+
gpu?.memory?.total
|
|
115
|
+
]
|
|
116
|
+
.map((value) => this.normalizeVRAM(value))
|
|
117
|
+
.filter((value) => value > dedicatedAperture);
|
|
118
|
+
|
|
119
|
+
if (explicitSharedCandidates.length > 0) {
|
|
120
|
+
return Math.max(...explicitSharedCandidates);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
|
|
124
|
+
if (gpu?.vramDynamic || dedicatedAperture <= 2) {
|
|
125
|
+
return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return dedicatedAperture;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
|
|
132
|
+
const fallback = Math.max(0, Number(fallbackGB) || 0);
|
|
133
|
+
if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
|
|
134
|
+
return fallback;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Integrated GPUs typically expose roughly half of system RAM as a shared pool.
|
|
138
|
+
return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
processGPUInfo(graphics, memoryInfo = null) {
|
|
101
142
|
const controllers = graphics.controllers || [];
|
|
102
143
|
const displays = graphics.displays || [];
|
|
103
144
|
|
|
@@ -149,7 +190,12 @@ class HardwareDetector {
|
|
|
149
190
|
}
|
|
150
191
|
|
|
151
192
|
// Skip very generic/placeholder entries
|
|
152
|
-
if (
|
|
193
|
+
if (
|
|
194
|
+
model.includes('standard vga') ||
|
|
195
|
+
model.includes('microsoft basic') ||
|
|
196
|
+
model.includes('remote display adapter') ||
|
|
197
|
+
model.includes('basic render driver')
|
|
198
|
+
) {
|
|
153
199
|
return false;
|
|
154
200
|
}
|
|
155
201
|
|
|
@@ -199,8 +245,14 @@ class HardwareDetector {
|
|
|
199
245
|
enhancedModel = this.getGPUModelFromDeviceId(primaryGPU.deviceId) || enhancedModel;
|
|
200
246
|
}
|
|
201
247
|
|
|
248
|
+
const primaryIsIntegrated = this.isIntegratedGPU(enhancedModel);
|
|
249
|
+
const normalizedPrimaryVRAM = this.normalizeVRAM(primaryGPU.vram || 0);
|
|
250
|
+
const estimatedSharedMemory = primaryIsIntegrated
|
|
251
|
+
? this.estimateIntegratedSharedMemory(primaryGPU, memoryInfo)
|
|
252
|
+
: 0;
|
|
253
|
+
|
|
202
254
|
// Enhanced VRAM detection using the new normalizeVRAM function
|
|
203
|
-
let vram =
|
|
255
|
+
let vram = primaryIsIntegrated ? estimatedSharedMemory : normalizedPrimaryVRAM;
|
|
204
256
|
|
|
205
257
|
// If VRAM is still 0, try to estimate based on model or handle unified memory
|
|
206
258
|
if (vram === 0 && primaryGPU.model) {
|
|
@@ -227,24 +279,41 @@ class HardwareDetector {
|
|
|
227
279
|
|
|
228
280
|
// If we have multiple dedicated GPUs, use the combined VRAM
|
|
229
281
|
const effectiveVRAM = gpuCount > 1 ? totalDedicatedVRAM : vram;
|
|
282
|
+
const sharedMemory = primaryIsIntegrated ? effectiveVRAM : 0;
|
|
283
|
+
const dedicatedMemory = primaryIsIntegrated ? normalizedPrimaryVRAM : effectiveVRAM;
|
|
284
|
+
const scoredGPU = {
|
|
285
|
+
...primaryGPU,
|
|
286
|
+
model: enhancedModel,
|
|
287
|
+
vram: effectiveVRAM,
|
|
288
|
+
sharedMemory,
|
|
289
|
+
dedicatedMemory
|
|
290
|
+
};
|
|
230
291
|
|
|
231
292
|
return {
|
|
232
293
|
model: enhancedModel,
|
|
233
294
|
vendor: primaryGPU.vendor || this.inferVendorFromGPUModel(enhancedModel, 'Unknown'),
|
|
234
295
|
vram: effectiveVRAM,
|
|
235
296
|
vramPerGPU: vram, // VRAM of primary GPU for reference
|
|
297
|
+
sharedMemory,
|
|
298
|
+
dedicatedMemory,
|
|
236
299
|
vramDynamic: primaryGPU.vramDynamic || false,
|
|
237
|
-
dedicated: !
|
|
300
|
+
dedicated: !primaryIsIntegrated,
|
|
238
301
|
driverVersion: primaryGPU.driverVersion || 'Unknown',
|
|
239
302
|
gpuCount: gpuCount > 0 ? gpuCount : (dedicatedGPUs.length > 0 ? dedicatedGPUs.length : 1),
|
|
240
303
|
isMultiGPU: gpuCount > 1,
|
|
241
304
|
all: normalizedControllers.map(gpu => ({
|
|
242
305
|
model: gpu.model,
|
|
243
|
-
vram: this.
|
|
306
|
+
vram: this.isIntegratedGPU(gpu.model)
|
|
307
|
+
? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
|
|
308
|
+
: this.normalizeVRAM(gpu.vram || 0),
|
|
309
|
+
sharedMemory: this.isIntegratedGPU(gpu.model)
|
|
310
|
+
? this.estimateIntegratedSharedMemory(gpu, memoryInfo)
|
|
311
|
+
: 0,
|
|
312
|
+
dedicatedMemory: this.normalizeVRAM(gpu.vram || 0),
|
|
244
313
|
vendor: gpu.vendor || this.inferVendorFromGPUModel(gpu.model, 'Unknown')
|
|
245
314
|
})),
|
|
246
315
|
displays: displays.length,
|
|
247
|
-
score: this.calculateGPUScore(
|
|
316
|
+
score: this.calculateGPUScore(scoredGPU)
|
|
248
317
|
};
|
|
249
318
|
}
|
|
250
319
|
|
|
@@ -303,17 +372,40 @@ class HardwareDetector {
|
|
|
303
372
|
const modelFromUnified = summary.gpuModel || fallbackModel || systemInfo.gpu.model;
|
|
304
373
|
const vendor = this.inferVendorFromGPUModel(modelFromUnified, systemInfo.gpu.vendor);
|
|
305
374
|
const isAppleUnified = primaryType === 'metal';
|
|
375
|
+
const integratedSharedMemory = Math.max(
|
|
376
|
+
Number(systemInfo.gpu.sharedMemory || 0),
|
|
377
|
+
...(Array.isArray(unified.systemGpu?.gpus)
|
|
378
|
+
? unified.systemGpu.gpus
|
|
379
|
+
.filter((gpu) => gpu?.type === 'integrated')
|
|
380
|
+
.map((gpu) => Number(gpu?.memory?.total || gpu?.memoryTotal || 0))
|
|
381
|
+
: [0])
|
|
382
|
+
);
|
|
306
383
|
|
|
307
384
|
systemInfo.summary = {
|
|
308
|
-
...summary
|
|
385
|
+
...summary,
|
|
386
|
+
integratedSharedMemory: typeof summary.integratedSharedMemory === 'number'
|
|
387
|
+
? summary.integratedSharedMemory
|
|
388
|
+
: integratedSharedMemory
|
|
309
389
|
};
|
|
310
390
|
|
|
311
391
|
systemInfo.gpu = {
|
|
312
392
|
...systemInfo.gpu,
|
|
313
393
|
model: modelFromUnified,
|
|
314
394
|
vendor,
|
|
315
|
-
vram: isAppleUnified
|
|
316
|
-
|
|
395
|
+
vram: isAppleUnified
|
|
396
|
+
? systemInfo.gpu.vram
|
|
397
|
+
: (hasDedicatedGPU ? (totalVRAM || systemInfo.gpu.vram) : (integratedSharedMemory || systemInfo.gpu.vram || 0)),
|
|
398
|
+
vramPerGPU: isAppleUnified
|
|
399
|
+
? (systemInfo.gpu.vramPerGPU || 0)
|
|
400
|
+
: (hasDedicatedGPU
|
|
401
|
+
? (perGPUVRAM || systemInfo.gpu.vramPerGPU || 0)
|
|
402
|
+
: (integratedSharedMemory || systemInfo.gpu.vramPerGPU || systemInfo.gpu.vram || 0)),
|
|
403
|
+
sharedMemory: isAppleUnified
|
|
404
|
+
? (systemInfo.gpu.sharedMemory || 0)
|
|
405
|
+
: (hasIntegratedGPU ? Math.max(integratedSharedMemory || 0, systemInfo.gpu.sharedMemory || 0) : 0),
|
|
406
|
+
dedicatedMemory: hasDedicatedGPU
|
|
407
|
+
? (totalVRAM || systemInfo.gpu.dedicatedMemory || systemInfo.gpu.vram || 0)
|
|
408
|
+
: 0,
|
|
317
409
|
dedicated: hasDedicatedGPU,
|
|
318
410
|
gpuCount: summary.gpuCount || gpuCount,
|
|
319
411
|
isMultiGPU: Boolean(summary.isMultiGPU || gpuCount > 1),
|
|
@@ -557,13 +649,16 @@ class HardwareDetector {
|
|
|
557
649
|
|
|
558
650
|
let score = 0;
|
|
559
651
|
const model = gpu.model.toLowerCase();
|
|
560
|
-
const
|
|
652
|
+
const integrated = this.isIntegratedGPU(gpu.model);
|
|
653
|
+
const vram = integrated
|
|
654
|
+
? Math.min(gpu.sharedMemory || gpu.vram || 0, 2)
|
|
655
|
+
: (gpu.vram || 0);
|
|
561
656
|
|
|
562
657
|
|
|
563
658
|
score += vram * 8;
|
|
564
659
|
|
|
565
660
|
|
|
566
|
-
if (!
|
|
661
|
+
if (!integrated) {
|
|
567
662
|
score += 20;
|
|
568
663
|
}
|
|
569
664
|
|
|
@@ -586,6 +681,10 @@ class HardwareDetector {
|
|
|
586
681
|
else if (model.includes('apple m')) score += 15;
|
|
587
682
|
else if (model.includes('r9700') || model.includes('ai pro r9700')) score += 23;
|
|
588
683
|
|
|
684
|
+
if (integrated) {
|
|
685
|
+
return Math.min(Math.round(score), 45);
|
|
686
|
+
}
|
|
687
|
+
|
|
589
688
|
return Math.min(Math.round(score), 100);
|
|
590
689
|
}
|
|
591
690
|
|
|
package/src/hardware/unified-detector.js
CHANGED
|
@@ -216,6 +216,9 @@ class UnifiedDetector {
|
|
|
216
216
|
const summary = {
|
|
217
217
|
bestBackend: result.primary?.type || 'cpu',
|
|
218
218
|
backendName: result.primary?.name || 'CPU',
|
|
219
|
+
runtimeBackend: result.primary?.type || 'cpu',
|
|
220
|
+
runtimeBackendName: result.primary?.name || 'CPU',
|
|
221
|
+
hasRuntimeAssist: false,
|
|
219
222
|
totalVRAM: 0,
|
|
220
223
|
effectiveMemory: 0,
|
|
221
224
|
speedCoefficient: 0,
|
|
@@ -231,6 +234,7 @@ class UnifiedDetector {
|
|
|
231
234
|
dedicatedGpuCount: 0,
|
|
232
235
|
integratedGpuModels: [],
|
|
233
236
|
dedicatedGpuModels: [],
|
|
237
|
+
integratedSharedMemory: 0,
|
|
234
238
|
cpuModel: result.cpu?.brand || 'Unknown',
|
|
235
239
|
systemRAM: require('os').totalmem() / (1024 ** 3)
|
|
236
240
|
};
|
|
@@ -303,6 +307,7 @@ class UnifiedDetector {
|
|
|
303
307
|
summary.dedicatedGpuCount = topology.dedicatedCount;
|
|
304
308
|
summary.integratedGpuModels = topology.integratedModels;
|
|
305
309
|
summary.dedicatedGpuModels = topology.dedicatedModels;
|
|
310
|
+
summary.integratedSharedMemory = topology.integratedSharedMemory;
|
|
306
311
|
if (!summary.gpuModel) {
|
|
307
312
|
summary.gpuModel = topology.primaryModel || null;
|
|
308
313
|
}
|
|
@@ -314,6 +319,11 @@ class UnifiedDetector {
|
|
|
314
319
|
}
|
|
315
320
|
summary.hasHeterogeneousGPU = summary.hasHeterogeneousGPU || topology.isHeterogeneous;
|
|
316
321
|
|
|
322
|
+
const runtimeSelection = this.detectRuntimeAssistBackend(result, topology);
|
|
323
|
+
summary.runtimeBackend = runtimeSelection.backend;
|
|
324
|
+
summary.runtimeBackendName = runtimeSelection.name;
|
|
325
|
+
summary.hasRuntimeAssist = runtimeSelection.assisted;
|
|
326
|
+
|
|
317
327
|
// Effective memory for LLM loading
|
|
318
328
|
// For GPU: use VRAM; for CPU/Metal: use system RAM
|
|
319
329
|
if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
@@ -355,6 +365,9 @@ class UnifiedDetector {
|
|
|
355
365
|
models,
|
|
356
366
|
integratedModels,
|
|
357
367
|
dedicatedModels,
|
|
368
|
+
integratedSharedMemory: normalized
|
|
369
|
+
.filter((gpu) => gpu.type === 'integrated')
|
|
370
|
+
.reduce((max, gpu) => Math.max(max, Number(gpu?.memory?.total || 0)), 0),
|
|
358
371
|
integratedCount: normalized.filter((gpu) => gpu.type === 'integrated').length,
|
|
359
372
|
dedicatedCount: normalized.filter((gpu) => gpu.type === 'dedicated').length,
|
|
360
373
|
hasIntegratedGPU: normalized.some((gpu) => gpu.type === 'integrated'),
|
|
@@ -393,6 +406,7 @@ class UnifiedDetector {
|
|
|
393
406
|
.map((gpu) => {
|
|
394
407
|
const name = String(gpu?.name || gpu?.model || '').replace(/\s+/g, ' ').trim();
|
|
395
408
|
if (!name) return null;
|
|
409
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
396
410
|
|
|
397
411
|
let type = gpu?.type;
|
|
398
412
|
if (type !== 'integrated' && type !== 'dedicated') {
|
|
@@ -412,6 +426,101 @@ class UnifiedDetector {
|
|
|
412
426
|
.filter(Boolean);
|
|
413
427
|
}
|
|
414
428
|
|
|
429
|
+
isRemoteDisplayModel(model) {
|
|
430
|
+
const lower = String(model || '').toLowerCase();
|
|
431
|
+
if (!lower) return false;
|
|
432
|
+
|
|
433
|
+
return (
|
|
434
|
+
lower.includes('microsoft remote display adapter') ||
|
|
435
|
+
lower.includes('remote display adapter') ||
|
|
436
|
+
lower.includes('basic render driver')
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
inferGpuVendor(name) {
|
|
441
|
+
const lower = String(name || '').toLowerCase();
|
|
442
|
+
if (!lower) return 'unknown';
|
|
443
|
+
if (lower.includes('nvidia') || lower.includes('geforce') || lower.includes('rtx') || lower.includes('gtx')) return 'nvidia';
|
|
444
|
+
if (lower.includes('amd') || lower.includes('ati') || lower.includes('radeon')) return 'amd';
|
|
445
|
+
if (lower.includes('intel') || lower.includes('iris') || lower.includes('uhd') || lower.includes('arc')) return 'intel';
|
|
446
|
+
if (lower.includes('apple')) return 'apple';
|
|
447
|
+
return 'unknown';
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
detectRuntimeAssistBackend(result, topology = {}) {
|
|
451
|
+
const primaryType = result?.primary?.type || 'cpu';
|
|
452
|
+
const primaryName = result?.primary?.name || 'CPU';
|
|
453
|
+
|
|
454
|
+
if (primaryType !== 'cpu') {
|
|
455
|
+
return {
|
|
456
|
+
backend: primaryType,
|
|
457
|
+
name: primaryName,
|
|
458
|
+
assisted: false
|
|
459
|
+
};
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
const platform = result?.platform || result?.os?.platform || normalizePlatform();
|
|
463
|
+
const integratedModels = Array.isArray(topology.integratedModels) ? topology.integratedModels : [];
|
|
464
|
+
const integratedVendors = integratedModels.map((gpu) => this.inferGpuVendor(gpu.name));
|
|
465
|
+
|
|
466
|
+
const hasWindowsIntegratedGpu = platform === 'win32' && integratedModels.length > 0;
|
|
467
|
+
const hasKnownIntegratedVendor = integratedVendors.some((vendor) => ['amd', 'intel', 'nvidia'].includes(vendor));
|
|
468
|
+
|
|
469
|
+
if (hasWindowsIntegratedGpu && hasKnownIntegratedVendor) {
|
|
470
|
+
return {
|
|
471
|
+
backend: 'vulkan',
|
|
472
|
+
name: 'Vulkan',
|
|
473
|
+
assisted: true
|
|
474
|
+
};
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
return {
|
|
478
|
+
backend: primaryType,
|
|
479
|
+
name: primaryName,
|
|
480
|
+
assisted: false
|
|
481
|
+
};
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
getSystemMemoryGB(memoryInfo) {
|
|
485
|
+
const totalBytes = Number(memoryInfo?.total || 0);
|
|
486
|
+
if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
|
|
487
|
+
return Math.max(1, Math.round(totalBytes / (1024 ** 3)));
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
estimateSystemSharedMemory(totalSystemGB, fallbackGB = 0) {
|
|
491
|
+
const fallback = Math.max(0, Number(fallbackGB) || 0);
|
|
492
|
+
if (!Number.isFinite(totalSystemGB) || totalSystemGB <= 0) {
|
|
493
|
+
return fallback;
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
return Math.max(fallback, Math.min(Math.max(1, Math.round(totalSystemGB / 2)), 16));
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
estimateIntegratedFallbackMemory(controller, memoryInfo) {
|
|
500
|
+
const dedicatedAperture = this.normalizeFallbackVRAM(controller?.vram || 0);
|
|
501
|
+
const explicitSharedCandidates = [
|
|
502
|
+
controller?.memoryTotal,
|
|
503
|
+
controller?.memory,
|
|
504
|
+
controller?.sharedMemory,
|
|
505
|
+
controller?.memoryShared,
|
|
506
|
+
controller?.memory?.shared,
|
|
507
|
+
controller?.memory?.total
|
|
508
|
+
]
|
|
509
|
+
.map((value) => this.normalizeFallbackVRAM(value))
|
|
510
|
+
.filter((value) => value > dedicatedAperture);
|
|
511
|
+
|
|
512
|
+
if (explicitSharedCandidates.length > 0) {
|
|
513
|
+
return Math.max(...explicitSharedCandidates);
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
const totalSystemGB = this.getSystemMemoryGB(memoryInfo);
|
|
517
|
+
if (controller?.vramDynamic || dedicatedAperture <= 2) {
|
|
518
|
+
return this.estimateSystemSharedMemory(totalSystemGB, dedicatedAperture);
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
return dedicatedAperture;
|
|
522
|
+
}
|
|
523
|
+
|
|
415
524
|
mergeGpuInventories(...gpuLists) {
|
|
416
525
|
const normalizedLists = gpuLists.map((list) => this.normalizeGpuInventory(list));
|
|
417
526
|
const primaryIndex = normalizedLists.findIndex((list) => list.length > 0);
|
|
@@ -433,6 +542,7 @@ class UnifiedDetector {
|
|
|
433
542
|
|
|
434
543
|
async detectSystemGpuFallback() {
|
|
435
544
|
const graphics = await si.graphics();
|
|
545
|
+
const memoryInfo = await si.mem().catch(() => null);
|
|
436
546
|
const controllers = Array.isArray(graphics?.controllers) ? graphics.controllers : [];
|
|
437
547
|
|
|
438
548
|
if (controllers.length === 0) {
|
|
@@ -450,12 +560,15 @@ class UnifiedDetector {
|
|
|
450
560
|
.map((controller) => {
|
|
451
561
|
const name = String(controller?.model || controller?.name || '').replace(/\s+/g, ' ').trim();
|
|
452
562
|
if (!name || name.toLowerCase() === 'unknown') return null;
|
|
563
|
+
if (this.isRemoteDisplayModel(name)) return null;
|
|
453
564
|
|
|
454
565
|
const nameLower = name.toLowerCase();
|
|
455
566
|
if (nameLower.includes('microsoft basic') || nameLower.includes('standard vga')) return null;
|
|
456
567
|
|
|
457
568
|
const isIntegrated = this.isIntegratedGPUModel(name);
|
|
458
|
-
let vram =
|
|
569
|
+
let vram = isIntegrated
|
|
570
|
+
? this.estimateIntegratedFallbackMemory(controller, memoryInfo)
|
|
571
|
+
: this.normalizeFallbackVRAM(controller?.vram || controller?.memoryTotal || controller?.memory || 0);
|
|
459
572
|
|
|
460
573
|
// For dedicated cards, estimate VRAM from model if runtime did not report memory.
|
|
461
574
|
if (!isIntegrated && vram === 0) {
|
|
@@ -819,13 +932,19 @@ class UnifiedDetector {
|
|
|
819
932
|
return `${gpuDesc} (${summary.totalVRAM}GB) + ${summary.cpuModel}`;
|
|
820
933
|
}
|
|
821
934
|
else {
|
|
935
|
+
const runtimeAssistSuffix = summary.hasRuntimeAssist && summary.runtimeBackend !== summary.bestBackend
|
|
936
|
+
? `${summary.runtimeBackendName || summary.runtimeBackend} assist`
|
|
937
|
+
: 'CPU backend';
|
|
822
938
|
if (summary.gpuModel && summary.hasIntegratedGPU && !summary.hasDedicatedGPU) {
|
|
823
939
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
824
|
-
|
|
940
|
+
if (summary.integratedSharedMemory > 0) {
|
|
941
|
+
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
942
|
+
}
|
|
943
|
+
return `${gpuDesc} (integrated/shared memory, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
825
944
|
}
|
|
826
945
|
if (summary.gpuModel && summary.gpuCount > 0) {
|
|
827
946
|
const gpuDesc = summary.gpuInventory || summary.gpuModel;
|
|
828
|
-
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected,
|
|
947
|
+
return `${gpuDesc} (${summary.totalVRAM}GB VRAM detected, ${runtimeAssistSuffix}) + ${summary.cpuModel}`;
|
|
829
948
|
}
|
|
830
949
|
return `${summary.cpuModel} (${Math.round(summary.systemRAM)}GB RAM, CPU-only)`;
|
|
831
950
|
}
|
package/src/index.js
CHANGED
|
@@ -1373,6 +1373,15 @@ class LLMChecker {
|
|
|
1373
1373
|
// Detect PC platform (Windows/Linux)
|
|
1374
1374
|
const normalizedPlatform = normalizePlatform(hardware.os?.platform || process.platform);
|
|
1375
1375
|
const isPC = !isAppleSilicon && (normalizedPlatform === 'win32' || normalizedPlatform === 'linux');
|
|
1376
|
+
const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
|
|
1377
|
+
? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
|
|
1378
|
+
: '';
|
|
1379
|
+
const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
|
|
1380
|
+
? hardware.summary.hasIntegratedGPU
|
|
1381
|
+
: /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
|
|
1382
|
+
const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
|
|
1383
|
+
? (!unified && hardware.summary.hasDedicatedGPU)
|
|
1384
|
+
: Boolean(!unified && (hardware.gpu?.dedicated || (vramGB > 0 && !hasIntegratedGPU)));
|
|
1376
1385
|
const hasAVX512 = cpuModel.toLowerCase().includes('intel') &&
|
|
1377
1386
|
(cpuModel.includes('12th') || cpuModel.includes('13th') || cpuModel.includes('14th'));
|
|
1378
1387
|
const hasAVX2 = cpuModel.toLowerCase().includes('intel') ||
|
|
@@ -1381,7 +1390,7 @@ class LLMChecker {
|
|
|
1381
1390
|
// 1) Capacidad efectiva para pesos del modelo (45%)
|
|
1382
1391
|
let effMem;
|
|
1383
1392
|
|
|
1384
|
-
if (vramGB > 0 && !unified) {
|
|
1393
|
+
if (hasDedicatedGPU && vramGB > 0 && !unified) {
|
|
1385
1394
|
// Dedicated GPU path (Windows/Linux with discrete GPU)
|
|
1386
1395
|
if (isPC) {
|
|
1387
1396
|
// PC-specific GPU memory calculation with offload support
|
|
@@ -1469,16 +1478,6 @@ class LLMChecker {
|
|
|
1469
1478
|
score >= 35 ? 'medium' : // 35-54 for mid-range systems
|
|
1470
1479
|
score >= 20 ? 'low' : 'ultra_low'; // 20-34 for budget systems
|
|
1471
1480
|
|
|
1472
|
-
const integratedGpuInventory = Array.isArray(hardware.summary?.integratedGpuModels)
|
|
1473
|
-
? hardware.summary.integratedGpuModels.map(({ name }) => name).join(' ')
|
|
1474
|
-
: '';
|
|
1475
|
-
const hasIntegratedGPU = typeof hardware.summary?.hasIntegratedGPU === 'boolean'
|
|
1476
|
-
? hardware.summary.hasIntegratedGPU
|
|
1477
|
-
: /iris.*xe|iris.*graphics|uhd.*graphics|vega.*integrated|radeon.*graphics|intel.*integrated|integrated/i.test(`${gpuModel} ${integratedGpuInventory}`);
|
|
1478
|
-
const hasDedicatedGPU = typeof hardware.summary?.hasDedicatedGPU === 'boolean'
|
|
1479
|
-
? (!unified && hardware.summary.hasDedicatedGPU)
|
|
1480
|
-
: (vramGB > 0 && !hasIntegratedGPU && !unified);
|
|
1481
|
-
|
|
1482
1481
|
// Debug logging for tier calculation
|
|
1483
1482
|
if (process.env.DEBUG_TIER) {
|
|
1484
1483
|
console.log(`GPU Model: "${gpuModel}"`);
|
|
@@ -1508,10 +1507,10 @@ class LLMChecker {
|
|
|
1508
1507
|
} else if (!vramGB && !unified) {
|
|
1509
1508
|
// Windows/Linux CPU-only - significativa limitación pero no extrema
|
|
1510
1509
|
tier = this.bumpTier(tier, -1);
|
|
1511
|
-
} else if (hasIntegratedGPU) {
|
|
1510
|
+
} else if (hasIntegratedGPU && !hasDedicatedGPU) {
|
|
1512
1511
|
// iGPU - limitada pero algo mejor que CPU puro
|
|
1513
1512
|
tier = this.bumpTier(tier, -1);
|
|
1514
|
-
} else if (vramGB > 0 && vramGB < 6) {
|
|
1513
|
+
} else if (hasDedicatedGPU && vramGB > 0 && vramGB < 6) {
|
|
1515
1514
|
// GPU dedicada con poca VRAM (GTX 1060, etc.)
|
|
1516
1515
|
tier = this.bumpTier(tier, -1);
|
|
1517
1516
|
}
|
|
@@ -751,40 +751,26 @@ class DeterministicModelSelector {
|
|
|
751
751
|
|
|
752
752
|
return variants.map((variant) => {
|
|
753
753
|
const variantTag = variant.tag || fallbackTag;
|
|
754
|
-
const
|
|
755
|
-
|
|
756
|
-
variantTag,
|
|
757
|
-
ollamaModel.main_size,
|
|
758
|
-
ollamaModel.model_identifier
|
|
759
|
-
);
|
|
754
|
+
const quant = this.resolveVariantQuantization(variant, variantTag);
|
|
755
|
+
const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
|
|
760
756
|
const moeMetadata = this.extractMoEMetadata(ollamaModel, variant, paramsB, baseText);
|
|
761
|
-
const quant = this.normalizeQuantization(
|
|
762
|
-
variant.quantization ||
|
|
763
|
-
this.extractQuantizationFromTag(variantTag) ||
|
|
764
|
-
'Q4_K_M'
|
|
765
|
-
);
|
|
766
757
|
|
|
767
758
|
const variantSizeGB = this.extractVariantSizeGB(variant, paramsB);
|
|
768
759
|
const modalities = this.inferModalities(ollamaModel, variantTag);
|
|
769
760
|
const modelTags = this.inferTagsForVariant(derivedTags, variant, variantTag);
|
|
770
761
|
const sizeByQuant = {};
|
|
762
|
+
const variantIsCloud = this.isCloudVariantTag(variantTag);
|
|
771
763
|
|
|
772
764
|
for (const sibling of variants) {
|
|
773
|
-
const
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
);
|
|
765
|
+
const siblingTag = sibling.tag || fallbackTag;
|
|
766
|
+
if (this.isCloudVariantTag(siblingTag) !== variantIsCloud) continue;
|
|
767
|
+
|
|
768
|
+
const siblingQuant = this.resolveVariantQuantization(sibling, siblingTag);
|
|
769
|
+
const siblingParams = this.resolveVariantParamsB(ollamaModel, sibling, siblingQuant);
|
|
779
770
|
|
|
780
771
|
// Keep quantization map parameter-aware: don't blend 8B/70B/405B sizes.
|
|
781
772
|
if (Math.abs(siblingParams - paramsB) > 0.25) continue;
|
|
782
773
|
|
|
783
|
-
const siblingQuant = this.normalizeQuantization(
|
|
784
|
-
sibling.quantization ||
|
|
785
|
-
this.extractQuantizationFromTag(sibling.tag || '') ||
|
|
786
|
-
quant
|
|
787
|
-
);
|
|
788
774
|
const siblingSize = this.extractVariantSizeGB(sibling, siblingParams);
|
|
789
775
|
if (!Number.isFinite(sizeByQuant[siblingQuant]) || siblingSize < sizeByQuant[siblingQuant]) {
|
|
790
776
|
sizeByQuant[siblingQuant] = siblingSize;
|
|
@@ -1003,17 +989,130 @@ class DeterministicModelSelector {
|
|
|
1003
989
|
}
|
|
1004
990
|
|
|
1005
991
|
extractParamsFromString(...values) {
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
992
|
+
const candidates = this.extractParameterCandidates(...values);
|
|
993
|
+
return candidates.length > 0 ? candidates[0] : null;
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
extractParameterCandidates(...values) {
|
|
997
|
+
const candidates = [];
|
|
998
|
+
const seen = new Set();
|
|
999
|
+
|
|
1000
|
+
const pushCandidate = (value) => {
|
|
1001
|
+
if (!Number.isFinite(value) || value <= 0) return;
|
|
1002
|
+
const rounded = Math.round(value * 1000) / 1000;
|
|
1003
|
+
const key = String(rounded);
|
|
1004
|
+
if (seen.has(key)) return;
|
|
1005
|
+
seen.add(key);
|
|
1006
|
+
candidates.push(rounded);
|
|
1007
|
+
};
|
|
1008
|
+
|
|
1009
|
+
const visit = (value) => {
|
|
1010
|
+
if (typeof value === 'number') {
|
|
1011
|
+
pushCandidate(value);
|
|
1012
|
+
return;
|
|
1013
|
+
}
|
|
1014
|
+
|
|
1015
|
+
if (Array.isArray(value)) {
|
|
1016
|
+
value.forEach(visit);
|
|
1017
|
+
return;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
if (value && typeof value === 'object') {
|
|
1021
|
+
Object.values(value).forEach(visit);
|
|
1022
|
+
return;
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
if (typeof value !== 'string') return;
|
|
1026
|
+
|
|
1027
|
+
const regex = /(\d+\.?\d*)\s*([BbMm])/g;
|
|
1028
|
+
for (const match of value.matchAll(regex)) {
|
|
1029
|
+
const amount = parseFloat(match[1]);
|
|
1030
|
+
const unit = match[2].toUpperCase();
|
|
1031
|
+
pushCandidate(unit === 'M' ? amount / 1000 : amount);
|
|
1009
1032
|
}
|
|
1010
|
-
|
|
1033
|
+
};
|
|
1034
|
+
|
|
1035
|
+
values.forEach(visit);
|
|
1036
|
+
return candidates;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
extractArtifactSizeGBFromValue(value) {
|
|
1040
|
+
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
1041
|
+
return value;
|
|
1042
|
+
}
|
|
1043
|
+
if (typeof value !== 'string') return null;
|
|
1044
|
+
|
|
1045
|
+
const match = value.match(/(\d+\.?\d*)\s*g(?:i)?b\b/i);
|
|
1046
|
+
if (!match) return null;
|
|
1047
|
+
return parseFloat(match[1]);
|
|
1048
|
+
}
|
|
1049
|
+
|
|
1050
|
+
inferParamsFromArtifactSizeGB(sizeGB, quant = 'Q4_K_M') {
|
|
1051
|
+
const normalizedQuant = this.normalizeQuantization(quant);
|
|
1052
|
+
const bytesPerParam = {
|
|
1053
|
+
'Q8_0': 1.05,
|
|
1054
|
+
'Q6_K': 0.80,
|
|
1055
|
+
'Q5_K_M': 0.68,
|
|
1056
|
+
'Q4_K_M': 0.58,
|
|
1057
|
+
'Q3_K': 0.48,
|
|
1058
|
+
'Q2_K': 0.37
|
|
1059
|
+
};
|
|
1060
|
+
const bpp = bytesPerParam[normalizedQuant] || 0.58;
|
|
1061
|
+
const inferred = sizeGB / bpp;
|
|
1062
|
+
return Math.max(0.5, Math.round(inferred * 2) / 2);
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
isCloudVariantTag(tag = '') {
|
|
1066
|
+
return /:cloud$/i.test(String(tag).trim());
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
resolveVariantQuantization(variant = {}, variantTag = '') {
|
|
1070
|
+
const tagQuant = this.extractQuantizationFromTag(variantTag);
|
|
1071
|
+
if (tagQuant) {
|
|
1072
|
+
return this.normalizeQuantization(tagQuant);
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
return this.normalizeQuantization(
|
|
1076
|
+
variant.quantization ||
|
|
1077
|
+
variant.quant ||
|
|
1078
|
+
'Q4_K_M'
|
|
1079
|
+
);
|
|
1080
|
+
}
|
|
1011
1081
|
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1082
|
+
resolveVariantParamsB(ollamaModel = {}, variant = {}, quant = 'Q4_K_M') {
|
|
1083
|
+
const explicitParams = this.extractParamsFromString(
|
|
1084
|
+
variant.size,
|
|
1085
|
+
variant.tag,
|
|
1086
|
+
variant.label,
|
|
1087
|
+
variant.name,
|
|
1088
|
+
ollamaModel.model_identifier,
|
|
1089
|
+
ollamaModel.model_name,
|
|
1090
|
+
ollamaModel.parameter_size,
|
|
1091
|
+
ollamaModel.parameter_count,
|
|
1092
|
+
ollamaModel.parameters
|
|
1093
|
+
);
|
|
1094
|
+
if (Number.isFinite(explicitParams) && explicitParams > 0) {
|
|
1095
|
+
return explicitParams;
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
const metadataCandidates = this.extractParameterCandidates(
|
|
1099
|
+
ollamaModel.model_sizes,
|
|
1100
|
+
ollamaModel.parameters,
|
|
1101
|
+
ollamaModel.parameter_size,
|
|
1102
|
+
ollamaModel.parameter_count
|
|
1103
|
+
);
|
|
1104
|
+
if (metadataCandidates.length > 0) {
|
|
1105
|
+
return Math.max(...metadataCandidates);
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
const artifactSizeGB = this.extractVariantSizeGB(variant, null);
|
|
1109
|
+
if (!this.isCloudVariantTag(variant.tag) && Number.isFinite(artifactSizeGB) && artifactSizeGB > 0) {
|
|
1110
|
+
return this.inferParamsFromArtifactSizeGB(artifactSizeGB, quant);
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
const modelArtifactSizeGB = this.extractArtifactSizeGBFromValue(ollamaModel.main_size);
|
|
1114
|
+
if (Number.isFinite(modelArtifactSizeGB) && modelArtifactSizeGB > 0) {
|
|
1115
|
+
return this.inferParamsFromArtifactSizeGB(modelArtifactSizeGB, quant);
|
|
1017
1116
|
}
|
|
1018
1117
|
|
|
1019
1118
|
return 7;
|
|
@@ -1038,6 +1137,7 @@ class DeterministicModelSelector {
|
|
|
1038
1137
|
extractVariantSizeGB(variant, paramsB) {
|
|
1039
1138
|
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
|
|
1040
1139
|
if (Number.isFinite(candidate) && candidate > 0) return candidate;
|
|
1140
|
+
if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
|
|
1041
1141
|
return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
|
|
1042
1142
|
}
|
|
1043
1143
|
|
|
@@ -2049,6 +2149,20 @@ class DeterministicModelSelector {
|
|
|
2049
2149
|
}
|
|
2050
2150
|
|
|
2051
2151
|
mapHardwareTier(hardware = {}) {
|
|
2152
|
+
const summary = hardware?.summary || {};
|
|
2153
|
+
const effectiveMemory = Number(summary.effectiveMemory);
|
|
2154
|
+
const speedCoefficient = Number(summary.speedCoefficient);
|
|
2155
|
+
if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
|
|
2156
|
+
if (effectiveMemory >= 80 && speedCoefficient >= 300) return 'ultra_high';
|
|
2157
|
+
if (effectiveMemory >= 48 && speedCoefficient >= 200) return 'very_high';
|
|
2158
|
+
if (effectiveMemory >= 24 && speedCoefficient >= 150) return 'high';
|
|
2159
|
+
if (effectiveMemory >= 16 && speedCoefficient >= 100) return 'medium_high';
|
|
2160
|
+
if (effectiveMemory >= 12 && speedCoefficient >= 80) return 'medium';
|
|
2161
|
+
if (effectiveMemory >= 8 && speedCoefficient >= 50) return 'medium_low';
|
|
2162
|
+
if (effectiveMemory >= 6 && speedCoefficient >= 30) return 'low';
|
|
2163
|
+
return 'ultra_low';
|
|
2164
|
+
}
|
|
2165
|
+
|
|
2052
2166
|
let ram, cores;
|
|
2053
2167
|
|
|
2054
2168
|
if (hardware?.memory?.totalGB) {
|
|
@@ -98,7 +98,8 @@ class IntelligentSelector {
|
|
|
98
98
|
description: this.detector.getHardwareDescription(),
|
|
99
99
|
tier: this.detector.getHardwareTier(),
|
|
100
100
|
maxSize: this.detector.getMaxModelSize(),
|
|
101
|
-
backend: hardware.summary.bestBackend
|
|
101
|
+
backend: hardware.summary.bestBackend,
|
|
102
|
+
runtimeBackend: hardware.summary.runtimeBackend || hardware.summary.bestBackend
|
|
102
103
|
},
|
|
103
104
|
policy: {
|
|
104
105
|
mode: policyEngine.getMode(),
|
|
@@ -163,7 +164,7 @@ class IntelligentSelector {
|
|
|
163
164
|
|
|
164
165
|
const context = {
|
|
165
166
|
backend: summary.bestBackend || null,
|
|
166
|
-
runtimeBackend: summary.bestBackend || null,
|
|
167
|
+
runtimeBackend: summary.runtimeBackend || summary.bestBackend || null,
|
|
167
168
|
ramGB: systemRAM,
|
|
168
169
|
totalRamGB: systemRAM,
|
|
169
170
|
hardware
|
package/src/ollama/client.js
CHANGED
|
@@ -4,13 +4,10 @@ class OllamaClient {
|
|
|
4
4
|
constructor(baseURL = null) {
|
|
5
5
|
// Support OLLAMA_HOST environment variable (standard Ollama configuration)
|
|
6
6
|
// Also support OLLAMA_URL for backwards compatibility
|
|
7
|
-
this.
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
this.baseURL = 'http://' + this.baseURL;
|
|
12
|
-
}
|
|
13
|
-
this.baseURL = this.baseURL.replace(/\/$/, '');
|
|
7
|
+
this.preferredBaseURL = this.normalizeBaseURL(
|
|
8
|
+
baseURL || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434'
|
|
9
|
+
);
|
|
10
|
+
this.baseURL = this.preferredBaseURL;
|
|
14
11
|
|
|
15
12
|
this.isAvailable = null;
|
|
16
13
|
this.lastCheck = 0;
|
|
@@ -18,6 +15,53 @@ class OllamaClient {
|
|
|
18
15
|
this._pendingCheck = null;
|
|
19
16
|
}
|
|
20
17
|
|
|
18
|
+
normalizeBaseURL(baseURL) {
|
|
19
|
+
let normalized = String(baseURL || '').trim();
|
|
20
|
+
if (!normalized.startsWith('http://') && !normalized.startsWith('https://')) {
|
|
21
|
+
normalized = 'http://' + normalized;
|
|
22
|
+
}
|
|
23
|
+
return normalized.replace(/\/$/, '');
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
buildCandidateBaseURLs(baseURL = this.preferredBaseURL) {
|
|
27
|
+
const normalized = this.normalizeBaseURL(baseURL);
|
|
28
|
+
const candidates = [normalized];
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const parsed = new URL(normalized);
|
|
32
|
+
if (parsed.hostname === 'localhost') {
|
|
33
|
+
const ipv4 = new URL(parsed.toString());
|
|
34
|
+
ipv4.hostname = '127.0.0.1';
|
|
35
|
+
candidates.push(ipv4.toString().replace(/\/$/, ''));
|
|
36
|
+
|
|
37
|
+
const ipv6 = new URL(parsed.toString());
|
|
38
|
+
ipv6.hostname = '::1';
|
|
39
|
+
candidates.push(ipv6.toString().replace(/\/$/, ''));
|
|
40
|
+
}
|
|
41
|
+
} catch (error) {
|
|
42
|
+
// Keep the preferred URL only if parsing fails.
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
return [...new Set(candidates)];
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
applyResolvedBaseURL(baseURL) {
|
|
49
|
+
this.baseURL = this.normalizeBaseURL(baseURL);
|
|
50
|
+
return this.baseURL;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * Decide whether an availability-check failure is a retryable error.
 *
 * True when the lower-cased error message contains one of a fixed set of
 * connection-related fragments, or when the error is named 'AbortError'.
 *
 * @param {*} error - Error-like value; a missing message is treated as ''.
 * @returns {boolean} True when the error is considered retryable.
 */
isRetryableAvailabilityError(error) {
    const text = String(error?.message || '').toLowerCase();
    const fragments = ['econnrefused', 'fetch failed', 'network', 'socket', 'connect'];
    if (fragments.some((fragment) => text.includes(fragment))) {
        return true;
    }
    return error?.name === 'AbortError';
}
|
|
64
|
+
|
|
21
65
|
async checkOllamaAvailability() {
|
|
22
66
|
|
|
23
67
|
if (this.isAvailable !== null && Date.now() - this.lastCheck < this.cacheTimeout) {
|
|
@@ -38,50 +82,79 @@ class OllamaClient {
|
|
|
38
82
|
}
|
|
39
83
|
|
|
40
84
|
async _doAvailabilityCheck() {
|
|
85
|
+
const candidateURLs = this.buildCandidateBaseURLs();
|
|
86
|
+
const attemptedURLs = [];
|
|
87
|
+
let lastError = null;
|
|
41
88
|
|
|
42
|
-
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
89
|
+
for (let index = 0; index < candidateURLs.length; index += 1) {
|
|
90
|
+
const candidateBaseURL = candidateURLs[index];
|
|
91
|
+
attemptedURLs.push(candidateBaseURL);
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
const controller = new AbortController();
|
|
95
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
96
|
+
|
|
97
|
+
const response = await fetch(`${candidateBaseURL}/api/version`, {
|
|
98
|
+
signal: controller.signal,
|
|
99
|
+
headers: { 'Content-Type': 'application/json' }
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
clearTimeout(timeoutId);
|
|
103
|
+
|
|
104
|
+
if (!response.ok) {
|
|
105
|
+
this.isAvailable = {
|
|
106
|
+
available: false,
|
|
107
|
+
error: 'Ollama not responding properly',
|
|
108
|
+
attemptedURL: candidateBaseURL,
|
|
109
|
+
attemptedURLs
|
|
110
|
+
};
|
|
111
|
+
this.lastCheck = Date.now();
|
|
112
|
+
return this.isAvailable;
|
|
113
|
+
}
|
|
52
114
|
|
|
53
|
-
if (response.ok) {
|
|
54
115
|
const data = await response.json();
|
|
55
|
-
this.
|
|
116
|
+
this.applyResolvedBaseURL(candidateBaseURL);
|
|
117
|
+
this.isAvailable = {
|
|
118
|
+
available: true,
|
|
119
|
+
version: data.version || 'unknown',
|
|
120
|
+
attemptedURL: candidateBaseURL,
|
|
121
|
+
attemptedURLs
|
|
122
|
+
};
|
|
56
123
|
this.lastCheck = Date.now();
|
|
57
124
|
return this.isAvailable;
|
|
125
|
+
} catch (error) {
|
|
126
|
+
lastError = error;
|
|
127
|
+
if (!this.isRetryableAvailabilityError(error) || index === candidateURLs.length - 1) {
|
|
128
|
+
break;
|
|
129
|
+
}
|
|
58
130
|
}
|
|
131
|
+
}
|
|
59
132
|
|
|
60
|
-
|
|
61
|
-
this.lastCheck = Date.now();
|
|
62
|
-
return this.isAvailable;
|
|
63
|
-
} catch (error) {
|
|
133
|
+
if (lastError) {
|
|
64
134
|
let errorMessage;
|
|
65
135
|
let hint = '';
|
|
136
|
+
const errorText = String(lastError.message || '');
|
|
137
|
+
const activeURL = attemptedURLs[attemptedURLs.length - 1] || this.preferredBaseURL;
|
|
66
138
|
|
|
67
|
-
if (
|
|
68
|
-
errorMessage = `Ollama not running at ${
|
|
139
|
+
if (errorText.includes('ECONNREFUSED')) {
|
|
140
|
+
errorMessage = `Ollama not running at ${activeURL}`;
|
|
69
141
|
hint = 'Make sure Ollama is running. Try: ollama serve';
|
|
70
|
-
} else if (
|
|
71
|
-
errorMessage = `Ollama connection timeout at ${
|
|
142
|
+
} else if (errorText.includes('timeout') || lastError.name === 'AbortError') {
|
|
143
|
+
errorMessage = `Ollama connection timeout at ${activeURL}`;
|
|
72
144
|
hint = 'The server is not responding. Check if Ollama is running and accessible.';
|
|
73
|
-
} else if (
|
|
74
|
-
errorMessage = `Cannot resolve host: ${
|
|
145
|
+
} else if (errorText.includes('ENOTFOUND')) {
|
|
146
|
+
errorMessage = `Cannot resolve host: ${activeURL}`;
|
|
75
147
|
hint = 'Check your OLLAMA_HOST environment variable or network configuration.';
|
|
76
148
|
} else {
|
|
77
|
-
errorMessage = error
|
|
149
|
+
errorMessage = errorText || 'Unknown Ollama availability error';
|
|
78
150
|
}
|
|
79
151
|
|
|
80
152
|
this.isAvailable = {
|
|
81
153
|
available: false,
|
|
82
154
|
error: errorMessage,
|
|
83
|
-
hint
|
|
84
|
-
attemptedURL:
|
|
155
|
+
hint,
|
|
156
|
+
attemptedURL: activeURL,
|
|
157
|
+
attemptedURLs
|
|
85
158
|
};
|
|
86
159
|
this.lastCheck = Date.now();
|
|
87
160
|
return this.isAvailable;
|