llm-checker 3.5.11 → 3.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -17
- package/bin/cli.js +40 -0
- package/bin/enhanced_cli.js +384 -35
- package/package.json +2 -1
- package/src/ai/model-selector.js +47 -16
- package/src/ai/multi-objective-selector.js +55 -9
- package/src/data/model-database.js +92 -1
- package/src/data/seed/README.md +8 -0
- package/src/data/seed/models.db +0 -0
- package/src/hardware/backends/rocm-detector.js +469 -68
- package/src/hardware/unified-detector.js +69 -18
- package/src/index.js +59 -8
- package/src/models/ai-check-selector.js +27 -2
- package/src/models/deterministic-selector.js +84 -7
- package/src/ollama/client.js +121 -0
- package/src/ollama/enhanced-scraper.js +40 -26
- package/src/ollama/native-scraper.js +52 -27
- package/src/ui/cli-theme.js +139 -24
- package/src/ui/interactive-panel.js +1 -18
- package/src/utils/verbose-progress.js +144 -187
|
@@ -307,7 +307,10 @@ class UnifiedDetector {
|
|
|
307
307
|
summary.dedicatedGpuCount = topology.dedicatedCount;
|
|
308
308
|
summary.integratedGpuModels = topology.integratedModels;
|
|
309
309
|
summary.dedicatedGpuModels = topology.dedicatedModels;
|
|
310
|
-
summary.integratedSharedMemory =
|
|
310
|
+
summary.integratedSharedMemory = Math.max(
|
|
311
|
+
topology.integratedSharedMemory,
|
|
312
|
+
this.getPrimaryIntegratedSharedMemory(primary)
|
|
313
|
+
);
|
|
311
314
|
if (!summary.gpuModel) {
|
|
312
315
|
summary.gpuModel = topology.primaryModel || null;
|
|
313
316
|
}
|
|
@@ -324,18 +327,70 @@ class UnifiedDetector {
|
|
|
324
327
|
summary.runtimeBackendName = runtimeSelection.name;
|
|
325
328
|
summary.hasRuntimeAssist = runtimeSelection.assisted;
|
|
326
329
|
|
|
327
|
-
// Effective memory for LLM loading
|
|
328
|
-
//
|
|
329
|
-
|
|
330
|
+
// Effective memory for LLM loading. Integrated ROCm/iGPU devices expose
|
|
331
|
+
// a small aperture as VRAM and a much larger shared pool for model-fit
|
|
332
|
+
// decisions, so avoid treating the aperture as dedicated VRAM.
|
|
333
|
+
if (
|
|
334
|
+
['rocm', 'intel'].includes(primary?.type) &&
|
|
335
|
+
summary.hasIntegratedGPU &&
|
|
336
|
+
!summary.hasDedicatedGPU &&
|
|
337
|
+
summary.integratedSharedMemory > 0
|
|
338
|
+
) {
|
|
339
|
+
summary.effectiveMemory = summary.integratedSharedMemory;
|
|
340
|
+
} else if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
330
341
|
summary.effectiveMemory = summary.totalVRAM;
|
|
331
342
|
} else {
|
|
332
343
|
// Use 70% of system RAM for models (leave room for OS)
|
|
333
344
|
summary.effectiveMemory = Math.round(summary.systemRAM * 0.7);
|
|
334
345
|
}
|
|
335
346
|
|
|
347
|
+
summary.hardwareTier = this.classifyHardwareTierFromSummary(summary);
|
|
348
|
+
summary.bestBackendLabel = this.getBestBackendLabel(summary);
|
|
349
|
+
|
|
336
350
|
return summary;
|
|
337
351
|
}
|
|
338
352
|
|
|
353
|
+
getPrimaryIntegratedSharedMemory(primary) {
|
|
354
|
+
const gpus = Array.isArray(primary?.info?.gpus) ? primary.info.gpus : [];
|
|
355
|
+
return gpus
|
|
356
|
+
.filter((gpu) => gpu?.type === 'integrated')
|
|
357
|
+
.reduce((max, gpu) => {
|
|
358
|
+
const candidates = [
|
|
359
|
+
gpu?.sharedMemory,
|
|
360
|
+
gpu?.unifiedMemory,
|
|
361
|
+
gpu?.memory?.shared,
|
|
362
|
+
gpu?.memory?.total
|
|
363
|
+
].map(Number).filter((value) => Number.isFinite(value) && value > 0);
|
|
364
|
+
return Math.max(max, ...candidates, 0);
|
|
365
|
+
}, 0);
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
classifyHardwareTierFromSummary(summary = {}) {
|
|
369
|
+
const effectiveMem = Number(summary.effectiveMemory) || 0;
|
|
370
|
+
const speed = Number(summary.speedCoefficient) || 0;
|
|
371
|
+
|
|
372
|
+
if (effectiveMem >= 80 && speed >= 300) return 'ultra_high'; // H100, MI300
|
|
373
|
+
if (effectiveMem >= 48 && speed >= 200) return 'very_high'; // 2x3090, 4090
|
|
374
|
+
if (effectiveMem >= 24 && speed >= 150) return 'high'; // 3090, 4090, M2 Max
|
|
375
|
+
if (effectiveMem >= 16 && speed >= 100) return 'medium_high'; // 4080, 3080, M3 Pro
|
|
376
|
+
if (effectiveMem >= 12 && speed >= 80) return 'medium'; // 3060, 4060 Ti
|
|
377
|
+
if (effectiveMem >= 8 && speed >= 50) return 'medium_low'; // 3060, M2
|
|
378
|
+
if (effectiveMem >= 6 && speed >= 30) return 'low'; // GTX 1660, iGPU
|
|
379
|
+
return 'ultra_low'; // CPU only
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
getBestBackendLabel(summary = {}) {
|
|
383
|
+
const backendName = summary.backendName || String(summary.bestBackend || 'cpu').toUpperCase();
|
|
384
|
+
if (
|
|
385
|
+
summary.hasRuntimeAssist &&
|
|
386
|
+
summary.runtimeBackend &&
|
|
387
|
+
summary.runtimeBackend !== summary.bestBackend
|
|
388
|
+
) {
|
|
389
|
+
return `${backendName} + ${summary.runtimeBackendName || summary.runtimeBackend} assist`;
|
|
390
|
+
}
|
|
391
|
+
return backendName;
|
|
392
|
+
}
|
|
393
|
+
|
|
339
394
|
summarizeGPUInventory(gpus = []) {
|
|
340
395
|
const normalized = this.normalizeGpuInventory(gpus);
|
|
341
396
|
const counts = new Map();
|
|
@@ -819,7 +874,11 @@ class UnifiedDetector {
|
|
|
819
874
|
const summary = result.summary;
|
|
820
875
|
|
|
821
876
|
// Leave headroom (2GB for GPU, 20% for RAM)
|
|
822
|
-
if (
|
|
877
|
+
if (
|
|
878
|
+
summary.bestBackend === 'cpu' ||
|
|
879
|
+
summary.bestBackend === 'metal' ||
|
|
880
|
+
(summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
|
|
881
|
+
) {
|
|
823
882
|
return sizeGB <= (summary.effectiveMemory - 2);
|
|
824
883
|
} else {
|
|
825
884
|
const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
|
|
@@ -844,19 +903,7 @@ class UnifiedDetector {
|
|
|
844
903
|
const result = this.cache;
|
|
845
904
|
if (!result) return 'unknown';
|
|
846
905
|
|
|
847
|
-
|
|
848
|
-
const effectiveMem = summary.effectiveMemory;
|
|
849
|
-
const speed = summary.speedCoefficient;
|
|
850
|
-
|
|
851
|
-
// Tier based on effective memory and speed
|
|
852
|
-
if (effectiveMem >= 80 && speed >= 300) return 'ultra_high'; // H100, MI300
|
|
853
|
-
if (effectiveMem >= 48 && speed >= 200) return 'very_high'; // 2x3090, 4090
|
|
854
|
-
if (effectiveMem >= 24 && speed >= 150) return 'high'; // 3090, 4090, M2 Max
|
|
855
|
-
if (effectiveMem >= 16 && speed >= 100) return 'medium_high'; // 4080, 3080, M3 Pro
|
|
856
|
-
if (effectiveMem >= 12 && speed >= 80) return 'medium'; // 3060, 4060 Ti
|
|
857
|
-
if (effectiveMem >= 8 && speed >= 50) return 'medium_low'; // 3060, M2
|
|
858
|
-
if (effectiveMem >= 6 && speed >= 30) return 'low'; // GTX 1660, iGPU
|
|
859
|
-
return 'ultra_low'; // CPU only
|
|
906
|
+
return result.summary?.hardwareTier || this.classifyHardwareTierFromSummary(result.summary);
|
|
860
907
|
}
|
|
861
908
|
|
|
862
909
|
/**
|
|
@@ -922,6 +969,10 @@ class UnifiedDetector {
|
|
|
922
969
|
const gpuDesc = summary.gpuInventory || (
|
|
923
970
|
summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
|
|
924
971
|
);
|
|
972
|
+
if (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0) {
|
|
973
|
+
const dedicatedLabel = summary.totalVRAM > 0 ? `, ${summary.totalVRAM}GB aperture` : '';
|
|
974
|
+
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory${dedicatedLabel}) + ${summary.cpuModel}`;
|
|
975
|
+
}
|
|
925
976
|
return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
|
|
926
977
|
}
|
|
927
978
|
else if (summary.bestBackend === 'metal') {
|
package/src/index.js
CHANGED
|
@@ -78,7 +78,6 @@ class LLMChecker {
|
|
|
78
78
|
// Report hardware detection progress before platform-specific analysis
|
|
79
79
|
if (this.progress) {
|
|
80
80
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
81
|
-
await new Promise(resolve => setTimeout(resolve, 200)); // Small delay for demo
|
|
82
81
|
const isApple = detectedPlatform === 'darwin';
|
|
83
82
|
const memLabel = isApple ? 'unified memory' : 'RAM';
|
|
84
83
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB ${memLabel}`, true);
|
|
@@ -117,7 +116,6 @@ class LLMChecker {
|
|
|
117
116
|
// Apple Silicon optimized analysis with unified memory consideration
|
|
118
117
|
if (this.progress) {
|
|
119
118
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
120
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
121
119
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB unified memory`, true);
|
|
122
120
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Apple Silicon GPU'}`;
|
|
123
121
|
this.progress.stepComplete(summary);
|
|
@@ -131,7 +129,6 @@ class LLMChecker {
|
|
|
131
129
|
// Windows-specific analysis with discrete GPU / iGPU handling
|
|
132
130
|
if (this.progress) {
|
|
133
131
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
134
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
135
132
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
136
133
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
|
|
137
134
|
this.progress.stepComplete(summary);
|
|
@@ -145,7 +142,6 @@ class LLMChecker {
|
|
|
145
142
|
// Linux-specific analysis (similar to Windows but with Linux considerations)
|
|
146
143
|
if (this.progress) {
|
|
147
144
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
148
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
149
145
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
150
146
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'GPU'}`;
|
|
151
147
|
this.progress.stepComplete(summary);
|
|
@@ -516,7 +512,7 @@ class LLMChecker {
|
|
|
516
512
|
|
|
517
513
|
try {
|
|
518
514
|
// 1. Obtener TODOS los modelos de la base de datos de Ollama
|
|
519
|
-
const ollamaData = await this.
|
|
515
|
+
const ollamaData = await this.loadOllamaModelData();
|
|
520
516
|
const allOllamaModels = ollamaData.models || [];
|
|
521
517
|
this.logger.info(`Found ${allOllamaModels.length} models in Ollama database`);
|
|
522
518
|
|
|
@@ -1345,9 +1341,27 @@ class LLMChecker {
|
|
|
1345
1341
|
}
|
|
1346
1342
|
|
|
1347
1343
|
getHardwareTier(hardware) {
|
|
1344
|
+
const canonicalTier = hardware?.summary?.hardwareTier;
|
|
1345
|
+
if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
|
|
1346
|
+
return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
|
|
1347
|
+
}
|
|
1348
1348
|
return this.calculateHardwareScore(hardware).tier;
|
|
1349
1349
|
}
|
|
1350
1350
|
|
|
1351
|
+
getHardwareTierBucket(hardware) {
|
|
1352
|
+
const tier = this.getHardwareTier(hardware);
|
|
1353
|
+
switch (tier) {
|
|
1354
|
+
case 'very_high':
|
|
1355
|
+
return 'ultra_high';
|
|
1356
|
+
case 'medium_high':
|
|
1357
|
+
return 'high';
|
|
1358
|
+
case 'medium_low':
|
|
1359
|
+
return 'low';
|
|
1360
|
+
default:
|
|
1361
|
+
return tier;
|
|
1362
|
+
}
|
|
1363
|
+
}
|
|
1364
|
+
|
|
1351
1365
|
calculateHardwareScore(hardware) {
|
|
1352
1366
|
const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
|
|
1353
1367
|
|
|
@@ -2003,7 +2017,7 @@ class LLMChecker {
|
|
|
2003
2017
|
score -= 15;
|
|
2004
2018
|
}
|
|
2005
2019
|
|
|
2006
|
-
const hardwareTier = this.
|
|
2020
|
+
const hardwareTier = this.getHardwareTierBucket(hardware);
|
|
2007
2021
|
switch (hardwareTier) {
|
|
2008
2022
|
case 'ultra_high':
|
|
2009
2023
|
score += 15;
|
|
@@ -2412,14 +2426,51 @@ class LLMChecker {
|
|
|
2412
2426
|
this.getAllModels().find(m => m.name.toLowerCase().includes(name.toLowerCase()));
|
|
2413
2427
|
}
|
|
2414
2428
|
|
|
2429
|
+
async loadSyncedOllamaModelData() {
|
|
2430
|
+
const ModelDatabase = require('./data/model-database');
|
|
2431
|
+
const database = new ModelDatabase();
|
|
2432
|
+
|
|
2433
|
+
try {
|
|
2434
|
+
await database.initialize();
|
|
2435
|
+
const models = database.getAllModelsWithVariants();
|
|
2436
|
+
const stats = database.getStats();
|
|
2437
|
+
|
|
2438
|
+
if (models.length > 0) {
|
|
2439
|
+
return {
|
|
2440
|
+
models,
|
|
2441
|
+
total_count: models.length,
|
|
2442
|
+
cached_at: stats.lastSync || null,
|
|
2443
|
+
source: 'ollama_sqlite_database'
|
|
2444
|
+
};
|
|
2445
|
+
}
|
|
2446
|
+
} finally {
|
|
2447
|
+
database.close();
|
|
2448
|
+
}
|
|
2449
|
+
|
|
2450
|
+
return null;
|
|
2451
|
+
}
|
|
2452
|
+
|
|
2453
|
+
async loadOllamaModelData() {
|
|
2454
|
+
try {
|
|
2455
|
+
const syncedData = await this.loadSyncedOllamaModelData();
|
|
2456
|
+
if (syncedData?.models?.length > 0) {
|
|
2457
|
+
return syncedData;
|
|
2458
|
+
}
|
|
2459
|
+
} catch (error) {
|
|
2460
|
+
this.logger.warn('Synced SQLite model database unavailable, falling back to Ollama cache', { error: error.message });
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
return this.ollamaScraper.scrapeAllModels(false);
|
|
2464
|
+
}
|
|
2465
|
+
|
|
2415
2466
|
|
|
2416
2467
|
async generateIntelligentRecommendations(hardware, options = {}) {
|
|
2417
2468
|
try {
|
|
2418
2469
|
this.logger.info('Generating intelligent recommendations...');
|
|
2419
2470
|
const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
|
|
2420
2471
|
|
|
2421
|
-
//
|
|
2422
|
-
const ollamaData = await this.
|
|
2472
|
+
// Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
|
|
2473
|
+
const ollamaData = await this.loadOllamaModelData();
|
|
2423
2474
|
const allModels = ollamaData.models || [];
|
|
2424
2475
|
|
|
2425
2476
|
if (allModels.length === 0) {
|
|
@@ -77,8 +77,8 @@ Respond with JSON only, no additional text.`;
|
|
|
77
77
|
// Phase 1: Get ALL available models from the 177-model Ollama database
|
|
78
78
|
const hardware = await this.deterministicSelector.getHardware();
|
|
79
79
|
|
|
80
|
-
// Use the same
|
|
81
|
-
const ollamaData = await this.
|
|
80
|
+
// Use the same synced database that recommend/check use.
|
|
81
|
+
const ollamaData = await this.loadModelDatabase();
|
|
82
82
|
const allOllamaModels = ollamaData.models || [];
|
|
83
83
|
|
|
84
84
|
if (!silent) {
|
|
@@ -248,6 +248,31 @@ Respond with JSON only, no additional text.`;
|
|
|
248
248
|
};
|
|
249
249
|
}
|
|
250
250
|
|
|
251
|
+
async loadModelDatabase() {
|
|
252
|
+
try {
|
|
253
|
+
const ModelDatabase = require('../data/model-database');
|
|
254
|
+
const database = new ModelDatabase();
|
|
255
|
+
await database.initialize();
|
|
256
|
+
|
|
257
|
+
try {
|
|
258
|
+
const models = database.getAllModelsWithVariants();
|
|
259
|
+
if (models.length > 0) {
|
|
260
|
+
return {
|
|
261
|
+
models,
|
|
262
|
+
total_count: models.length,
|
|
263
|
+
source: 'ollama_sqlite_database'
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
} finally {
|
|
267
|
+
database.close();
|
|
268
|
+
}
|
|
269
|
+
} catch {
|
|
270
|
+
// Fall through to scraper cache.
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
return this.ollamaScraper.scrapeAllModels(false);
|
|
274
|
+
}
|
|
275
|
+
|
|
251
276
|
/**
|
|
252
277
|
* Pick the best installed evaluator model
|
|
253
278
|
*/
|
|
@@ -44,11 +44,16 @@ class DeterministicModelSelector {
|
|
|
44
44
|
this.familyBumps = {
|
|
45
45
|
'qwen2.5': 2,
|
|
46
46
|
'qwen3': 4,
|
|
47
|
+
'gemma3': 3,
|
|
47
48
|
'deepseek': 3,
|
|
49
|
+
'deepseek-r1': 5,
|
|
50
|
+
'deepseek-coder': 4,
|
|
48
51
|
'mistral': 1,
|
|
49
52
|
'llama3.1': 1,
|
|
50
53
|
'llama3.2': 2,
|
|
51
54
|
'gemma2': 1,
|
|
55
|
+
'yi': -3,
|
|
56
|
+
'yi-coder': 1,
|
|
52
57
|
'phi-3': 0,
|
|
53
58
|
'granite': 0,
|
|
54
59
|
'solar': 0,
|
|
@@ -750,7 +755,13 @@ class DeterministicModelSelector {
|
|
|
750
755
|
if (ollamaModel.primary_category === 'reasoning') derivedTags.add('reasoning');
|
|
751
756
|
if (ollamaModel.primary_category === 'creative') derivedTags.add('creative');
|
|
752
757
|
|
|
753
|
-
|
|
758
|
+
const hasConcreteVariants = variants.some((variant) => this.variantHasConcreteSizeOrParams(variant));
|
|
759
|
+
const selectableVariants = hasConcreteVariants
|
|
760
|
+
? variants.filter((variant) => this.variantHasConcreteSizeOrParams(variant))
|
|
761
|
+
: variants;
|
|
762
|
+
|
|
763
|
+
return selectableVariants
|
|
764
|
+
.map((variant) => {
|
|
754
765
|
const variantTag = variant.tag || fallbackTag;
|
|
755
766
|
const quant = this.resolveVariantQuantization(variant, variantTag);
|
|
756
767
|
const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
|
|
@@ -821,6 +832,8 @@ class DeterministicModelSelector {
|
|
|
821
832
|
modalities,
|
|
822
833
|
tags: modelTags,
|
|
823
834
|
model_identifier: variantTag,
|
|
835
|
+
last_updated: ollamaModel.last_updated || ollamaModel.lastUpdated || '',
|
|
836
|
+
updated_at: ollamaModel.updated_at || ollamaModel.updatedAt || '',
|
|
824
837
|
installed: false,
|
|
825
838
|
pulls: ollamaModel.actual_pulls || ollamaModel.pulls || 0,
|
|
826
839
|
availableQuantizations,
|
|
@@ -842,6 +855,28 @@ class DeterministicModelSelector {
|
|
|
842
855
|
});
|
|
843
856
|
}
|
|
844
857
|
|
|
858
|
+
variantHasConcreteSizeOrParams(variant = {}) {
|
|
859
|
+
const params = this.extractParamsFromString(
|
|
860
|
+
variant.params_b,
|
|
861
|
+
variant.paramsB,
|
|
862
|
+
variant.parameter_size,
|
|
863
|
+
variant.size,
|
|
864
|
+
variant.tag,
|
|
865
|
+
variant.label,
|
|
866
|
+
variant.name
|
|
867
|
+
);
|
|
868
|
+
if (Number.isFinite(params) && params > 0) return true;
|
|
869
|
+
|
|
870
|
+
const artifactSize = Number(
|
|
871
|
+
variant.real_size_gb ??
|
|
872
|
+
variant.estimated_size_gb ??
|
|
873
|
+
variant.size_gb ??
|
|
874
|
+
NaN
|
|
875
|
+
);
|
|
876
|
+
|
|
877
|
+
return Number.isFinite(artifactSize) && artifactSize > 0;
|
|
878
|
+
}
|
|
879
|
+
|
|
845
880
|
parseBillionsValue(rawValue) {
|
|
846
881
|
return parseMoEBillionsValue(rawValue);
|
|
847
882
|
}
|
|
@@ -861,7 +896,26 @@ class DeterministicModelSelector {
|
|
|
861
896
|
|
|
862
897
|
parseDateSafe(value) {
|
|
863
898
|
if (!value || typeof value !== 'string') return null;
|
|
864
|
-
const
|
|
899
|
+
const normalized = value.trim();
|
|
900
|
+
const relativeMatch = normalized.match(/^(\d+)\s*(minutes?|hours?|days?|weeks?|months?|years?)\s+ago$/i);
|
|
901
|
+
if (relativeMatch) {
|
|
902
|
+
const amount = parseInt(relativeMatch[1], 10);
|
|
903
|
+
const unit = relativeMatch[2].toLowerCase();
|
|
904
|
+
const days =
|
|
905
|
+
unit.startsWith('minute') ? amount / (24 * 60) :
|
|
906
|
+
unit.startsWith('hour') ? amount / 24 :
|
|
907
|
+
unit.startsWith('day') ? amount :
|
|
908
|
+
unit.startsWith('week') ? amount * 7 :
|
|
909
|
+
unit.startsWith('month') ? amount * 30 :
|
|
910
|
+
unit.startsWith('year') ? amount * 365 :
|
|
911
|
+
null;
|
|
912
|
+
|
|
913
|
+
if (Number.isFinite(days)) {
|
|
914
|
+
return new Date(Date.now() - days * 24 * 60 * 60 * 1000);
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
const parsed = new Date(normalized);
|
|
865
919
|
if (Number.isNaN(parsed.getTime())) return null;
|
|
866
920
|
return parsed;
|
|
867
921
|
}
|
|
@@ -912,8 +966,7 @@ class DeterministicModelSelector {
|
|
|
912
966
|
model.updatedAt,
|
|
913
967
|
model.release_date,
|
|
914
968
|
model.released_at,
|
|
915
|
-
model.created_at
|
|
916
|
-
model.detailed_scraped_at
|
|
969
|
+
model.created_at
|
|
917
970
|
];
|
|
918
971
|
|
|
919
972
|
const updatedAt = dateCandidates
|
|
@@ -1027,6 +1080,9 @@ class DeterministicModelSelector {
|
|
|
1027
1080
|
|
|
1028
1081
|
const regex = /(\d+\.?\d*)\s*([BbMm])/g;
|
|
1029
1082
|
for (const match of value.matchAll(regex)) {
|
|
1083
|
+
const suffix = value.slice(match.index + match[0].length, match.index + match[0].length + 2);
|
|
1084
|
+
if (/^\s*b\b/i.test(suffix) || /^\s*[gk]b\b/i.test(suffix)) continue;
|
|
1085
|
+
|
|
1030
1086
|
const amount = parseFloat(match[1]);
|
|
1031
1087
|
const unit = match[2].toUpperCase();
|
|
1032
1088
|
pushCandidate(unit === 'M' ? amount / 1000 : amount);
|
|
@@ -1103,7 +1159,7 @@ class DeterministicModelSelector {
|
|
|
1103
1159
|
ollamaModel.parameter_count
|
|
1104
1160
|
);
|
|
1105
1161
|
if (metadataCandidates.length > 0) {
|
|
1106
|
-
return
|
|
1162
|
+
return metadataCandidates[0];
|
|
1107
1163
|
}
|
|
1108
1164
|
|
|
1109
1165
|
const artifactSizeGB = this.extractVariantSizeGB(variant, null);
|
|
@@ -1136,7 +1192,7 @@ class DeterministicModelSelector {
|
|
|
1136
1192
|
}
|
|
1137
1193
|
|
|
1138
1194
|
extractVariantSizeGB(variant, paramsB) {
|
|
1139
|
-
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
|
|
1195
|
+
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? variant.size_gb ?? NaN);
|
|
1140
1196
|
if (Number.isFinite(candidate) && candidate > 0) return candidate;
|
|
1141
1197
|
if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
|
|
1142
1198
|
return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
|
|
@@ -1207,11 +1263,14 @@ class DeterministicModelSelector {
|
|
|
1207
1263
|
if (name.includes('qwen2.5')) return 'qwen2.5';
|
|
1208
1264
|
if (name.includes('qwen3')) return 'qwen3';
|
|
1209
1265
|
if (name.includes('qwen')) return 'qwen2.5';
|
|
1266
|
+
if (name.includes('deepseek-r1')) return 'deepseek-r1';
|
|
1267
|
+
if (name.includes('deepseek-coder')) return 'deepseek-coder';
|
|
1210
1268
|
if (name.includes('deepseek')) return 'deepseek';
|
|
1211
1269
|
if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
|
|
1212
1270
|
if (name.includes('llama3.1')) return 'llama3.1';
|
|
1213
1271
|
if (name.includes('llama')) return 'llama';
|
|
1214
1272
|
if (name.includes('mistral')) return 'mistral';
|
|
1273
|
+
if (name.includes('gemma3')) return 'gemma3';
|
|
1215
1274
|
if (name.includes('gemma')) return 'gemma2';
|
|
1216
1275
|
if (name.includes('phi')) return 'phi-3';
|
|
1217
1276
|
if (name.includes('llava')) return 'llava';
|
|
@@ -1219,6 +1278,8 @@ class DeterministicModelSelector {
|
|
|
1219
1278
|
if (name.includes('solar')) return 'solar';
|
|
1220
1279
|
if (name.includes('starcoder')) return 'starcoder';
|
|
1221
1280
|
if (name.includes('minicpm')) return 'minicpm';
|
|
1281
|
+
if (name.includes('yi-coder')) return 'yi-coder';
|
|
1282
|
+
if (name.includes('yi')) return 'yi';
|
|
1222
1283
|
return 'unknown';
|
|
1223
1284
|
}
|
|
1224
1285
|
|
|
@@ -1351,7 +1412,9 @@ class DeterministicModelSelector {
|
|
|
1351
1412
|
const hardware = this.normalizeHardwareProfile(detectedHardware);
|
|
1352
1413
|
const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
|
|
1353
1414
|
const externalPool = Array.isArray(modelPool) && modelPool.length > 0
|
|
1354
|
-
?
|
|
1415
|
+
? (modelPool.some(model => typeof model?.paramsB === 'number' && model?.model_identifier)
|
|
1416
|
+
? modelPool
|
|
1417
|
+
: this.normalizeExternalModels(modelPool))
|
|
1355
1418
|
: await this.loadModelPool();
|
|
1356
1419
|
|
|
1357
1420
|
if (!silent) {
|
|
@@ -1445,6 +1508,10 @@ class DeterministicModelSelector {
|
|
|
1445
1508
|
|
|
1446
1509
|
filterByCategory(models, category) {
|
|
1447
1510
|
return models.filter(model => {
|
|
1511
|
+
if (this.isCloudVariantTag(model.model_identifier || model.name)) {
|
|
1512
|
+
return false;
|
|
1513
|
+
}
|
|
1514
|
+
|
|
1448
1515
|
switch (category) {
|
|
1449
1516
|
case 'coding':
|
|
1450
1517
|
return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
|
|
@@ -1682,6 +1749,12 @@ class DeterministicModelSelector {
|
|
|
1682
1749
|
// Freshness/deprecation adjustment
|
|
1683
1750
|
const freshnessAdjustment = this.calculateFreshnessAdjustment(model);
|
|
1684
1751
|
Q += freshnessAdjustment;
|
|
1752
|
+
|
|
1753
|
+
const pulls = Number(model.pulls || model.actual_pulls || 0);
|
|
1754
|
+
if (pulls >= 100000000) Q += 4;
|
|
1755
|
+
else if (pulls >= 20000000) Q += 3;
|
|
1756
|
+
else if (pulls >= 5000000) Q += 2;
|
|
1757
|
+
else if (pulls >= 1000000) Q += 1;
|
|
1685
1758
|
|
|
1686
1759
|
// Task alignment bump
|
|
1687
1760
|
const taskBump = this.getTaskAlignmentBump(model, category);
|
|
@@ -2141,6 +2214,10 @@ class DeterministicModelSelector {
|
|
|
2141
2214
|
|
|
2142
2215
|
mapHardwareTier(hardware = {}) {
|
|
2143
2216
|
const summary = hardware?.summary || {};
|
|
2217
|
+
const canonicalTier = summary.hardwareTier || summary.hardware_tier;
|
|
2218
|
+
if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
|
|
2219
|
+
return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
|
|
2220
|
+
}
|
|
2144
2221
|
const effectiveMemory = Number(summary.effectiveMemory);
|
|
2145
2222
|
const speedCoefficient = Number(summary.speedCoefficient);
|
|
2146
2223
|
if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
|
package/src/ollama/client.js
CHANGED
|
@@ -668,6 +668,127 @@ class OllamaClient {
|
|
|
668
668
|
throw new Error(`Failed to run chat request: ${error.message}`);
|
|
669
669
|
}
|
|
670
670
|
}
|
|
671
|
+
|
|
672
|
+
async streamChat(modelName, messages, options = {}, onChunk = null) {
|
|
673
|
+
const availability = await this.checkOllamaAvailability();
|
|
674
|
+
if (!availability.available) {
|
|
675
|
+
throw new Error(`Ollama not available: ${availability.error}`);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
const {
|
|
679
|
+
tools,
|
|
680
|
+
format,
|
|
681
|
+
keepAlive,
|
|
682
|
+
timeoutMs = 120000,
|
|
683
|
+
generationOptions = {}
|
|
684
|
+
} = options;
|
|
685
|
+
|
|
686
|
+
const payload = {
|
|
687
|
+
model: modelName,
|
|
688
|
+
messages: Array.isArray(messages) ? messages : [],
|
|
689
|
+
stream: true
|
|
690
|
+
};
|
|
691
|
+
|
|
692
|
+
if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
|
|
693
|
+
if (format) payload.format = format;
|
|
694
|
+
if (keepAlive) payload.keep_alive = keepAlive;
|
|
695
|
+
if (generationOptions && Object.keys(generationOptions).length > 0) {
|
|
696
|
+
payload.options = generationOptions;
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
const startTime = Date.now();
|
|
700
|
+
const controller = new AbortController();
|
|
701
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
702
|
+
|
|
703
|
+
try {
|
|
704
|
+
const response = await fetch(`${this.baseURL}/api/chat`, {
|
|
705
|
+
method: 'POST',
|
|
706
|
+
signal: controller.signal,
|
|
707
|
+
headers: { 'Content-Type': 'application/json' },
|
|
708
|
+
body: JSON.stringify(payload)
|
|
709
|
+
});
|
|
710
|
+
|
|
711
|
+
if (!response.ok) {
|
|
712
|
+
const errorText = await response.text();
|
|
713
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
const decoder = new TextDecoder();
|
|
717
|
+
let buffer = '';
|
|
718
|
+
let content = '';
|
|
719
|
+
let finalData = null;
|
|
720
|
+
|
|
721
|
+
const handleLine = (line) => {
|
|
722
|
+
if (!line.trim()) return;
|
|
723
|
+
|
|
724
|
+
const data = JSON.parse(line);
|
|
725
|
+
const chunk = data?.message?.content || '';
|
|
726
|
+
if (chunk) {
|
|
727
|
+
content += chunk;
|
|
728
|
+
if (typeof onChunk === 'function') {
|
|
729
|
+
onChunk(chunk, data);
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
if (data.done) {
|
|
734
|
+
finalData = data;
|
|
735
|
+
}
|
|
736
|
+
};
|
|
737
|
+
|
|
738
|
+
if (response.body && typeof response.body.getReader === 'function') {
|
|
739
|
+
const reader = response.body.getReader();
|
|
740
|
+
while (true) {
|
|
741
|
+
const { done, value } = await reader.read();
|
|
742
|
+
if (done) break;
|
|
743
|
+
|
|
744
|
+
buffer += decoder.decode(value, { stream: true });
|
|
745
|
+
const lines = buffer.split('\n');
|
|
746
|
+
buffer = lines.pop() || '';
|
|
747
|
+
|
|
748
|
+
for (const line of lines) {
|
|
749
|
+
handleLine(line);
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
} else if (response.body && typeof response.body[Symbol.asyncIterator] === 'function') {
|
|
753
|
+
for await (const value of response.body) {
|
|
754
|
+
buffer += decoder.decode(value, { stream: true });
|
|
755
|
+
const lines = buffer.split('\n');
|
|
756
|
+
buffer = lines.pop() || '';
|
|
757
|
+
|
|
758
|
+
for (const line of lines) {
|
|
759
|
+
handleLine(line);
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
throw new Error('Streaming response body is not readable');
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
buffer += decoder.decode();
|
|
767
|
+
if (buffer.trim()) {
|
|
768
|
+
handleLine(buffer);
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
const responseTime = Date.now() - startTime;
|
|
772
|
+
const speed = this.calculateTokensPerSecond(finalData || {}, responseTime);
|
|
773
|
+
|
|
774
|
+
return {
|
|
775
|
+
...(finalData || {}),
|
|
776
|
+
message: {
|
|
777
|
+
role: 'assistant',
|
|
778
|
+
content
|
|
779
|
+
},
|
|
780
|
+
response: content,
|
|
781
|
+
responseTime,
|
|
782
|
+
tokensPerSecond: speed.tokensPerSecond,
|
|
783
|
+
evalTokensPerSecond: speed.evalTokensPerSecond,
|
|
784
|
+
endToEndTokensPerSecond: speed.endToEndTokensPerSecond
|
|
785
|
+
};
|
|
786
|
+
} catch (error) {
|
|
787
|
+
throw new Error(`Failed to run streaming chat request: ${error.message}`);
|
|
788
|
+
} finally {
|
|
789
|
+
clearTimeout(timeoutId);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
671
792
|
}
|
|
672
793
|
|
|
673
794
|
module.exports = OllamaClient;
|