llm-checker 3.5.12 → 3.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -17
- package/bin/cli.js +40 -0
- package/bin/enhanced_cli.js +360 -33
- package/package.json +2 -1
- package/src/ai/model-selector.js +47 -16
- package/src/ai/multi-objective-selector.js +55 -9
- package/src/data/model-database.js +92 -1
- package/src/data/seed/README.md +8 -0
- package/src/data/seed/models.db +0 -0
- package/src/hardware/backends/rocm-detector.js +469 -68
- package/src/hardware/unified-detector.js +39 -5
- package/src/index.js +40 -7
- package/src/models/ai-check-selector.js +27 -2
- package/src/models/deterministic-selector.js +80 -7
- package/src/ollama/client.js +121 -0
- package/src/ollama/enhanced-scraper.js +40 -26
- package/src/ollama/native-scraper.js +52 -27
- package/src/ui/cli-theme.js +139 -24
- package/src/ui/interactive-panel.js +1 -18
- package/src/utils/verbose-progress.js +144 -187
|
@@ -307,7 +307,10 @@ class UnifiedDetector {
|
|
|
307
307
|
summary.dedicatedGpuCount = topology.dedicatedCount;
|
|
308
308
|
summary.integratedGpuModels = topology.integratedModels;
|
|
309
309
|
summary.dedicatedGpuModels = topology.dedicatedModels;
|
|
310
|
-
summary.integratedSharedMemory =
|
|
310
|
+
summary.integratedSharedMemory = Math.max(
|
|
311
|
+
topology.integratedSharedMemory,
|
|
312
|
+
this.getPrimaryIntegratedSharedMemory(primary)
|
|
313
|
+
);
|
|
311
314
|
if (!summary.gpuModel) {
|
|
312
315
|
summary.gpuModel = topology.primaryModel || null;
|
|
313
316
|
}
|
|
@@ -324,9 +327,17 @@ class UnifiedDetector {
|
|
|
324
327
|
summary.runtimeBackendName = runtimeSelection.name;
|
|
325
328
|
summary.hasRuntimeAssist = runtimeSelection.assisted;
|
|
326
329
|
|
|
327
|
-
// Effective memory for LLM loading
|
|
328
|
-
//
|
|
329
|
-
|
|
330
|
+
// Effective memory for LLM loading. Integrated ROCm/iGPU devices expose
|
|
331
|
+
// a small aperture as VRAM and a much larger shared pool for model-fit
|
|
332
|
+
// decisions, so avoid treating the aperture as dedicated VRAM.
|
|
333
|
+
if (
|
|
334
|
+
['rocm', 'intel'].includes(primary?.type) &&
|
|
335
|
+
summary.hasIntegratedGPU &&
|
|
336
|
+
!summary.hasDedicatedGPU &&
|
|
337
|
+
summary.integratedSharedMemory > 0
|
|
338
|
+
) {
|
|
339
|
+
summary.effectiveMemory = summary.integratedSharedMemory;
|
|
340
|
+
} else if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
|
|
330
341
|
summary.effectiveMemory = summary.totalVRAM;
|
|
331
342
|
} else {
|
|
332
343
|
// Use 70% of system RAM for models (leave room for OS)
|
|
@@ -339,6 +350,21 @@ class UnifiedDetector {
|
|
|
339
350
|
return summary;
|
|
340
351
|
}
|
|
341
352
|
|
|
353
|
+
getPrimaryIntegratedSharedMemory(primary) {
|
|
354
|
+
const gpus = Array.isArray(primary?.info?.gpus) ? primary.info.gpus : [];
|
|
355
|
+
return gpus
|
|
356
|
+
.filter((gpu) => gpu?.type === 'integrated')
|
|
357
|
+
.reduce((max, gpu) => {
|
|
358
|
+
const candidates = [
|
|
359
|
+
gpu?.sharedMemory,
|
|
360
|
+
gpu?.unifiedMemory,
|
|
361
|
+
gpu?.memory?.shared,
|
|
362
|
+
gpu?.memory?.total
|
|
363
|
+
].map(Number).filter((value) => Number.isFinite(value) && value > 0);
|
|
364
|
+
return Math.max(max, ...candidates, 0);
|
|
365
|
+
}, 0);
|
|
366
|
+
}
|
|
367
|
+
|
|
342
368
|
classifyHardwareTierFromSummary(summary = {}) {
|
|
343
369
|
const effectiveMem = Number(summary.effectiveMemory) || 0;
|
|
344
370
|
const speed = Number(summary.speedCoefficient) || 0;
|
|
@@ -848,7 +874,11 @@ class UnifiedDetector {
|
|
|
848
874
|
const summary = result.summary;
|
|
849
875
|
|
|
850
876
|
// Leave headroom (2GB for GPU, 20% for RAM)
|
|
851
|
-
if (
|
|
877
|
+
if (
|
|
878
|
+
summary.bestBackend === 'cpu' ||
|
|
879
|
+
summary.bestBackend === 'metal' ||
|
|
880
|
+
(summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
|
|
881
|
+
) {
|
|
852
882
|
return sizeGB <= (summary.effectiveMemory - 2);
|
|
853
883
|
} else {
|
|
854
884
|
const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
|
|
@@ -939,6 +969,10 @@ class UnifiedDetector {
|
|
|
939
969
|
const gpuDesc = summary.gpuInventory || (
|
|
940
970
|
summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
|
|
941
971
|
);
|
|
972
|
+
if (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0) {
|
|
973
|
+
const dedicatedLabel = summary.totalVRAM > 0 ? `, ${summary.totalVRAM}GB aperture` : '';
|
|
974
|
+
return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory${dedicatedLabel}) + ${summary.cpuModel}`;
|
|
975
|
+
}
|
|
942
976
|
return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
|
|
943
977
|
}
|
|
944
978
|
else if (summary.bestBackend === 'metal') {
|
package/src/index.js
CHANGED
|
@@ -78,7 +78,6 @@ class LLMChecker {
|
|
|
78
78
|
// Report hardware detection progress before platform-specific analysis
|
|
79
79
|
if (this.progress) {
|
|
80
80
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
81
|
-
await new Promise(resolve => setTimeout(resolve, 200)); // Small delay for demo
|
|
82
81
|
const isApple = detectedPlatform === 'darwin';
|
|
83
82
|
const memLabel = isApple ? 'unified memory' : 'RAM';
|
|
84
83
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB ${memLabel}`, true);
|
|
@@ -117,7 +116,6 @@ class LLMChecker {
|
|
|
117
116
|
// Apple Silicon optimized analysis with unified memory consideration
|
|
118
117
|
if (this.progress) {
|
|
119
118
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
120
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
121
119
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB unified memory`, true);
|
|
122
120
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Apple Silicon GPU'}`;
|
|
123
121
|
this.progress.stepComplete(summary);
|
|
@@ -131,7 +129,6 @@ class LLMChecker {
|
|
|
131
129
|
// Windows-specific analysis with discrete GPU / iGPU handling
|
|
132
130
|
if (this.progress) {
|
|
133
131
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
134
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
135
132
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
136
133
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
|
|
137
134
|
this.progress.stepComplete(summary);
|
|
@@ -145,7 +142,6 @@ class LLMChecker {
|
|
|
145
142
|
// Linux-specific analysis (similar to Windows but with Linux considerations)
|
|
146
143
|
if (this.progress) {
|
|
147
144
|
this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
|
|
148
|
-
await new Promise(resolve => setTimeout(resolve, 200));
|
|
149
145
|
this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
|
|
150
146
|
const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'GPU'}`;
|
|
151
147
|
this.progress.stepComplete(summary);
|
|
@@ -516,7 +512,7 @@ class LLMChecker {
|
|
|
516
512
|
|
|
517
513
|
try {
|
|
518
514
|
// 1. Obtener TODOS los modelos de la base de datos de Ollama
|
|
519
|
-
const ollamaData = await this.
|
|
515
|
+
const ollamaData = await this.loadOllamaModelData();
|
|
520
516
|
const allOllamaModels = ollamaData.models || [];
|
|
521
517
|
this.logger.info(`Found ${allOllamaModels.length} models in Ollama database`);
|
|
522
518
|
|
|
@@ -2430,14 +2426,51 @@ class LLMChecker {
|
|
|
2430
2426
|
this.getAllModels().find(m => m.name.toLowerCase().includes(name.toLowerCase()));
|
|
2431
2427
|
}
|
|
2432
2428
|
|
|
2429
|
+
async loadSyncedOllamaModelData() {
|
|
2430
|
+
const ModelDatabase = require('./data/model-database');
|
|
2431
|
+
const database = new ModelDatabase();
|
|
2432
|
+
|
|
2433
|
+
try {
|
|
2434
|
+
await database.initialize();
|
|
2435
|
+
const models = database.getAllModelsWithVariants();
|
|
2436
|
+
const stats = database.getStats();
|
|
2437
|
+
|
|
2438
|
+
if (models.length > 0) {
|
|
2439
|
+
return {
|
|
2440
|
+
models,
|
|
2441
|
+
total_count: models.length,
|
|
2442
|
+
cached_at: stats.lastSync || null,
|
|
2443
|
+
source: 'ollama_sqlite_database'
|
|
2444
|
+
};
|
|
2445
|
+
}
|
|
2446
|
+
} finally {
|
|
2447
|
+
database.close();
|
|
2448
|
+
}
|
|
2449
|
+
|
|
2450
|
+
return null;
|
|
2451
|
+
}
|
|
2452
|
+
|
|
2453
|
+
async loadOllamaModelData() {
|
|
2454
|
+
try {
|
|
2455
|
+
const syncedData = await this.loadSyncedOllamaModelData();
|
|
2456
|
+
if (syncedData?.models?.length > 0) {
|
|
2457
|
+
return syncedData;
|
|
2458
|
+
}
|
|
2459
|
+
} catch (error) {
|
|
2460
|
+
this.logger.warn('Synced SQLite model database unavailable, falling back to Ollama cache', { error: error.message });
|
|
2461
|
+
}
|
|
2462
|
+
|
|
2463
|
+
return this.ollamaScraper.scrapeAllModels(false);
|
|
2464
|
+
}
|
|
2465
|
+
|
|
2433
2466
|
|
|
2434
2467
|
async generateIntelligentRecommendations(hardware, options = {}) {
|
|
2435
2468
|
try {
|
|
2436
2469
|
this.logger.info('Generating intelligent recommendations...');
|
|
2437
2470
|
const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
|
|
2438
2471
|
|
|
2439
|
-
//
|
|
2440
|
-
const ollamaData = await this.
|
|
2472
|
+
// Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
|
|
2473
|
+
const ollamaData = await this.loadOllamaModelData();
|
|
2441
2474
|
const allModels = ollamaData.models || [];
|
|
2442
2475
|
|
|
2443
2476
|
if (allModels.length === 0) {
|
|
@@ -77,8 +77,8 @@ Respond with JSON only, no additional text.`;
|
|
|
77
77
|
// Phase 1: Get ALL available models from the 177-model Ollama database
|
|
78
78
|
const hardware = await this.deterministicSelector.getHardware();
|
|
79
79
|
|
|
80
|
-
// Use the same
|
|
81
|
-
const ollamaData = await this.
|
|
80
|
+
// Use the same synced database that recommend/check use.
|
|
81
|
+
const ollamaData = await this.loadModelDatabase();
|
|
82
82
|
const allOllamaModels = ollamaData.models || [];
|
|
83
83
|
|
|
84
84
|
if (!silent) {
|
|
@@ -248,6 +248,31 @@ Respond with JSON only, no additional text.`;
|
|
|
248
248
|
};
|
|
249
249
|
}
|
|
250
250
|
|
|
251
|
+
async loadModelDatabase() {
|
|
252
|
+
try {
|
|
253
|
+
const ModelDatabase = require('../data/model-database');
|
|
254
|
+
const database = new ModelDatabase();
|
|
255
|
+
await database.initialize();
|
|
256
|
+
|
|
257
|
+
try {
|
|
258
|
+
const models = database.getAllModelsWithVariants();
|
|
259
|
+
if (models.length > 0) {
|
|
260
|
+
return {
|
|
261
|
+
models,
|
|
262
|
+
total_count: models.length,
|
|
263
|
+
source: 'ollama_sqlite_database'
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
} finally {
|
|
267
|
+
database.close();
|
|
268
|
+
}
|
|
269
|
+
} catch {
|
|
270
|
+
// Fall through to scraper cache.
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
return this.ollamaScraper.scrapeAllModels(false);
|
|
274
|
+
}
|
|
275
|
+
|
|
251
276
|
/**
|
|
252
277
|
* Pick the best installed evaluator model
|
|
253
278
|
*/
|
|
@@ -44,11 +44,16 @@ class DeterministicModelSelector {
|
|
|
44
44
|
this.familyBumps = {
|
|
45
45
|
'qwen2.5': 2,
|
|
46
46
|
'qwen3': 4,
|
|
47
|
+
'gemma3': 3,
|
|
47
48
|
'deepseek': 3,
|
|
49
|
+
'deepseek-r1': 5,
|
|
50
|
+
'deepseek-coder': 4,
|
|
48
51
|
'mistral': 1,
|
|
49
52
|
'llama3.1': 1,
|
|
50
53
|
'llama3.2': 2,
|
|
51
54
|
'gemma2': 1,
|
|
55
|
+
'yi': -3,
|
|
56
|
+
'yi-coder': 1,
|
|
52
57
|
'phi-3': 0,
|
|
53
58
|
'granite': 0,
|
|
54
59
|
'solar': 0,
|
|
@@ -750,7 +755,13 @@ class DeterministicModelSelector {
|
|
|
750
755
|
if (ollamaModel.primary_category === 'reasoning') derivedTags.add('reasoning');
|
|
751
756
|
if (ollamaModel.primary_category === 'creative') derivedTags.add('creative');
|
|
752
757
|
|
|
753
|
-
|
|
758
|
+
const hasConcreteVariants = variants.some((variant) => this.variantHasConcreteSizeOrParams(variant));
|
|
759
|
+
const selectableVariants = hasConcreteVariants
|
|
760
|
+
? variants.filter((variant) => this.variantHasConcreteSizeOrParams(variant))
|
|
761
|
+
: variants;
|
|
762
|
+
|
|
763
|
+
return selectableVariants
|
|
764
|
+
.map((variant) => {
|
|
754
765
|
const variantTag = variant.tag || fallbackTag;
|
|
755
766
|
const quant = this.resolveVariantQuantization(variant, variantTag);
|
|
756
767
|
const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
|
|
@@ -821,6 +832,8 @@ class DeterministicModelSelector {
|
|
|
821
832
|
modalities,
|
|
822
833
|
tags: modelTags,
|
|
823
834
|
model_identifier: variantTag,
|
|
835
|
+
last_updated: ollamaModel.last_updated || ollamaModel.lastUpdated || '',
|
|
836
|
+
updated_at: ollamaModel.updated_at || ollamaModel.updatedAt || '',
|
|
824
837
|
installed: false,
|
|
825
838
|
pulls: ollamaModel.actual_pulls || ollamaModel.pulls || 0,
|
|
826
839
|
availableQuantizations,
|
|
@@ -842,6 +855,28 @@ class DeterministicModelSelector {
|
|
|
842
855
|
});
|
|
843
856
|
}
|
|
844
857
|
|
|
858
|
+
variantHasConcreteSizeOrParams(variant = {}) {
|
|
859
|
+
const params = this.extractParamsFromString(
|
|
860
|
+
variant.params_b,
|
|
861
|
+
variant.paramsB,
|
|
862
|
+
variant.parameter_size,
|
|
863
|
+
variant.size,
|
|
864
|
+
variant.tag,
|
|
865
|
+
variant.label,
|
|
866
|
+
variant.name
|
|
867
|
+
);
|
|
868
|
+
if (Number.isFinite(params) && params > 0) return true;
|
|
869
|
+
|
|
870
|
+
const artifactSize = Number(
|
|
871
|
+
variant.real_size_gb ??
|
|
872
|
+
variant.estimated_size_gb ??
|
|
873
|
+
variant.size_gb ??
|
|
874
|
+
NaN
|
|
875
|
+
);
|
|
876
|
+
|
|
877
|
+
return Number.isFinite(artifactSize) && artifactSize > 0;
|
|
878
|
+
}
|
|
879
|
+
|
|
845
880
|
/**
 * Thin wrapper: forwards `rawValue` unchanged to `parseMoEBillionsValue`
 * (defined outside this view) and returns its result.
 */
parseBillionsValue(rawValue) {
|
|
846
881
|
return parseMoEBillionsValue(rawValue);
|
|
847
882
|
}
|
|
@@ -861,7 +896,26 @@ class DeterministicModelSelector {
|
|
|
861
896
|
|
|
862
897
|
parseDateSafe(value) {
|
|
863
898
|
if (!value || typeof value !== 'string') return null;
|
|
864
|
-
const
|
|
899
|
+
const normalized = value.trim();
|
|
900
|
+
const relativeMatch = normalized.match(/^(\d+)\s*(minutes?|hours?|days?|weeks?|months?|years?)\s+ago$/i);
|
|
901
|
+
if (relativeMatch) {
|
|
902
|
+
const amount = parseInt(relativeMatch[1], 10);
|
|
903
|
+
const unit = relativeMatch[2].toLowerCase();
|
|
904
|
+
const days =
|
|
905
|
+
unit.startsWith('minute') ? amount / (24 * 60) :
|
|
906
|
+
unit.startsWith('hour') ? amount / 24 :
|
|
907
|
+
unit.startsWith('day') ? amount :
|
|
908
|
+
unit.startsWith('week') ? amount * 7 :
|
|
909
|
+
unit.startsWith('month') ? amount * 30 :
|
|
910
|
+
unit.startsWith('year') ? amount * 365 :
|
|
911
|
+
null;
|
|
912
|
+
|
|
913
|
+
if (Number.isFinite(days)) {
|
|
914
|
+
return new Date(Date.now() - days * 24 * 60 * 60 * 1000);
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
const parsed = new Date(normalized);
|
|
865
919
|
if (Number.isNaN(parsed.getTime())) return null;
|
|
866
920
|
return parsed;
|
|
867
921
|
}
|
|
@@ -912,8 +966,7 @@ class DeterministicModelSelector {
|
|
|
912
966
|
model.updatedAt,
|
|
913
967
|
model.release_date,
|
|
914
968
|
model.released_at,
|
|
915
|
-
model.created_at
|
|
916
|
-
model.detailed_scraped_at
|
|
969
|
+
model.created_at
|
|
917
970
|
];
|
|
918
971
|
|
|
919
972
|
const updatedAt = dateCandidates
|
|
@@ -1027,6 +1080,9 @@ class DeterministicModelSelector {
|
|
|
1027
1080
|
|
|
1028
1081
|
const regex = /(\d+\.?\d*)\s*([BbMm])/g;
|
|
1029
1082
|
for (const match of value.matchAll(regex)) {
|
|
1083
|
+
const suffix = value.slice(match.index + match[0].length, match.index + match[0].length + 2);
|
|
1084
|
+
if (/^\s*b\b/i.test(suffix) || /^\s*[gk]b\b/i.test(suffix)) continue;
|
|
1085
|
+
|
|
1030
1086
|
const amount = parseFloat(match[1]);
|
|
1031
1087
|
const unit = match[2].toUpperCase();
|
|
1032
1088
|
pushCandidate(unit === 'M' ? amount / 1000 : amount);
|
|
@@ -1103,7 +1159,7 @@ class DeterministicModelSelector {
|
|
|
1103
1159
|
ollamaModel.parameter_count
|
|
1104
1160
|
);
|
|
1105
1161
|
if (metadataCandidates.length > 0) {
|
|
1106
|
-
return
|
|
1162
|
+
return metadataCandidates[0];
|
|
1107
1163
|
}
|
|
1108
1164
|
|
|
1109
1165
|
const artifactSizeGB = this.extractVariantSizeGB(variant, null);
|
|
@@ -1136,7 +1192,7 @@ class DeterministicModelSelector {
|
|
|
1136
1192
|
}
|
|
1137
1193
|
|
|
1138
1194
|
extractVariantSizeGB(variant, paramsB) {
|
|
1139
|
-
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
|
|
1195
|
+
const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? variant.size_gb ?? NaN);
|
|
1140
1196
|
if (Number.isFinite(candidate) && candidate > 0) return candidate;
|
|
1141
1197
|
if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
|
|
1142
1198
|
return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
|
|
@@ -1207,11 +1263,14 @@ class DeterministicModelSelector {
|
|
|
1207
1263
|
if (name.includes('qwen2.5')) return 'qwen2.5';
|
|
1208
1264
|
if (name.includes('qwen3')) return 'qwen3';
|
|
1209
1265
|
if (name.includes('qwen')) return 'qwen2.5';
|
|
1266
|
+
if (name.includes('deepseek-r1')) return 'deepseek-r1';
|
|
1267
|
+
if (name.includes('deepseek-coder')) return 'deepseek-coder';
|
|
1210
1268
|
if (name.includes('deepseek')) return 'deepseek';
|
|
1211
1269
|
if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
|
|
1212
1270
|
if (name.includes('llama3.1')) return 'llama3.1';
|
|
1213
1271
|
if (name.includes('llama')) return 'llama';
|
|
1214
1272
|
if (name.includes('mistral')) return 'mistral';
|
|
1273
|
+
if (name.includes('gemma3')) return 'gemma3';
|
|
1215
1274
|
if (name.includes('gemma')) return 'gemma2';
|
|
1216
1275
|
if (name.includes('phi')) return 'phi-3';
|
|
1217
1276
|
if (name.includes('llava')) return 'llava';
|
|
@@ -1219,6 +1278,8 @@ class DeterministicModelSelector {
|
|
|
1219
1278
|
if (name.includes('solar')) return 'solar';
|
|
1220
1279
|
if (name.includes('starcoder')) return 'starcoder';
|
|
1221
1280
|
if (name.includes('minicpm')) return 'minicpm';
|
|
1281
|
+
if (name.includes('yi-coder')) return 'yi-coder';
|
|
1282
|
+
if (name.includes('yi')) return 'yi';
|
|
1222
1283
|
return 'unknown';
|
|
1223
1284
|
}
|
|
1224
1285
|
|
|
@@ -1351,7 +1412,9 @@ class DeterministicModelSelector {
|
|
|
1351
1412
|
const hardware = this.normalizeHardwareProfile(detectedHardware);
|
|
1352
1413
|
const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
|
|
1353
1414
|
const externalPool = Array.isArray(modelPool) && modelPool.length > 0
|
|
1354
|
-
?
|
|
1415
|
+
? (modelPool.some(model => typeof model?.paramsB === 'number' && model?.model_identifier)
|
|
1416
|
+
? modelPool
|
|
1417
|
+
: this.normalizeExternalModels(modelPool))
|
|
1355
1418
|
: await this.loadModelPool();
|
|
1356
1419
|
|
|
1357
1420
|
if (!silent) {
|
|
@@ -1445,6 +1508,10 @@ class DeterministicModelSelector {
|
|
|
1445
1508
|
|
|
1446
1509
|
filterByCategory(models, category) {
|
|
1447
1510
|
return models.filter(model => {
|
|
1511
|
+
if (this.isCloudVariantTag(model.model_identifier || model.name)) {
|
|
1512
|
+
return false;
|
|
1513
|
+
}
|
|
1514
|
+
|
|
1448
1515
|
switch (category) {
|
|
1449
1516
|
case 'coding':
|
|
1450
1517
|
return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
|
|
@@ -1682,6 +1749,12 @@ class DeterministicModelSelector {
|
|
|
1682
1749
|
// Freshness/deprecation adjustment
|
|
1683
1750
|
const freshnessAdjustment = this.calculateFreshnessAdjustment(model);
|
|
1684
1751
|
Q += freshnessAdjustment;
|
|
1752
|
+
|
|
1753
|
+
const pulls = Number(model.pulls || model.actual_pulls || 0);
|
|
1754
|
+
if (pulls >= 100000000) Q += 4;
|
|
1755
|
+
else if (pulls >= 20000000) Q += 3;
|
|
1756
|
+
else if (pulls >= 5000000) Q += 2;
|
|
1757
|
+
else if (pulls >= 1000000) Q += 1;
|
|
1685
1758
|
|
|
1686
1759
|
// Task alignment bump
|
|
1687
1760
|
const taskBump = this.getTaskAlignmentBump(model, category);
|
package/src/ollama/client.js
CHANGED
|
@@ -668,6 +668,127 @@ class OllamaClient {
|
|
|
668
668
|
throw new Error(`Failed to run chat request: ${error.message}`);
|
|
669
669
|
}
|
|
670
670
|
}
|
|
671
|
+
|
|
672
|
+
async streamChat(modelName, messages, options = {}, onChunk = null) {
|
|
673
|
+
const availability = await this.checkOllamaAvailability();
|
|
674
|
+
if (!availability.available) {
|
|
675
|
+
throw new Error(`Ollama not available: ${availability.error}`);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
const {
|
|
679
|
+
tools,
|
|
680
|
+
format,
|
|
681
|
+
keepAlive,
|
|
682
|
+
timeoutMs = 120000,
|
|
683
|
+
generationOptions = {}
|
|
684
|
+
} = options;
|
|
685
|
+
|
|
686
|
+
const payload = {
|
|
687
|
+
model: modelName,
|
|
688
|
+
messages: Array.isArray(messages) ? messages : [],
|
|
689
|
+
stream: true
|
|
690
|
+
};
|
|
691
|
+
|
|
692
|
+
if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
|
|
693
|
+
if (format) payload.format = format;
|
|
694
|
+
if (keepAlive) payload.keep_alive = keepAlive;
|
|
695
|
+
if (generationOptions && Object.keys(generationOptions).length > 0) {
|
|
696
|
+
payload.options = generationOptions;
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
const startTime = Date.now();
|
|
700
|
+
const controller = new AbortController();
|
|
701
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
702
|
+
|
|
703
|
+
try {
|
|
704
|
+
const response = await fetch(`${this.baseURL}/api/chat`, {
|
|
705
|
+
method: 'POST',
|
|
706
|
+
signal: controller.signal,
|
|
707
|
+
headers: { 'Content-Type': 'application/json' },
|
|
708
|
+
body: JSON.stringify(payload)
|
|
709
|
+
});
|
|
710
|
+
|
|
711
|
+
if (!response.ok) {
|
|
712
|
+
const errorText = await response.text();
|
|
713
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
const decoder = new TextDecoder();
|
|
717
|
+
let buffer = '';
|
|
718
|
+
let content = '';
|
|
719
|
+
let finalData = null;
|
|
720
|
+
|
|
721
|
+
const handleLine = (line) => {
|
|
722
|
+
if (!line.trim()) return;
|
|
723
|
+
|
|
724
|
+
const data = JSON.parse(line);
|
|
725
|
+
const chunk = data?.message?.content || '';
|
|
726
|
+
if (chunk) {
|
|
727
|
+
content += chunk;
|
|
728
|
+
if (typeof onChunk === 'function') {
|
|
729
|
+
onChunk(chunk, data);
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
if (data.done) {
|
|
734
|
+
finalData = data;
|
|
735
|
+
}
|
|
736
|
+
};
|
|
737
|
+
|
|
738
|
+
if (response.body && typeof response.body.getReader === 'function') {
|
|
739
|
+
const reader = response.body.getReader();
|
|
740
|
+
while (true) {
|
|
741
|
+
const { done, value } = await reader.read();
|
|
742
|
+
if (done) break;
|
|
743
|
+
|
|
744
|
+
buffer += decoder.decode(value, { stream: true });
|
|
745
|
+
const lines = buffer.split('\n');
|
|
746
|
+
buffer = lines.pop() || '';
|
|
747
|
+
|
|
748
|
+
for (const line of lines) {
|
|
749
|
+
handleLine(line);
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
} else if (response.body && typeof response.body[Symbol.asyncIterator] === 'function') {
|
|
753
|
+
for await (const value of response.body) {
|
|
754
|
+
buffer += decoder.decode(value, { stream: true });
|
|
755
|
+
const lines = buffer.split('\n');
|
|
756
|
+
buffer = lines.pop() || '';
|
|
757
|
+
|
|
758
|
+
for (const line of lines) {
|
|
759
|
+
handleLine(line);
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
throw new Error('Streaming response body is not readable');
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
buffer += decoder.decode();
|
|
767
|
+
if (buffer.trim()) {
|
|
768
|
+
handleLine(buffer);
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
const responseTime = Date.now() - startTime;
|
|
772
|
+
const speed = this.calculateTokensPerSecond(finalData || {}, responseTime);
|
|
773
|
+
|
|
774
|
+
return {
|
|
775
|
+
...(finalData || {}),
|
|
776
|
+
message: {
|
|
777
|
+
role: 'assistant',
|
|
778
|
+
content
|
|
779
|
+
},
|
|
780
|
+
response: content,
|
|
781
|
+
responseTime,
|
|
782
|
+
tokensPerSecond: speed.tokensPerSecond,
|
|
783
|
+
evalTokensPerSecond: speed.evalTokensPerSecond,
|
|
784
|
+
endToEndTokensPerSecond: speed.endToEndTokensPerSecond
|
|
785
|
+
};
|
|
786
|
+
} catch (error) {
|
|
787
|
+
throw new Error(`Failed to run streaming chat request: ${error.message}`);
|
|
788
|
+
} finally {
|
|
789
|
+
clearTimeout(timeoutId);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
671
792
|
}
|
|
672
793
|
|
|
673
794
|
module.exports = OllamaClient;
|
|
@@ -143,7 +143,8 @@ class EnhancedOllamaScraper {
|
|
|
143
143
|
seen.add(id);
|
|
144
144
|
|
|
145
145
|
// Try to extract pulls
|
|
146
|
-
const
|
|
146
|
+
const cardText = this.cleanText(match[0]);
|
|
147
|
+
const pullsMatch = cardText.match(/(\d+(?:\.\d+)?\s*[KMB]?)\s*(?:Pulls|Downloads)/i);
|
|
147
148
|
const pulls = pullsMatch ? this.parsePulls(pullsMatch[1]) : 0;
|
|
148
149
|
|
|
149
150
|
models.push({ id, pulls });
|
|
@@ -160,13 +161,29 @@ class EnhancedOllamaScraper {
|
|
|
160
161
|
*/
|
|
161
162
|
parsePulls(pullStr) {
|
|
162
163
|
if (!pullStr) return 0;
|
|
163
|
-
const
|
|
164
|
-
|
|
165
|
-
if (
|
|
166
|
-
if (
|
|
164
|
+
const normalized = String(pullStr).replace(/\s+/g, '');
|
|
165
|
+
const num = parseFloat(normalized);
|
|
166
|
+
if (normalized.includes('B')) return Math.round(num * 1e9);
|
|
167
|
+
if (normalized.includes('M')) return Math.round(num * 1e6);
|
|
168
|
+
if (normalized.includes('K')) return Math.round(num * 1e3);
|
|
167
169
|
return Math.round(num);
|
|
168
170
|
}
|
|
169
171
|
|
|
172
|
+
cleanText(html = '') {
|
|
173
|
+
return String(html || '')
|
|
174
|
+
.replace(/<script[\s\S]*?<\/script>/gi, ' ')
|
|
175
|
+
.replace(/<style[\s\S]*?<\/style>/gi, ' ')
|
|
176
|
+
.replace(/<[^>]+>/g, ' ')
|
|
177
|
+
.replace(/ | /g, ' ')
|
|
178
|
+
.replace(/&/g, '&')
|
|
179
|
+
.replace(/</g, '<')
|
|
180
|
+
.replace(/>/g, '>')
|
|
181
|
+
.replace(/"/g, '"')
|
|
182
|
+
.replace(/'|'/g, "'")
|
|
183
|
+
.replace(/\s+/g, ' ')
|
|
184
|
+
.trim();
|
|
185
|
+
}
|
|
186
|
+
|
|
170
187
|
/**
|
|
171
188
|
* Scrape model detail page
|
|
172
189
|
*/
|
|
@@ -296,24 +313,12 @@ class EnhancedOllamaScraper {
|
|
|
296
313
|
* Extract parameter count from tag
|
|
297
314
|
*/
|
|
298
315
|
extractParams(tag) {
|
|
299
|
-
// Match patterns like: 8b, 70b,
|
|
300
|
-
const match = tag.match(/(\d+\.?\d*)[
|
|
316
|
+
// Match patterns like: 8b, 70b, 1.5b, 335m, 22m
|
|
317
|
+
const match = tag.match(/(\d+\.?\d*)\s*([bBmM])(?:[^a-zA-Z]|$)/);
|
|
301
318
|
if (match) {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
// Check for known sizes in model names
|
|
306
|
-
const sizePatterns = [
|
|
307
|
-
{ pattern: /mini/i, size: 3.8 },
|
|
308
|
-
{ pattern: /tiny/i, size: 1.1 },
|
|
309
|
-
{ pattern: /small/i, size: 7 },
|
|
310
|
-
{ pattern: /medium/i, size: 13 },
|
|
311
|
-
{ pattern: /large/i, size: 34 },
|
|
312
|
-
{ pattern: /xl/i, size: 70 },
|
|
313
|
-
];
|
|
314
|
-
|
|
315
|
-
for (const { pattern, size } of sizePatterns) {
|
|
316
|
-
if (pattern.test(tag)) return size;
|
|
319
|
+
const value = parseFloat(match[1]);
|
|
320
|
+
const unit = match[2].toLowerCase();
|
|
321
|
+
return unit === 'm' ? value / 1000 : value;
|
|
317
322
|
}
|
|
318
323
|
|
|
319
324
|
return null;
|
|
@@ -329,9 +334,11 @@ class EnhancedOllamaScraper {
|
|
|
329
334
|
{ pattern: /q6[_-]?k/i, quant: 'Q6_K' },
|
|
330
335
|
{ pattern: /q5[_-]?k[_-]?m/i, quant: 'Q5_K_M' },
|
|
331
336
|
{ pattern: /q5[_-]?k[_-]?s/i, quant: 'Q5_K_S' },
|
|
337
|
+
{ pattern: /q5[_-]?1/i, quant: 'Q5_K_M' },
|
|
332
338
|
{ pattern: /q5[_-]?0/i, quant: 'Q5_0' },
|
|
333
339
|
{ pattern: /q4[_-]?k[_-]?m/i, quant: 'Q4_K_M' },
|
|
334
340
|
{ pattern: /q4[_-]?k[_-]?s/i, quant: 'Q4_K_S' },
|
|
341
|
+
{ pattern: /q4[_-]?1/i, quant: 'Q4_K_M' },
|
|
335
342
|
{ pattern: /q4[_-]?0/i, quant: 'Q4_0' },
|
|
336
343
|
{ pattern: /q3[_-]?k[_-]?m/i, quant: 'Q3_K_M' },
|
|
337
344
|
{ pattern: /q3[_-]?k[_-]?s/i, quant: 'Q3_K_S' },
|
|
@@ -468,7 +475,10 @@ class EnhancedOllamaScraper {
|
|
|
468
475
|
// Try to get display name from title or h1
|
|
469
476
|
const titleMatch = html.match(/<title>([^<]+)<\/title>/i);
|
|
470
477
|
if (titleMatch) {
|
|
471
|
-
const title = titleMatch[1]
|
|
478
|
+
const title = this.cleanText(titleMatch[1])
|
|
479
|
+
.split('·')[0]
|
|
480
|
+
.replace(/\s+-\s+Ollama.*$/i, '')
|
|
481
|
+
.trim();
|
|
472
482
|
if (title && title.toLowerCase() !== 'ollama') {
|
|
473
483
|
return title;
|
|
474
484
|
}
|
|
@@ -500,12 +510,15 @@ class EnhancedOllamaScraper {
|
|
|
500
510
|
}
|
|
501
511
|
|
|
502
512
|
extractPulls(html) {
|
|
503
|
-
const
|
|
513
|
+
const text = this.cleanText(html);
|
|
514
|
+
const match = text.match(/(\d+(?:\.\d+)?\s*[KMB]?)\s*(?:Pulls|Downloads)\b/i);
|
|
504
515
|
return match ? this.parsePulls(match[1]) : 0;
|
|
505
516
|
}
|
|
506
517
|
|
|
507
518
|
extractTagsCount(html) {
|
|
508
|
-
const
|
|
519
|
+
const text = this.cleanText(html);
|
|
520
|
+
const match = text.match(/\bName\s+(\d+)\s+models?\s+Size\b/i) ||
|
|
521
|
+
text.match(/\b(\d+)\s+(?:Tags|Versions|models?)\b/i);
|
|
509
522
|
return match ? parseInt(match[1]) : 1;
|
|
510
523
|
}
|
|
511
524
|
|
|
@@ -534,7 +547,8 @@ class EnhancedOllamaScraper {
|
|
|
534
547
|
}
|
|
535
548
|
|
|
536
549
|
extractLastUpdated(html) {
|
|
537
|
-
const
|
|
550
|
+
const text = this.cleanText(html);
|
|
551
|
+
const match = text.match(/Updated\s+(\d+\s*(?:minutes?|hours?|days?|weeks?|months?|years?)\s+ago)/i);
|
|
538
552
|
return match ? match[1] : '';
|
|
539
553
|
}
|
|
540
554
|
|