llm-checker 3.5.11 → 3.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -307,7 +307,10 @@ class UnifiedDetector {
307
307
  summary.dedicatedGpuCount = topology.dedicatedCount;
308
308
  summary.integratedGpuModels = topology.integratedModels;
309
309
  summary.dedicatedGpuModels = topology.dedicatedModels;
310
- summary.integratedSharedMemory = topology.integratedSharedMemory;
310
+ summary.integratedSharedMemory = Math.max(
311
+ topology.integratedSharedMemory,
312
+ this.getPrimaryIntegratedSharedMemory(primary)
313
+ );
311
314
  if (!summary.gpuModel) {
312
315
  summary.gpuModel = topology.primaryModel || null;
313
316
  }
@@ -324,18 +327,70 @@ class UnifiedDetector {
324
327
  summary.runtimeBackendName = runtimeSelection.name;
325
328
  summary.hasRuntimeAssist = runtimeSelection.assisted;
326
329
 
327
- // Effective memory for LLM loading
328
- // For GPU: use VRAM; for CPU/Metal: use system RAM
329
- if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
330
+ // Effective memory for LLM loading. Integrated ROCm/iGPU devices expose
331
+ // a small aperture as VRAM and a much larger shared pool for model-fit
332
+ // decisions, so avoid treating the aperture as dedicated VRAM.
333
+ if (
334
+ ['rocm', 'intel'].includes(primary?.type) &&
335
+ summary.hasIntegratedGPU &&
336
+ !summary.hasDedicatedGPU &&
337
+ summary.integratedSharedMemory > 0
338
+ ) {
339
+ summary.effectiveMemory = summary.integratedSharedMemory;
340
+ } else if (summary.totalVRAM > 0 && ['cuda', 'rocm', 'intel'].includes(primary?.type)) {
330
341
  summary.effectiveMemory = summary.totalVRAM;
331
342
  } else {
332
343
  // Use 70% of system RAM for models (leave room for OS)
333
344
  summary.effectiveMemory = Math.round(summary.systemRAM * 0.7);
334
345
  }
335
346
 
347
+ summary.hardwareTier = this.classifyHardwareTierFromSummary(summary);
348
+ summary.bestBackendLabel = this.getBestBackendLabel(summary);
349
+
336
350
  return summary;
337
351
  }
338
352
 
353
+ getPrimaryIntegratedSharedMemory(primary) {
354
+ const gpus = Array.isArray(primary?.info?.gpus) ? primary.info.gpus : [];
355
+ return gpus
356
+ .filter((gpu) => gpu?.type === 'integrated')
357
+ .reduce((max, gpu) => {
358
+ const candidates = [
359
+ gpu?.sharedMemory,
360
+ gpu?.unifiedMemory,
361
+ gpu?.memory?.shared,
362
+ gpu?.memory?.total
363
+ ].map(Number).filter((value) => Number.isFinite(value) && value > 0);
364
+ return Math.max(max, ...candidates, 0);
365
+ }, 0);
366
+ }
367
+
368
+ classifyHardwareTierFromSummary(summary = {}) {
369
+ const effectiveMem = Number(summary.effectiveMemory) || 0;
370
+ const speed = Number(summary.speedCoefficient) || 0;
371
+
372
+ if (effectiveMem >= 80 && speed >= 300) return 'ultra_high'; // H100, MI300
373
+ if (effectiveMem >= 48 && speed >= 200) return 'very_high'; // 2x3090, 4090
374
+ if (effectiveMem >= 24 && speed >= 150) return 'high'; // 3090, 4090, M2 Max
375
+ if (effectiveMem >= 16 && speed >= 100) return 'medium_high'; // 4080, 3080, M3 Pro
376
+ if (effectiveMem >= 12 && speed >= 80) return 'medium'; // 3060, 4060 Ti
377
+ if (effectiveMem >= 8 && speed >= 50) return 'medium_low'; // 3060, M2
378
+ if (effectiveMem >= 6 && speed >= 30) return 'low'; // GTX 1660, iGPU
379
+ return 'ultra_low'; // CPU only
380
+ }
381
+
382
+ getBestBackendLabel(summary = {}) {
383
+ const backendName = summary.backendName || String(summary.bestBackend || 'cpu').toUpperCase();
384
+ if (
385
+ summary.hasRuntimeAssist &&
386
+ summary.runtimeBackend &&
387
+ summary.runtimeBackend !== summary.bestBackend
388
+ ) {
389
+ return `${backendName} + ${summary.runtimeBackendName || summary.runtimeBackend} assist`;
390
+ }
391
+ return backendName;
392
+ }
393
+
339
394
  summarizeGPUInventory(gpus = []) {
340
395
  const normalized = this.normalizeGpuInventory(gpus);
341
396
  const counts = new Map();
@@ -819,7 +874,11 @@ class UnifiedDetector {
819
874
  const summary = result.summary;
820
875
 
821
876
  // Leave headroom (2GB for GPU, 20% for RAM)
822
- if (summary.bestBackend === 'cpu' || summary.bestBackend === 'metal') {
877
+ if (
878
+ summary.bestBackend === 'cpu' ||
879
+ summary.bestBackend === 'metal' ||
880
+ (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0)
881
+ ) {
823
882
  return sizeGB <= (summary.effectiveMemory - 2);
824
883
  } else {
825
884
  const availableVRAM = useMultiGPU ? summary.totalVRAM : (summary.totalVRAM / summary.gpuCount);
@@ -844,19 +903,7 @@ class UnifiedDetector {
844
903
  const result = this.cache;
845
904
  if (!result) return 'unknown';
846
905
 
847
- const summary = result.summary;
848
- const effectiveMem = summary.effectiveMemory;
849
- const speed = summary.speedCoefficient;
850
-
851
- // Tier based on effective memory and speed
852
- if (effectiveMem >= 80 && speed >= 300) return 'ultra_high'; // H100, MI300
853
- if (effectiveMem >= 48 && speed >= 200) return 'very_high'; // 2x3090, 4090
854
- if (effectiveMem >= 24 && speed >= 150) return 'high'; // 3090, 4090, M2 Max
855
- if (effectiveMem >= 16 && speed >= 100) return 'medium_high'; // 4080, 3080, M3 Pro
856
- if (effectiveMem >= 12 && speed >= 80) return 'medium'; // 3060, 4060 Ti
857
- if (effectiveMem >= 8 && speed >= 50) return 'medium_low'; // 3060, M2
858
- if (effectiveMem >= 6 && speed >= 30) return 'low'; // GTX 1660, iGPU
859
- return 'ultra_low'; // CPU only
906
+ return result.summary?.hardwareTier || this.classifyHardwareTierFromSummary(result.summary);
860
907
  }
861
908
 
862
909
  /**
@@ -922,6 +969,10 @@ class UnifiedDetector {
922
969
  const gpuDesc = summary.gpuInventory || (
923
970
  summary.isMultiGPU ? `${summary.gpuCount}x ${summary.gpuModel}` : summary.gpuModel
924
971
  );
972
+ if (summary.hasIntegratedGPU && !summary.hasDedicatedGPU && summary.integratedSharedMemory > 0) {
973
+ const dedicatedLabel = summary.totalVRAM > 0 ? `, ${summary.totalVRAM}GB aperture` : '';
974
+ return `${gpuDesc} (${summary.integratedSharedMemory}GB shared memory${dedicatedLabel}) + ${summary.cpuModel}`;
975
+ }
925
976
  return `${gpuDesc} (${summary.totalVRAM}GB VRAM) + ${summary.cpuModel}`;
926
977
  }
927
978
  else if (summary.bestBackend === 'metal') {
package/src/index.js CHANGED
@@ -78,7 +78,6 @@ class LLMChecker {
78
78
  // Report hardware detection progress before platform-specific analysis
79
79
  if (this.progress) {
80
80
  this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
81
- await new Promise(resolve => setTimeout(resolve, 200)); // Small delay for demo
82
81
  const isApple = detectedPlatform === 'darwin';
83
82
  const memLabel = isApple ? 'unified memory' : 'RAM';
84
83
  this.progress.substep(`Memory detected: ${hardware.memory.total}GB ${memLabel}`, true);
@@ -117,7 +116,6 @@ class LLMChecker {
117
116
  // Apple Silicon optimized analysis with unified memory consideration
118
117
  if (this.progress) {
119
118
  this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
120
- await new Promise(resolve => setTimeout(resolve, 200));
121
119
  this.progress.substep(`Memory detected: ${hardware.memory.total}GB unified memory`, true);
122
120
  const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Apple Silicon GPU'}`;
123
121
  this.progress.stepComplete(summary);
@@ -131,7 +129,6 @@ class LLMChecker {
131
129
  // Windows-specific analysis with discrete GPU / iGPU handling
132
130
  if (this.progress) {
133
131
  this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
134
- await new Promise(resolve => setTimeout(resolve, 200));
135
132
  this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
136
133
  const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'Integrated GPU'}`;
137
134
  this.progress.stepComplete(summary);
@@ -145,7 +142,6 @@ class LLMChecker {
145
142
  // Linux-specific analysis (similar to Windows but with Linux considerations)
146
143
  if (this.progress) {
147
144
  this.progress.substep(`CPU detected: ${hardware.cpu.brand} (${hardware.cpu.cores} cores)`);
148
- await new Promise(resolve => setTimeout(resolve, 200));
149
145
  this.progress.substep(`Memory detected: ${hardware.memory.total}GB RAM`, true);
150
146
  const summary = `${hardware.cpu.brand}, ${hardware.memory.total}GB RAM, ${hardware.gpu.model || 'GPU'}`;
151
147
  this.progress.stepComplete(summary);
@@ -516,7 +512,7 @@ class LLMChecker {
516
512
 
517
513
  try {
518
514
  // 1. Obtener TODOS los modelos de la base de datos de Ollama
519
- const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
515
+ const ollamaData = await this.loadOllamaModelData();
520
516
  const allOllamaModels = ollamaData.models || [];
521
517
  this.logger.info(`Found ${allOllamaModels.length} models in Ollama database`);
522
518
 
@@ -1345,9 +1341,27 @@ class LLMChecker {
1345
1341
  }
1346
1342
 
1347
1343
  getHardwareTier(hardware) {
1344
+ const canonicalTier = hardware?.summary?.hardwareTier;
1345
+ if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
1346
+ return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
1347
+ }
1348
1348
  return this.calculateHardwareScore(hardware).tier;
1349
1349
  }
1350
1350
 
1351
+ getHardwareTierBucket(hardware) {
1352
+ const tier = this.getHardwareTier(hardware);
1353
+ switch (tier) {
1354
+ case 'very_high':
1355
+ return 'ultra_high';
1356
+ case 'medium_high':
1357
+ return 'high';
1358
+ case 'medium_low':
1359
+ return 'low';
1360
+ default:
1361
+ return tier;
1362
+ }
1363
+ }
1364
+
1351
1365
  calculateHardwareScore(hardware) {
1352
1366
  const clamp = (x, a = 0, b = 1) => Math.max(a, Math.min(b, x));
1353
1367
 
@@ -2003,7 +2017,7 @@ class LLMChecker {
2003
2017
  score -= 15;
2004
2018
  }
2005
2019
 
2006
- const hardwareTier = this.getHardwareTier(hardware);
2020
+ const hardwareTier = this.getHardwareTierBucket(hardware);
2007
2021
  switch (hardwareTier) {
2008
2022
  case 'ultra_high':
2009
2023
  score += 15;
@@ -2412,14 +2426,51 @@ class LLMChecker {
2412
2426
  this.getAllModels().find(m => m.name.toLowerCase().includes(name.toLowerCase()));
2413
2427
  }
2414
2428
 
2429
+ async loadSyncedOllamaModelData() {
2430
+ const ModelDatabase = require('./data/model-database');
2431
+ const database = new ModelDatabase();
2432
+
2433
+ try {
2434
+ await database.initialize();
2435
+ const models = database.getAllModelsWithVariants();
2436
+ const stats = database.getStats();
2437
+
2438
+ if (models.length > 0) {
2439
+ return {
2440
+ models,
2441
+ total_count: models.length,
2442
+ cached_at: stats.lastSync || null,
2443
+ source: 'ollama_sqlite_database'
2444
+ };
2445
+ }
2446
+ } finally {
2447
+ database.close();
2448
+ }
2449
+
2450
+ return null;
2451
+ }
2452
+
2453
+ async loadOllamaModelData() {
2454
+ try {
2455
+ const syncedData = await this.loadSyncedOllamaModelData();
2456
+ if (syncedData?.models?.length > 0) {
2457
+ return syncedData;
2458
+ }
2459
+ } catch (error) {
2460
+ this.logger.warn('Synced SQLite model database unavailable, falling back to Ollama cache', { error: error.message });
2461
+ }
2462
+
2463
+ return this.ollamaScraper.scrapeAllModels(false);
2464
+ }
2465
+
2415
2466
 
2416
2467
  async generateIntelligentRecommendations(hardware, options = {}) {
2417
2468
  try {
2418
2469
  this.logger.info('Generating intelligent recommendations...');
2419
2470
  const selectedRuntime = normalizeRuntime(options.runtime || 'ollama');
2420
2471
 
2421
- // Obtener todos los modelos de Ollama
2422
- const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
2472
+ // Prefer the synced SQLite catalog so `llm-checker sync` updates recommendations immediately.
2473
+ const ollamaData = await this.loadOllamaModelData();
2423
2474
  const allModels = ollamaData.models || [];
2424
2475
 
2425
2476
  if (allModels.length === 0) {
@@ -77,8 +77,8 @@ Respond with JSON only, no additional text.`;
77
77
  // Phase 1: Get ALL available models from the 177-model Ollama database
78
78
  const hardware = await this.deterministicSelector.getHardware();
79
79
 
80
- // Use the same large database that check command uses (177 models)
81
- const ollamaData = await this.ollamaScraper.scrapeAllModels(false);
80
+ // Use the same synced database that recommend/check use.
81
+ const ollamaData = await this.loadModelDatabase();
82
82
  const allOllamaModels = ollamaData.models || [];
83
83
 
84
84
  if (!silent) {
@@ -248,6 +248,31 @@ Respond with JSON only, no additional text.`;
248
248
  };
249
249
  }
250
250
 
251
+ async loadModelDatabase() {
252
+ try {
253
+ const ModelDatabase = require('../data/model-database');
254
+ const database = new ModelDatabase();
255
+ await database.initialize();
256
+
257
+ try {
258
+ const models = database.getAllModelsWithVariants();
259
+ if (models.length > 0) {
260
+ return {
261
+ models,
262
+ total_count: models.length,
263
+ source: 'ollama_sqlite_database'
264
+ };
265
+ }
266
+ } finally {
267
+ database.close();
268
+ }
269
+ } catch {
270
+ // Fall through to scraper cache.
271
+ }
272
+
273
+ return this.ollamaScraper.scrapeAllModels(false);
274
+ }
275
+
251
276
  /**
252
277
  * Pick the best installed evaluator model
253
278
  */
@@ -44,11 +44,16 @@ class DeterministicModelSelector {
44
44
  this.familyBumps = {
45
45
  'qwen2.5': 2,
46
46
  'qwen3': 4,
47
+ 'gemma3': 3,
47
48
  'deepseek': 3,
49
+ 'deepseek-r1': 5,
50
+ 'deepseek-coder': 4,
48
51
  'mistral': 1,
49
52
  'llama3.1': 1,
50
53
  'llama3.2': 2,
51
54
  'gemma2': 1,
55
+ 'yi': -3,
56
+ 'yi-coder': 1,
52
57
  'phi-3': 0,
53
58
  'granite': 0,
54
59
  'solar': 0,
@@ -750,7 +755,13 @@ class DeterministicModelSelector {
750
755
  if (ollamaModel.primary_category === 'reasoning') derivedTags.add('reasoning');
751
756
  if (ollamaModel.primary_category === 'creative') derivedTags.add('creative');
752
757
 
753
- return variants.map((variant) => {
758
+ const hasConcreteVariants = variants.some((variant) => this.variantHasConcreteSizeOrParams(variant));
759
+ const selectableVariants = hasConcreteVariants
760
+ ? variants.filter((variant) => this.variantHasConcreteSizeOrParams(variant))
761
+ : variants;
762
+
763
+ return selectableVariants
764
+ .map((variant) => {
754
765
  const variantTag = variant.tag || fallbackTag;
755
766
  const quant = this.resolveVariantQuantization(variant, variantTag);
756
767
  const paramsB = this.resolveVariantParamsB(ollamaModel, variant, quant);
@@ -821,6 +832,8 @@ class DeterministicModelSelector {
821
832
  modalities,
822
833
  tags: modelTags,
823
834
  model_identifier: variantTag,
835
+ last_updated: ollamaModel.last_updated || ollamaModel.lastUpdated || '',
836
+ updated_at: ollamaModel.updated_at || ollamaModel.updatedAt || '',
824
837
  installed: false,
825
838
  pulls: ollamaModel.actual_pulls || ollamaModel.pulls || 0,
826
839
  availableQuantizations,
@@ -842,6 +855,28 @@ class DeterministicModelSelector {
842
855
  });
843
856
  }
844
857
 
858
+ variantHasConcreteSizeOrParams(variant = {}) {
859
+ const params = this.extractParamsFromString(
860
+ variant.params_b,
861
+ variant.paramsB,
862
+ variant.parameter_size,
863
+ variant.size,
864
+ variant.tag,
865
+ variant.label,
866
+ variant.name
867
+ );
868
+ if (Number.isFinite(params) && params > 0) return true;
869
+
870
+ const artifactSize = Number(
871
+ variant.real_size_gb ??
872
+ variant.estimated_size_gb ??
873
+ variant.size_gb ??
874
+ NaN
875
+ );
876
+
877
+ return Number.isFinite(artifactSize) && artifactSize > 0;
878
+ }
879
+
845
880
  parseBillionsValue(rawValue) {
846
881
  return parseMoEBillionsValue(rawValue);
847
882
  }
@@ -861,7 +896,26 @@ class DeterministicModelSelector {
861
896
 
862
897
  parseDateSafe(value) {
863
898
  if (!value || typeof value !== 'string') return null;
864
- const parsed = new Date(value);
899
+ const normalized = value.trim();
900
+ const relativeMatch = normalized.match(/^(\d+)\s*(minutes?|hours?|days?|weeks?|months?|years?)\s+ago$/i);
901
+ if (relativeMatch) {
902
+ const amount = parseInt(relativeMatch[1], 10);
903
+ const unit = relativeMatch[2].toLowerCase();
904
+ const days =
905
+ unit.startsWith('minute') ? amount / (24 * 60) :
906
+ unit.startsWith('hour') ? amount / 24 :
907
+ unit.startsWith('day') ? amount :
908
+ unit.startsWith('week') ? amount * 7 :
909
+ unit.startsWith('month') ? amount * 30 :
910
+ unit.startsWith('year') ? amount * 365 :
911
+ null;
912
+
913
+ if (Number.isFinite(days)) {
914
+ return new Date(Date.now() - days * 24 * 60 * 60 * 1000);
915
+ }
916
+ }
917
+
918
+ const parsed = new Date(normalized);
865
919
  if (Number.isNaN(parsed.getTime())) return null;
866
920
  return parsed;
867
921
  }
@@ -912,8 +966,7 @@ class DeterministicModelSelector {
912
966
  model.updatedAt,
913
967
  model.release_date,
914
968
  model.released_at,
915
- model.created_at,
916
- model.detailed_scraped_at
969
+ model.created_at
917
970
  ];
918
971
 
919
972
  const updatedAt = dateCandidates
@@ -1027,6 +1080,9 @@ class DeterministicModelSelector {
1027
1080
 
1028
1081
  const regex = /(\d+\.?\d*)\s*([BbMm])/g;
1029
1082
  for (const match of value.matchAll(regex)) {
1083
+ const suffix = value.slice(match.index + match[0].length, match.index + match[0].length + 2);
1084
+ if (/^\s*b\b/i.test(suffix) || /^\s*[gk]b\b/i.test(suffix)) continue;
1085
+
1030
1086
  const amount = parseFloat(match[1]);
1031
1087
  const unit = match[2].toUpperCase();
1032
1088
  pushCandidate(unit === 'M' ? amount / 1000 : amount);
@@ -1103,7 +1159,7 @@ class DeterministicModelSelector {
1103
1159
  ollamaModel.parameter_count
1104
1160
  );
1105
1161
  if (metadataCandidates.length > 0) {
1106
- return Math.max(...metadataCandidates);
1162
+ return metadataCandidates[0];
1107
1163
  }
1108
1164
 
1109
1165
  const artifactSizeGB = this.extractVariantSizeGB(variant, null);
@@ -1136,7 +1192,7 @@ class DeterministicModelSelector {
1136
1192
  }
1137
1193
 
1138
1194
  extractVariantSizeGB(variant, paramsB) {
1139
- const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? NaN);
1195
+ const candidate = Number(variant.real_size_gb ?? variant.estimated_size_gb ?? variant.size_gb ?? NaN);
1140
1196
  if (Number.isFinite(candidate) && candidate > 0) return candidate;
1141
1197
  if (!Number.isFinite(paramsB) || paramsB <= 0) return 0.5;
1142
1198
  return Math.max(0.5, Math.round((paramsB * 0.58 + 0.5) * 10) / 10);
@@ -1207,11 +1263,14 @@ class DeterministicModelSelector {
1207
1263
  if (name.includes('qwen2.5')) return 'qwen2.5';
1208
1264
  if (name.includes('qwen3')) return 'qwen3';
1209
1265
  if (name.includes('qwen')) return 'qwen2.5';
1266
+ if (name.includes('deepseek-r1')) return 'deepseek-r1';
1267
+ if (name.includes('deepseek-coder')) return 'deepseek-coder';
1210
1268
  if (name.includes('deepseek')) return 'deepseek';
1211
1269
  if (name.includes('llama3.2') || name.includes('llama3.3')) return 'llama3.2';
1212
1270
  if (name.includes('llama3.1')) return 'llama3.1';
1213
1271
  if (name.includes('llama')) return 'llama';
1214
1272
  if (name.includes('mistral')) return 'mistral';
1273
+ if (name.includes('gemma3')) return 'gemma3';
1215
1274
  if (name.includes('gemma')) return 'gemma2';
1216
1275
  if (name.includes('phi')) return 'phi-3';
1217
1276
  if (name.includes('llava')) return 'llava';
@@ -1219,6 +1278,8 @@ class DeterministicModelSelector {
1219
1278
  if (name.includes('solar')) return 'solar';
1220
1279
  if (name.includes('starcoder')) return 'starcoder';
1221
1280
  if (name.includes('minicpm')) return 'minicpm';
1281
+ if (name.includes('yi-coder')) return 'yi-coder';
1282
+ if (name.includes('yi')) return 'yi';
1222
1283
  return 'unknown';
1223
1284
  }
1224
1285
 
@@ -1351,7 +1412,9 @@ class DeterministicModelSelector {
1351
1412
  const hardware = this.normalizeHardwareProfile(detectedHardware);
1352
1413
  const installed = Array.isArray(installedModels) ? installedModels : await this.getInstalledModels();
1353
1414
  const externalPool = Array.isArray(modelPool) && modelPool.length > 0
1354
- ? this.normalizeExternalModels(modelPool)
1415
+ ? (modelPool.some(model => typeof model?.paramsB === 'number' && model?.model_identifier)
1416
+ ? modelPool
1417
+ : this.normalizeExternalModels(modelPool))
1355
1418
  : await this.loadModelPool();
1356
1419
 
1357
1420
  if (!silent) {
@@ -1445,6 +1508,10 @@ class DeterministicModelSelector {
1445
1508
 
1446
1509
  filterByCategory(models, category) {
1447
1510
  return models.filter(model => {
1511
+ if (this.isCloudVariantTag(model.model_identifier || model.name)) {
1512
+ return false;
1513
+ }
1514
+
1448
1515
  switch (category) {
1449
1516
  case 'coding':
1450
1517
  return model.tags.some(tag => ['coder', 'code', 'instruct'].includes(tag)) ||
@@ -1682,6 +1749,12 @@ class DeterministicModelSelector {
1682
1749
  // Freshness/deprecation adjustment
1683
1750
  const freshnessAdjustment = this.calculateFreshnessAdjustment(model);
1684
1751
  Q += freshnessAdjustment;
1752
+
1753
+ const pulls = Number(model.pulls || model.actual_pulls || 0);
1754
+ if (pulls >= 100000000) Q += 4;
1755
+ else if (pulls >= 20000000) Q += 3;
1756
+ else if (pulls >= 5000000) Q += 2;
1757
+ else if (pulls >= 1000000) Q += 1;
1685
1758
 
1686
1759
  // Task alignment bump
1687
1760
  const taskBump = this.getTaskAlignmentBump(model, category);
@@ -2141,6 +2214,10 @@ class DeterministicModelSelector {
2141
2214
 
2142
2215
  mapHardwareTier(hardware = {}) {
2143
2216
  const summary = hardware?.summary || {};
2217
+ const canonicalTier = summary.hardwareTier || summary.hardware_tier;
2218
+ if (typeof canonicalTier === 'string' && canonicalTier.trim()) {
2219
+ return canonicalTier.trim().toLowerCase().replace(/\s+/g, '_');
2220
+ }
2144
2221
  const effectiveMemory = Number(summary.effectiveMemory);
2145
2222
  const speedCoefficient = Number(summary.speedCoefficient);
2146
2223
  if (Number.isFinite(effectiveMemory) && effectiveMemory > 0 && Number.isFinite(speedCoefficient)) {
@@ -668,6 +668,127 @@ class OllamaClient {
668
668
  throw new Error(`Failed to run chat request: ${error.message}`);
669
669
  }
670
670
  }
671
+
672
+ async streamChat(modelName, messages, options = {}, onChunk = null) {
673
+ const availability = await this.checkOllamaAvailability();
674
+ if (!availability.available) {
675
+ throw new Error(`Ollama not available: ${availability.error}`);
676
+ }
677
+
678
+ const {
679
+ tools,
680
+ format,
681
+ keepAlive,
682
+ timeoutMs = 120000,
683
+ generationOptions = {}
684
+ } = options;
685
+
686
+ const payload = {
687
+ model: modelName,
688
+ messages: Array.isArray(messages) ? messages : [],
689
+ stream: true
690
+ };
691
+
692
+ if (Array.isArray(tools) && tools.length > 0) payload.tools = tools;
693
+ if (format) payload.format = format;
694
+ if (keepAlive) payload.keep_alive = keepAlive;
695
+ if (generationOptions && Object.keys(generationOptions).length > 0) {
696
+ payload.options = generationOptions;
697
+ }
698
+
699
+ const startTime = Date.now();
700
+ const controller = new AbortController();
701
+ const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
702
+
703
+ try {
704
+ const response = await fetch(`${this.baseURL}/api/chat`, {
705
+ method: 'POST',
706
+ signal: controller.signal,
707
+ headers: { 'Content-Type': 'application/json' },
708
+ body: JSON.stringify(payload)
709
+ });
710
+
711
+ if (!response.ok) {
712
+ const errorText = await response.text();
713
+ throw new Error(`HTTP ${response.status}: ${response.statusText} - ${errorText}`);
714
+ }
715
+
716
+ const decoder = new TextDecoder();
717
+ let buffer = '';
718
+ let content = '';
719
+ let finalData = null;
720
+
721
+ const handleLine = (line) => {
722
+ if (!line.trim()) return;
723
+
724
+ const data = JSON.parse(line);
725
+ const chunk = data?.message?.content || '';
726
+ if (chunk) {
727
+ content += chunk;
728
+ if (typeof onChunk === 'function') {
729
+ onChunk(chunk, data);
730
+ }
731
+ }
732
+
733
+ if (data.done) {
734
+ finalData = data;
735
+ }
736
+ };
737
+
738
+ if (response.body && typeof response.body.getReader === 'function') {
739
+ const reader = response.body.getReader();
740
+ while (true) {
741
+ const { done, value } = await reader.read();
742
+ if (done) break;
743
+
744
+ buffer += decoder.decode(value, { stream: true });
745
+ const lines = buffer.split('\n');
746
+ buffer = lines.pop() || '';
747
+
748
+ for (const line of lines) {
749
+ handleLine(line);
750
+ }
751
+ }
752
+ } else if (response.body && typeof response.body[Symbol.asyncIterator] === 'function') {
753
+ for await (const value of response.body) {
754
+ buffer += decoder.decode(value, { stream: true });
755
+ const lines = buffer.split('\n');
756
+ buffer = lines.pop() || '';
757
+
758
+ for (const line of lines) {
759
+ handleLine(line);
760
+ }
761
+ }
762
+ } else {
763
+ throw new Error('Streaming response body is not readable');
764
+ }
765
+
766
+ buffer += decoder.decode();
767
+ if (buffer.trim()) {
768
+ handleLine(buffer);
769
+ }
770
+
771
+ const responseTime = Date.now() - startTime;
772
+ const speed = this.calculateTokensPerSecond(finalData || {}, responseTime);
773
+
774
+ return {
775
+ ...(finalData || {}),
776
+ message: {
777
+ role: 'assistant',
778
+ content
779
+ },
780
+ response: content,
781
+ responseTime,
782
+ tokensPerSecond: speed.tokensPerSecond,
783
+ evalTokensPerSecond: speed.evalTokensPerSecond,
784
+ endToEndTokensPerSecond: speed.endToEndTokensPerSecond
785
+ };
786
+ } catch (error) {
787
+ throw new Error(`Failed to run streaming chat request: ${error.message}`);
788
+ } finally {
789
+ clearTimeout(timeoutId);
790
+ }
791
+ }
671
792
  }
672
793
 
673
794
  module.exports = OllamaClient;