chat-console 0.2.0__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -1
- app/api/ollama.py +741 -1
- app/main.py +192 -69
- app/ui/model_browser.py +1146 -0
- app/utils.py +23 -22
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/METADATA +1 -1
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/RECORD +11 -10
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/WHEEL +0 -0
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/entry_points.txt +0 -0
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {chat_console-0.2.0.dist-info → chat_console-0.2.5.dist-info}/top_level.txt +0 -0
app/api/ollama.py
CHANGED
@@ -2,6 +2,10 @@ import aiohttp
 import asyncio
 import json
 import logging
+import os
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
 from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
 from .base import BaseModelClient

@@ -15,6 +19,12 @@ class OllamaClient(BaseModelClient):
         self.base_url = OLLAMA_BASE_URL.rstrip('/')
         logger.info(f"Initializing Ollama client with base URL: {self.base_url}")

+        # Track active stream session
+        self._active_stream_session = None
+
+        # Path to the cached models file
+        self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
         # Try to start Ollama if not running
         if not ensure_ollama_running():
             raise Exception(f"Failed to start Ollama server. Please ensure Ollama is installed and try again.")
@@ -158,6 +168,7 @@ class OllamaClient(BaseModelClient):
         prompt = self._prepare_messages(messages, style)
         retries = 2
         last_error = None
+        self._active_stream_session = None  # Track the active session

         while retries >= 0:
             try:
@@ -192,7 +203,10 @@ class OllamaClient(BaseModelClient):
                 logger.info("Model pulled successfully")

                 # Now proceed with actual generation
-
+                session = aiohttp.ClientSession()
+                self._active_stream_session = session  # Store reference to active session
+
+                try:
                     logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
                     async with session.post(
                         f"{self.base_url}/api/generate",
@@ -216,6 +230,9 @@ class OllamaClient(BaseModelClient):
                                 continue
                         logger.info("Streaming completed successfully")
                         return
+                finally:
+                    self._active_stream_session = None  # Clear reference when done
+                    await session.close()  # Ensure session is closed

             except aiohttp.ClientConnectorError:
                 last_error = "Could not connect to Ollama server. Make sure Ollama is running and accessible at " + self.base_url
@@ -223,6 +240,9 @@ class OllamaClient(BaseModelClient):
                 last_error = f"Ollama API error: {e.status} - {e.message}"
             except aiohttp.ClientTimeout:
                 last_error = "Request to Ollama server timed out"
+            except asyncio.CancelledError:
+                logger.info("Streaming cancelled by client")
+                raise  # Propagate cancellation
             except Exception as e:
                 last_error = f"Error streaming completion: {str(e)}"

@@ -233,3 +253,723 @@ class OllamaClient(BaseModelClient):
                 await asyncio.sleep(1)

         raise Exception(last_error)
+
+    async def cancel_stream(self) -> None:
+        """Cancel any active streaming request"""
+        if self._active_stream_session:
+            logger.info("Cancelling active stream session")
+            await self._active_stream_session.close()
+            self._active_stream_session = None
+
+    async def get_model_details(self, model_id: str) -> Dict[str, Any]:
+        """Get detailed information about a specific Ollama model"""
+        logger.info(f"Getting details for model: {model_id}")
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{self.base_url}/api/show",
+                    json={"name": model_id},
+                    timeout=5
+                ) as response:
+                    response.raise_for_status()
+                    data = await response.json()
+                    logger.debug(f"Ollama model details response: {data}")
+                    return data
+        except Exception as e:
+            logger.error(f"Error getting model details: {str(e)}")
+            # Return a dict with error info instead of raising an exception
+            return {
+                "error": str(e),
+                "modelfile": None,
+                "parameters": None,
+                "size": 0,
+                "created_at": None,
+                "modified_at": None
+            }
+
+    async def _fetch_and_cache_models(self) -> List[Dict[str, Any]]:
+        """Fetch models from Ollama website and cache them for 24 hours"""
+        logger.info("Performing a full fetch of Ollama models to update cache")
+
+        try:
+            # First load models from base file
+            base_models = []
+            try:
+                # Read the base models file
+                base_file_path = Path(__file__).parent.parent / "data" / "ollama-models-base.json"
+                if base_file_path.exists():
+                    with open(base_file_path, 'r') as f:
+                        base_data = json.load(f)
+                        if "models" in base_data:
+                            base_models = base_data["models"]
+                            logger.info(f"Loaded {len(base_models)} models from base file")
+
+                            # Process models from the base file to ensure consistent format
+                            for model in base_models:
+                                # Convert any missing fields to expected format
+                                if "parameter_size" not in model and "variants" in model and model["variants"]:
+                                    # Use the first variant as the default parameter size if not specified
+                                    for variant in model["variants"]:
+                                        if any(char.isdigit() for char in variant):
+                                            # This looks like a size variant (e.g., "7b", "70b")
+                                            if variant.lower().endswith('b'):
+                                                model["parameter_size"] = variant.upper()
+                                            else:
+                                                model["parameter_size"] = f"{variant}B"
+                                            break
+
+            except Exception as e:
+                logger.warning(f"Error loading base models file: {str(e)}")
+
+            # Web scraping for more models
+            scraped_models = []
+            try:
+                async with aiohttp.ClientSession() as session:
+                    # Get model data from the Ollama website search page (without query to get all models)
+                    search_url = "https://ollama.com/search"
+
+                    logger.info(f"Fetching all models from Ollama web: {search_url}")
+                    async with session.get(
+                        search_url,
+                        timeout=20,  # Longer timeout for comprehensive scrape
+                        headers={"User-Agent": "Mozilla/5.0 (compatible; chat-console/1.0)"}
+                    ) as response:
+                        if response.status == 200:
+                            html = await response.text()
+
+                            # Extract model data from JSON embedded in the page
+                            try:
+                                import re
+
+                                # Look for model data in JSON format
+                                model_match = re.search(r'window\.__NEXT_DATA__\s*=\s*({.+?});', html, re.DOTALL)
+                                if model_match:
+                                    json_data = json.loads(model_match.group(1))
+
+                                    # Navigate to where models are stored in the JSON
+                                    if (json_data and 'props' in json_data and
+                                        'pageProps' in json_data['props'] and
+                                        'models' in json_data['props']['pageProps']):
+
+                                        web_models = json_data['props']['pageProps']['models']
+                                        logger.info(f"Found {len(web_models)} models on Ollama website")
+
+                                        # Process models
+                                        for model in web_models:
+                                            try:
+                                                # Skip models without necessary data
+                                                if not model.get('name'):
+                                                    continue
+
+                                                # Create structured model data
+                                                processed_model = {
+                                                    "name": model.get('name', ''),
+                                                    "description": model.get('description', f"{model.get('name')} model"),
+                                                    "model_family": model.get('modelFamily', 'Unknown'),
+                                                }
+
+                                                # Add variants if available
+                                                if model.get('variants'):
+                                                    processed_model["variants"] = model.get('variants', [])
+
+                                                # Extract parameter size from model details
+                                                if model.get('parameterSize'):
+                                                    processed_model["parameter_size"] = f"{model.get('parameterSize')}B"
+                                                else:
+                                                    # Try to extract from name
+                                                    name = model.get('name', '').lower()
+                                                    param_size = None
+
+                                                    # Check for specific patterns
+                                                    if "70b" in name:
+                                                        param_size = "70B"
+                                                    elif "405b" in name or "400b" in name:
+                                                        param_size = "405B"
+                                                    elif "34b" in name or "35b" in name:
+                                                        param_size = "34B"
+                                                    elif "27b" in name or "28b" in name:
+                                                        param_size = "27B"
+                                                    elif "13b" in name or "14b" in name:
+                                                        param_size = "13B"
+                                                    elif "8b" in name:
+                                                        param_size = "8B"
+                                                    elif "7b" in name:
+                                                        param_size = "7B"
+                                                    elif "6b" in name:
+                                                        param_size = "6B"
+                                                    elif "3b" in name:
+                                                        param_size = "3B"
+                                                    elif "2b" in name:
+                                                        param_size = "2B"
+                                                    elif "1b" in name:
+                                                        param_size = "1B"
+                                                    elif "mini" in name:
+                                                        param_size = "3B"
+                                                    elif "small" in name:
+                                                        param_size = "7B"
+                                                    elif "medium" in name:
+                                                        param_size = "13B"
+                                                    elif "large" in name:
+                                                        param_size = "34B"
+
+                                                    # Special handling for models with ":latest" or no size indicator
+                                                    if not param_size and ("latest" in name or not any(size in name for size in ["1b", "2b", "3b", "6b", "7b", "8b", "13b", "14b", "27b", "28b", "34b", "35b", "70b", "405b", "400b", "mini", "small", "medium", "large"])):
+                                                        # Strip the ":latest" part to get base model
+                                                        base_name = name.split(":")[0]
+
+                                                        # Check if we have default parameter sizes for known models
+                                                        model_defaults = {
+                                                            "llama3": "8B",
+                                                            "llama2": "7B",
+                                                            "mistral": "7B",
+                                                            "gemma": "7B",
+                                                            "gemma2": "9B",
+                                                            "phi": "3B",
+                                                            "phi2": "3B",
+                                                            "phi3": "3B",
+                                                            "phi4": "7B",
+                                                            "orca-mini": "7B",
+                                                            "llava": "7B",
+                                                            "codellama": "7B",
+                                                            "neural-chat": "7B",
+                                                            "wizard-math": "7B",
+                                                            "yi": "6B",
+                                                            "deepseek": "7B",
+                                                            "deepseek-coder": "7B",
+                                                            "qwen": "7B",
+                                                            "falcon": "7B",
+                                                            "stable-code": "3B"
+                                                        }
+
+                                                        # Try to find a match in default sizes
+                                                        for model_name, default_size in model_defaults.items():
+                                                            if model_name in base_name:
+                                                                param_size = default_size
+                                                                break
+
+                                                    # If we still don't have a param size, check model metadata
+                                                    if not param_size and model.get('defaultParameterSize'):
+                                                        param_size = f"{model.get('defaultParameterSize')}B"
+
+                                                    # Check model variants for clues
+                                                    if not param_size and model.get('variants'):
+                                                        # The default variant is often the first one
+                                                        try:
+                                                            variants = model.get('variants', [])
+                                                            if variants and len(variants) > 0:
+                                                                # Try to get parameter size from the first variant
+                                                                first_variant = variants[0]
+                                                                if first_variant and 'parameterSize' in first_variant:
+                                                                    param_size = f"{first_variant['parameterSize']}B"
+                                                                # Just use the first variant if it looks like a size
+                                                                elif isinstance(first_variant, str) and any(char.isdigit() for char in first_variant):
+                                                                    if first_variant.lower().endswith('b'):
+                                                                        param_size = first_variant.upper()
+                                                                    else:
+                                                                        param_size = f"{first_variant}B"
+                                                        except Exception as e:
+                                                            logger.warning(f"Error getting parameter size from variants: {str(e)}")
+
+                                                    processed_model["parameter_size"] = param_size or "Unknown"
+
+                                                # Set disk size based on parameter size
+                                                param_value = processed_model.get("parameter_size", "").lower()
+                                                if "70b" in param_value:
+                                                    processed_model["size"] = 40000000000  # ~40GB
+                                                elif "405b" in param_value or "400b" in param_value:
+                                                    processed_model["size"] = 200000000000  # ~200GB
+                                                elif "34b" in param_value or "35b" in param_value:
+                                                    processed_model["size"] = 20000000000  # ~20GB
+                                                elif "27b" in param_value or "28b" in param_value:
+                                                    processed_model["size"] = 15000000000  # ~15GB
+                                                elif "13b" in param_value or "14b" in param_value:
+                                                    processed_model["size"] = 8000000000  # ~8GB
+                                                elif "8b" in param_value:
+                                                    processed_model["size"] = 4800000000  # ~4.8GB
+                                                elif "7b" in param_value:
+                                                    processed_model["size"] = 4500000000  # ~4.5GB
+                                                elif "6b" in param_value:
+                                                    processed_model["size"] = 3500000000  # ~3.5GB
+                                                elif "3b" in param_value:
+                                                    processed_model["size"] = 2000000000  # ~2GB
+                                                elif "2b" in param_value:
+                                                    processed_model["size"] = 1500000000  # ~1.5GB
+                                                elif "1b" in param_value:
+                                                    processed_model["size"] = 800000000  # ~800MB
+                                                else:
+                                                    processed_model["size"] = 4500000000  # Default to ~4.5GB
+
+                                                scraped_models.append(processed_model)
+                                            except Exception as e:
+                                                logger.warning(f"Error processing web model {model.get('name', 'unknown')}: {str(e)}")
+                            except Exception as e:
+                                logger.warning(f"Error extracting model data from Ollama website: {str(e)}")
+            except Exception as web_e:
+                logger.warning(f"Error fetching from Ollama website: {str(web_e)}")
+
+            # Add curated models from the registry
+            curated_models = await self.get_registry_models("")
+
+            # Combine all models - prefer base models, then scraped models, then curated
+            all_models = []
+            existing_names = set()
+
+            # First add all base models (highest priority)
+            for model in base_models:
+                if model.get("name"):
+                    all_models.append(model)
+                    existing_names.add(model["name"])
+
+            # Then add scraped models if not already added
+            for model in scraped_models:
+                if model.get("name") and model["name"] not in existing_names:
+                    all_models.append(model)
+                    existing_names.add(model["name"])
+
+            # Finally add curated models if not already added
+            for model in curated_models:
+                if model.get("name") and model["name"] not in existing_names:
+                    all_models.append(model)
+                    existing_names.add(model["name"])
+
+            # Cache the combined models
+            cache_data = {
+                "last_updated": datetime.now().isoformat(),
+                "models": all_models
+            }
+
+            try:
+                with open(self.models_cache_path, 'w') as f:
+                    json.dump(cache_data, f, indent=2)
+                logger.info(f"Cached {len(all_models)} models to {self.models_cache_path}")
+            except Exception as cache_error:
+                logger.error(f"Error caching models: {str(cache_error)}")
+
+            return all_models
+
+        except Exception as e:
+            logger.error(f"Error during model fetch and cache: {str(e)}")
+            # Return an empty list in case of catastrophic failure
+            return []
+
+    async def list_available_models_from_registry(self, query: str = "") -> List[Dict[str, Any]]:
+        """List available models from Ollama registry with cache support"""
+        logger.info(f"Fetching available models from Ollama registry, query: '{query}'")
+
+        # Check if we need to update the cache
+        need_cache_update = True
+        models_from_cache = []
+
+        try:
+            # Try to read from cache first
+            if self.models_cache_path.exists():
+                try:
+                    with open(self.models_cache_path, 'r') as f:
+                        cache_data = json.load(f)
+
+                    # Check if cache is still valid (less than 24 hours old)
+                    if cache_data.get("last_updated"):
+                        last_updated = datetime.fromisoformat(cache_data["last_updated"])
+                        # Cache valid if less than 24 hours old
+                        if datetime.now() - last_updated < timedelta(hours=24):
+                            need_cache_update = False
+                            models_from_cache = cache_data.get("models", [])
+                            logger.info(f"Using cached models from {last_updated.isoformat()} ({len(models_from_cache)} models)")
+                        else:
+                            logger.info(f"Cache from {last_updated.isoformat()} is older than 24 hours, refreshing")
+                except Exception as e:
+                    logger.warning(f"Error reading cache: {str(e)}, will refresh")
+            else:
+                logger.info("No cache found, creating a new one")
+        except Exception as e:
+            logger.warning(f"Error checking cache: {str(e)}")
+
+        # Always read the base file first
+        base_models = []
+        try:
+            # Read the base models file
+            base_file_path = Path(__file__).parent.parent / "data" / "ollama-models-base.json"
+            if base_file_path.exists():
+                with open(base_file_path, 'r') as f:
+                    base_data = json.load(f)
+                    if "models" in base_data:
+                        base_models = base_data["models"]
+                        logger.info(f"Loaded {len(base_models)} models from base file")
+
+                        # Process base models to ensure they have proper format
+                        for model in base_models:
+                            # Make sure they have model_family
+                            if "model_family" not in model and "name" in model:
+                                name = model["name"].lower()
+                                if "llama" in name:
+                                    model["model_family"] = "Llama"
+                                elif "mistral" in name:
+                                    model["model_family"] = "Mistral"
+                                elif "phi" in name:
+                                    model["model_family"] = "Phi"
+                                elif "gemma" in name:
+                                    model["model_family"] = "Gemma"
+                                elif "qwen" in name:
+                                    model["model_family"] = "Qwen"
+                                else:
+                                    # Try to extract family from name (before any colon)
+                                    base_name = name.split(":")[0]
+                                    model["model_family"] = base_name.capitalize()
+
+            # If no cache yet but base file exists, use base models and trigger update
+            if not models_from_cache and base_models:
+                models_from_cache = base_models
+                logger.info(f"Using {len(base_models)} models from base file while cache updates")
+
+                # Start cache update in background
+                asyncio.create_task(self._fetch_and_cache_models())
+                need_cache_update = False
+        except Exception as e:
+            logger.warning(f"Error loading base models file: {str(e)}")
+
+        # If we need to update the cache, do it now
+        if need_cache_update:
+            # Run the cache update in the background if we have cached data
+            if models_from_cache:
+                # We can use cached data for now but update in background
+                asyncio.create_task(self._fetch_and_cache_models())
+            else:
+                # We need to wait for the cache update
+                models_from_cache = await self._fetch_and_cache_models()
+
+        # Always make sure base models are included
+        if base_models:
+            # Create a set of existing model names
+            existing_names = set(model.get("name", "") for model in models_from_cache)
+
+            # Add base models if not already in cache
+            for model in base_models:
+                if model.get("name") and model["name"] not in existing_names:
+                    models_from_cache.append(model)
+                    existing_names.add(model["name"])
+
+            logger.info(f"Combined total: {len(models_from_cache)} models")
+
+        # Log the number of models available
+        logger.info(f"Total available models: {len(models_from_cache)}")
+
+        # No filtering here - the UI will handle filtering
+        return models_from_cache
+
+    async def get_registry_models(self, query: str = "") -> List[Dict[str, Any]]:
+        """Get a curated list of popular Ollama models"""
+        logger.info("Returning a curated list of popular Ollama models (query: {})".format(query or "none"))
+
+        # Provide a curated list of popular models as fallback
+        models = [
+            # Llama 3 models
+            {
+                "name": "llama3",
+                "description": "Meta's Llama 3 8B model",
+                "model_family": "Llama",
+                "size": 4500000000,
+                "parameter_size": "8B"
+            },
+            {
+                "name": "llama3:8b",
+                "description": "Meta's Llama 3 8B parameter model",
+                "model_family": "Llama",
+                "size": 4500000000,
+                "parameter_size": "8B"
+            },
+            {
+                "name": "llama3:70b",
+                "description": "Meta's Llama 3 70B parameter model",
+                "model_family": "Llama",
+                "size": 40000000000,
+                "parameter_size": "70B"
+            },
+            # Llama 3.1 models
+            {
+                "name": "llama3.1:8b",
+                "description": "Meta's Llama 3.1 8B parameter model",
+                "model_family": "Llama",
+                "size": 4500000000
+            },
+            {
+                "name": "llama3.1:70b",
+                "description": "Meta's Llama 3.1 70B parameter model",
+                "model_family": "Llama",
+                "size": 40000000000
+            },
+            {
+                "name": "llama3.1:405b",
+                "description": "Meta's Llama 3.1 405B parameter model",
+                "model_family": "Llama",
+                "size": 200000000000
+            },
+            # Gemma models
+            {
+                "name": "gemma:2b",
+                "description": "Google's Gemma 2B parameter model",
+                "model_family": "Gemma",
+                "size": 1500000000
+            },
+            {
+                "name": "gemma:7b",
+                "description": "Google's Gemma 7B parameter model",
+                "model_family": "Gemma",
+                "size": 4000000000
+            },
+            {
+                "name": "gemma2:9b",
+                "description": "Google's Gemma 2 9B parameter model",
+                "model_family": "Gemma",
+                "size": 5000000000
+            },
+            {
+                "name": "gemma2:27b",
+                "description": "Google's Gemma 2 27B parameter model",
+                "model_family": "Gemma",
+                "size": 15000000000
+            },
+            # Mistral models
+            {
+                "name": "mistral",
+                "description": "Mistral 7B model - balanced performance",
+                "model_family": "Mistral",
+                "size": 4200000000
+            },
+            {
+                "name": "mistral:7b",
+                "description": "Mistral 7B model - balanced performance",
+                "model_family": "Mistral",
+                "size": 4200000000
+            },
+            {
+                "name": "mistral:8x7b",
+                "description": "Mistral 8x7B mixture of experts model",
+                "model_family": "Mistral",
+                "size": 15000000000
+            },
+            # Phi models
+            {
+                "name": "phi3:mini",
+                "description": "Microsoft's Phi-3 Mini model",
+                "model_family": "Phi",
+                "size": 3500000000
+            },
+            {
+                "name": "phi3:small",
+                "description": "Microsoft's Phi-3 Small model",
+                "model_family": "Phi",
+                "size": 7000000000
+            },
+            {
+                "name": "phi3:medium",
+                "description": "Microsoft's Phi-3 Medium model",
+                "model_family": "Phi",
+                "size": 14000000000
+            },
+            {
+                "name": "phi2",
+                "description": "Microsoft's Phi-2 model, small but capable",
+                "model_family": "Phi",
+                "size": 2800000000
+            },
+            # Orca models
+            {
+                "name": "orca-mini",
+                "description": "Small, fast model optimized for chat",
+                "model_family": "Orca",
+                "size": 2000000000
+            },
+            {
+                "name": "orca-mini:3b",
+                "description": "Small 3B parameter model optimized for chat",
+                "model_family": "Orca",
+                "size": 2000000000
+            },
+            {
+                "name": "orca-mini:7b",
+                "description": "Medium 7B parameter model optimized for chat",
+                "model_family": "Orca",
+                "size": 4000000000
+            },
+            # Llava models (multimodal)
+            {
+                "name": "llava",
+                "description": "Multimodal model with vision capabilities",
+                "model_family": "LLaVA",
+                "size": 4700000000
+            },
+            {
+                "name": "llava:13b",
+                "description": "Multimodal model with vision capabilities (13B)",
+                "model_family": "LLaVA",
+                "size": 8000000000
+            },
+            {
+                "name": "llava:34b",
+                "description": "Multimodal model with vision capabilities (34B)",
+                "model_family": "LLaVA",
+                "size": 20000000000
+            },
+            # CodeLlama models
+            {
+                "name": "codellama",
+                "description": "Llama model fine-tuned for code generation",
+                "model_family": "CodeLlama",
+                "size": 4200000000
+            },
+            {
+                "name": "codellama:7b",
+                "description": "7B parameter Llama model for code generation",
+                "model_family": "CodeLlama",
+                "size": 4200000000
+            },
+            {
+                "name": "codellama:13b",
+                "description": "13B parameter Llama model for code generation",
+                "model_family": "CodeLlama",
+                "size": 8000000000
+            },
+            {
+                "name": "codellama:34b",
+                "description": "34B parameter Llama model for code generation",
+                "model_family": "CodeLlama",
+                "size": 20000000000
+            },
+            # Other models
+            {
+                "name": "neural-chat",
+                "description": "Intel's Neural Chat model",
+                "model_family": "Neural Chat",
+                "size": 4200000000
+            },
+            {
+                "name": "wizard-math",
+                "description": "Specialized for math problem solving",
+                "model_family": "Wizard",
+                "size": 4200000000
+            },
+            {
+                "name": "yi",
+                "description": "01AI's Yi model, high performance",
+                "model_family": "Yi",
+                "size": 4500000000
+            },
+            {
+                "name": "yi:6b",
+                "description": "01AI's Yi 6B parameter model",
+                "model_family": "Yi",
+                "size": 3500000000
+            },
+            {
+                "name": "yi:9b",
+                "description": "01AI's Yi 9B parameter model",
+                "model_family": "Yi",
+                "size": 5000000000
+            },
+            {
+                "name": "yi:34b",
+                "description": "01AI's Yi 34B parameter model, excellent performance",
+                "model_family": "Yi",
+                "size": 20000000000
+            },
+            {
+                "name": "stable-code",
+                "description": "Stability AI's code generation model",
+                "model_family": "StableCode",
+                "size": 4200000000
+            },
+            {
+                "name": "llama2",
+                "description": "Meta's Llama 2 model",
+                "model_family": "Llama",
+                "size": 4200000000
+            },
+            {
+                "name": "llama2:7b",
+                "description": "Meta's Llama 2 7B parameter model",
+                "model_family": "Llama",
+                "size": 4200000000
+            },
+            {
+                "name": "llama2:13b",
+                "description": "Meta's Llama 2 13B parameter model",
+                "model_family": "Llama",
+                "size": 8000000000
+            },
+            {
+                "name": "llama2:70b",
+                "description": "Meta's Llama 2 70B parameter model",
+                "model_family": "Llama",
+                "size": 40000000000
+            },
+            {
+                "name": "deepseek-coder",
+                "description": "DeepSeek's code generation model",
+                "model_family": "DeepSeek",
+                "size": 4200000000
+            },
+            {
+                "name": "falcon:40b",
+                "description": "TII's Falcon 40B, very capable model",
+                "model_family": "Falcon",
+                "size": 25000000000
+            },
+            {
+                "name": "qwen:14b",
+                "description": "Alibaba's Qwen 14B model",
+                "model_family": "Qwen",
+                "size": 9000000000
+            }
+        ]
+
+        # Filter by query if provided
+        query = query.lower() if query else ""
+        if query:
+            filtered_models = []
+            for model in models:
+                if (query in model["name"].lower() or
+                    query in model["description"].lower() or
+                    query in model["model_family"].lower()):
+                    filtered_models.append(model)
+            return filtered_models
+
+        return models
+
+    async def pull_model(self, model_id: str) -> AsyncGenerator[Dict[str, Any], None]:
+        """Pull a model from Ollama registry with progress updates"""
+        logger.info(f"Pulling model: {model_id}")
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{self.base_url}/api/pull",
+                    json={"name": model_id},
+                    timeout=3600  # 1 hour timeout for large models
+                ) as response:
+                    response.raise_for_status()
+                    async for line in response.content:
+                        if line:
+                            chunk = line.decode().strip()
+                            try:
+                                data = json.loads(chunk)
+                                yield data
+                            except json.JSONDecodeError:
+                                continue
+        except Exception as e:
+            logger.error(f"Error pulling model: {str(e)}")
+            raise Exception(f"Failed to pull model: {str(e)}")
+
+    async def delete_model(self, model_id: str) -> None:
+        """Delete a model from Ollama"""
+        logger.info(f"Deleting model: {model_id}")
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.delete(
+                    f"{self.base_url}/api/delete",
+                    json={"name": model_id},
+                    timeout=30
+                ) as response:
+                    response.raise_for_status()
+                    logger.info(f"Model {model_id} deleted successfully")
+        except Exception as e:
+            logger.error(f"Error deleting model: {str(e)}")
+            raise Exception(f"Failed to delete model: {str(e)}")
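For orientation, the sketch below shows how the OllamaClient methods introduced in 0.2.5 might be exercised from a small script. It is not part of the diff; the import path and the no-argument constructor are assumptions based on the file layout and the `__init__` shown above, and the method names and return shapes come from the added code.

```python
# Hypothetical usage of the new 0.2.5 OllamaClient methods (not from the package).
import asyncio

from app.api.ollama import OllamaClient  # assumed import path


async def main() -> None:
    client = OllamaClient()  # assumes no required constructor arguments

    # Browse the cached/scraped registry listing added in this release
    models = await client.list_available_models_from_registry("llama")
    print(f"{len(models)} registry models matched")

    # Inspect a model; errors come back as a dict rather than an exception
    details = await client.get_model_details("llama3")
    print(details.get("parameters"))

    # Pull a model, consuming the progress dicts as they stream in
    async for progress in client.pull_model("llama3"):
        print(progress.get("status"))


asyncio.run(main())
```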