chat-console 0.2.0-py3-none-any.whl → 0.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app/api/ollama.py CHANGED
@@ -2,6 +2,10 @@ import aiohttp
  import asyncio
  import json
  import logging
+ import os
+ import time
+ from datetime import datetime, timedelta
+ from pathlib import Path
  from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
  from .base import BaseModelClient

@@ -15,6 +19,12 @@ class OllamaClient(BaseModelClient):
  self.base_url = OLLAMA_BASE_URL.rstrip('/')
  logger.info(f"Initializing Ollama client with base URL: {self.base_url}")

+ # Track active stream session
+ self._active_stream_session = None
+
+ # Path to the cached models file
+ self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
  # Try to start Ollama if not running
  if not ensure_ollama_running():
  raise Exception(f"Failed to start Ollama server. Please ensure Ollama is installed and try again.")
@@ -158,6 +168,7 @@ class OllamaClient(BaseModelClient):
  prompt = self._prepare_messages(messages, style)
  retries = 2
  last_error = None
+ self._active_stream_session = None # Track the active session

  while retries >= 0:
  try:
@@ -192,7 +203,10 @@ class OllamaClient(BaseModelClient):
  logger.info("Model pulled successfully")

  # Now proceed with actual generation
- async with aiohttp.ClientSession() as session:
+ session = aiohttp.ClientSession()
+ self._active_stream_session = session # Store reference to active session
+
+ try:
  logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
  async with session.post(
  f"{self.base_url}/api/generate",
@@ -216,6 +230,9 @@ class OllamaClient(BaseModelClient):
  continue
  logger.info("Streaming completed successfully")
  return
+ finally:
+ self._active_stream_session = None # Clear reference when done
+ await session.close() # Ensure session is closed

  except aiohttp.ClientConnectorError:
  last_error = "Could not connect to Ollama server. Make sure Ollama is running and accessible at " + self.base_url
@@ -223,6 +240,9 @@ class OllamaClient(BaseModelClient):
  last_error = f"Ollama API error: {e.status} - {e.message}"
  except aiohttp.ClientTimeout:
  last_error = "Request to Ollama server timed out"
+ except asyncio.CancelledError:
+ logger.info("Streaming cancelled by client")
+ raise # Propagate cancellation
  except Exception as e:
  last_error = f"Error streaming completion: {str(e)}"

@@ -233,3 +253,723 @@ class OllamaClient(BaseModelClient):
  await asyncio.sleep(1)

  raise Exception(last_error)
+
+ async def cancel_stream(self) -> None:
+ """Cancel any active streaming request"""
+ if self._active_stream_session:
+ logger.info("Cancelling active stream session")
+ await self._active_stream_session.close()
+ self._active_stream_session = None
+
+ async def get_model_details(self, model_id: str) -> Dict[str, Any]:
+ """Get detailed information about a specific Ollama model"""
+ logger.info(f"Getting details for model: {model_id}")
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(
+ f"{self.base_url}/api/show",
+ json={"name": model_id},
+ timeout=5
+ ) as response:
+ response.raise_for_status()
+ data = await response.json()
+ logger.debug(f"Ollama model details response: {data}")
+ return data
+ except Exception as e:
+ logger.error(f"Error getting model details: {str(e)}")
+ # Return a dict with error info instead of raising an exception
+ return {
+ "error": str(e),
+ "modelfile": None,
+ "parameters": None,
+ "size": 0,
+ "created_at": None,
+ "modified_at": None
+ }
+
+ async def _fetch_and_cache_models(self) -> List[Dict[str, Any]]:
+ """Fetch models from Ollama website and cache them for 24 hours"""
+ logger.info("Performing a full fetch of Ollama models to update cache")
+
+ try:
+ # First load models from base file
+ base_models = []
+ try:
+ # Read the base models file
+ base_file_path = Path(__file__).parent.parent / "data" / "ollama-models-base.json"
+ if base_file_path.exists():
+ with open(base_file_path, 'r') as f:
+ base_data = json.load(f)
+ if "models" in base_data:
+ base_models = base_data["models"]
+ logger.info(f"Loaded {len(base_models)} models from base file")
+
+ # Process models from the base file to ensure consistent format
+ for model in base_models:
+ # Convert any missing fields to expected format
+ if "parameter_size" not in model and "variants" in model and model["variants"]:
+ # Use the first variant as the default parameter size if not specified
+ for variant in model["variants"]:
+ if any(char.isdigit() for char in variant):
+ # This looks like a size variant (e.g., "7b", "70b")
+ if variant.lower().endswith('b'):
+ model["parameter_size"] = variant.upper()
+ else:
+ model["parameter_size"] = f"{variant}B"
+ break
+
+ except Exception as e:
+ logger.warning(f"Error loading base models file: {str(e)}")
+
+ # Web scraping for more models
+ scraped_models = []
+ try:
+ async with aiohttp.ClientSession() as session:
+ # Get model data from the Ollama website search page (without query to get all models)
+ search_url = "https://ollama.com/search"
+
+ logger.info(f"Fetching all models from Ollama web: {search_url}")
+ async with session.get(
+ search_url,
+ timeout=20, # Longer timeout for comprehensive scrape
+ headers={"User-Agent": "Mozilla/5.0 (compatible; chat-console/1.0)"}
+ ) as response:
+ if response.status == 200:
+ html = await response.text()
+
+ # Extract model data from JSON embedded in the page
+ try:
+ import re
+
+ # Look for model data in JSON format
+ model_match = re.search(r'window\.__NEXT_DATA__\s*=\s*({.+?});', html, re.DOTALL)
+ if model_match:
+ json_data = json.loads(model_match.group(1))
+
+ # Navigate to where models are stored in the JSON
+ if (json_data and 'props' in json_data and
+ 'pageProps' in json_data['props'] and
+ 'models' in json_data['props']['pageProps']):
+
+ web_models = json_data['props']['pageProps']['models']
+ logger.info(f"Found {len(web_models)} models on Ollama website")
+
+ # Process models
+ for model in web_models:
+ try:
+ # Skip models without necessary data
+ if not model.get('name'):
+ continue
+
+ # Create structured model data
+ processed_model = {
+ "name": model.get('name', ''),
+ "description": model.get('description', f"{model.get('name')} model"),
+ "model_family": model.get('modelFamily', 'Unknown'),
+ }
+
+ # Add variants if available
+ if model.get('variants'):
+ processed_model["variants"] = model.get('variants', [])
+
+ # Extract parameter size from model details
+ if model.get('parameterSize'):
+ processed_model["parameter_size"] = f"{model.get('parameterSize')}B"
+ else:
+ # Try to extract from name
+ name = model.get('name', '').lower()
+ param_size = None
+
+ # Check for specific patterns
+ if "70b" in name:
+ param_size = "70B"
+ elif "405b" in name or "400b" in name:
+ param_size = "405B"
+ elif "34b" in name or "35b" in name:
+ param_size = "34B"
+ elif "27b" in name or "28b" in name:
+ param_size = "27B"
+ elif "13b" in name or "14b" in name:
+ param_size = "13B"
+ elif "8b" in name:
+ param_size = "8B"
+ elif "7b" in name:
+ param_size = "7B"
+ elif "6b" in name:
+ param_size = "6B"
+ elif "3b" in name:
+ param_size = "3B"
+ elif "2b" in name:
+ param_size = "2B"
+ elif "1b" in name:
+ param_size = "1B"
+ elif "mini" in name:
+ param_size = "3B"
+ elif "small" in name:
+ param_size = "7B"
+ elif "medium" in name:
+ param_size = "13B"
+ elif "large" in name:
+ param_size = "34B"
+
+ # Special handling for models with ":latest" or no size indicator
+ if not param_size and ("latest" in name or not any(size in name for size in ["1b", "2b", "3b", "6b", "7b", "8b", "13b", "14b", "27b", "28b", "34b", "35b", "70b", "405b", "400b", "mini", "small", "medium", "large"])):
+ # Strip the ":latest" part to get base model
+ base_name = name.split(":")[0]
+
+ # Check if we have default parameter sizes for known models
+ model_defaults = {
+ "llama3": "8B",
+ "llama2": "7B",
+ "mistral": "7B",
+ "gemma": "7B",
+ "gemma2": "9B",
+ "phi": "3B",
+ "phi2": "3B",
+ "phi3": "3B",
+ "phi4": "7B",
+ "orca-mini": "7B",
+ "llava": "7B",
+ "codellama": "7B",
+ "neural-chat": "7B",
+ "wizard-math": "7B",
+ "yi": "6B",
+ "deepseek": "7B",
+ "deepseek-coder": "7B",
+ "qwen": "7B",
+ "falcon": "7B",
+ "stable-code": "3B"
+ }
+
+ # Try to find a match in default sizes
+ for model_name, default_size in model_defaults.items():
+ if model_name in base_name:
+ param_size = default_size
+ break
+
+ # If we still don't have a param size, check model metadata
+ if not param_size and model.get('defaultParameterSize'):
+ param_size = f"{model.get('defaultParameterSize')}B"
+
+ # Check model variants for clues
+ if not param_size and model.get('variants'):
+ # The default variant is often the first one
+ try:
+ variants = model.get('variants', [])
+ if variants and len(variants) > 0:
+ # Try to get parameter size from the first variant
+ first_variant = variants[0]
+ if first_variant and 'parameterSize' in first_variant:
+ param_size = f"{first_variant['parameterSize']}B"
+ # Just use the first variant if it looks like a size
+ elif isinstance(first_variant, str) and any(char.isdigit() for char in first_variant):
+ if first_variant.lower().endswith('b'):
+ param_size = first_variant.upper()
+ else:
+ param_size = f"{first_variant}B"
+ except Exception as e:
+ logger.warning(f"Error getting parameter size from variants: {str(e)}")
+
+ processed_model["parameter_size"] = param_size or "Unknown"
+
+ # Set disk size based on parameter size
+ param_value = processed_model.get("parameter_size", "").lower()
+ if "70b" in param_value:
+ processed_model["size"] = 40000000000 # ~40GB
+ elif "405b" in param_value or "400b" in param_value:
+ processed_model["size"] = 200000000000 # ~200GB
+ elif "34b" in param_value or "35b" in param_value:
+ processed_model["size"] = 20000000000 # ~20GB
+ elif "27b" in param_value or "28b" in param_value:
+ processed_model["size"] = 15000000000 # ~15GB
+ elif "13b" in param_value or "14b" in param_value:
+ processed_model["size"] = 8000000000 # ~8GB
+ elif "8b" in param_value:
+ processed_model["size"] = 4800000000 # ~4.8GB
+ elif "7b" in param_value:
+ processed_model["size"] = 4500000000 # ~4.5GB
+ elif "6b" in param_value:
+ processed_model["size"] = 3500000000 # ~3.5GB
+ elif "3b" in param_value:
+ processed_model["size"] = 2000000000 # ~2GB
+ elif "2b" in param_value:
+ processed_model["size"] = 1500000000 # ~1.5GB
+ elif "1b" in param_value:
+ processed_model["size"] = 800000000 # ~800MB
+ else:
+ processed_model["size"] = 4500000000 # Default to ~4.5GB
+
+ scraped_models.append(processed_model)
+ except Exception as e:
+ logger.warning(f"Error processing web model {model.get('name', 'unknown')}: {str(e)}")
+ except Exception as e:
+ logger.warning(f"Error extracting model data from Ollama website: {str(e)}")
+ except Exception as web_e:
+ logger.warning(f"Error fetching from Ollama website: {str(web_e)}")
+
+ # Add curated models from the registry
+ curated_models = await self.get_registry_models("")
+
+ # Combine all models - prefer base models, then scraped models, then curated
+ all_models = []
+ existing_names = set()
+
+ # First add all base models (highest priority)
+ for model in base_models:
+ if model.get("name"):
+ all_models.append(model)
+ existing_names.add(model["name"])
+
+ # Then add scraped models if not already added
+ for model in scraped_models:
+ if model.get("name") and model["name"] not in existing_names:
+ all_models.append(model)
+ existing_names.add(model["name"])
+
+ # Finally add curated models if not already added
+ for model in curated_models:
+ if model.get("name") and model["name"] not in existing_names:
+ all_models.append(model)
+ existing_names.add(model["name"])
+
+ # Cache the combined models
+ cache_data = {
+ "last_updated": datetime.now().isoformat(),
+ "models": all_models
+ }
+
+ try:
+ with open(self.models_cache_path, 'w') as f:
+ json.dump(cache_data, f, indent=2)
+ logger.info(f"Cached {len(all_models)} models to {self.models_cache_path}")
+ except Exception as cache_error:
+ logger.error(f"Error caching models: {str(cache_error)}")
+
+ return all_models
+
+ except Exception as e:
+ logger.error(f"Error during model fetch and cache: {str(e)}")
+ # Return an empty list in case of catastrophic failure
+ return []
+
+ async def list_available_models_from_registry(self, query: str = "") -> List[Dict[str, Any]]:
+ """List available models from Ollama registry with cache support"""
+ logger.info(f"Fetching available models from Ollama registry, query: '{query}'")
+
+ # Check if we need to update the cache
+ need_cache_update = True
+ models_from_cache = []
+
+ try:
+ # Try to read from cache first
+ if self.models_cache_path.exists():
+ try:
+ with open(self.models_cache_path, 'r') as f:
+ cache_data = json.load(f)
+
+ # Check if cache is still valid (less than 24 hours old)
+ if cache_data.get("last_updated"):
+ last_updated = datetime.fromisoformat(cache_data["last_updated"])
+ # Cache valid if less than 24 hours old
+ if datetime.now() - last_updated < timedelta(hours=24):
+ need_cache_update = False
+ models_from_cache = cache_data.get("models", [])
+ logger.info(f"Using cached models from {last_updated.isoformat()} ({len(models_from_cache)} models)")
+ else:
+ logger.info(f"Cache from {last_updated.isoformat()} is older than 24 hours, refreshing")
+ except Exception as e:
+ logger.warning(f"Error reading cache: {str(e)}, will refresh")
+ else:
+ logger.info("No cache found, creating a new one")
+ except Exception as e:
+ logger.warning(f"Error checking cache: {str(e)}")
+
+ # Always read the base file first
+ base_models = []
+ try:
+ # Read the base models file
+ base_file_path = Path(__file__).parent.parent / "data" / "ollama-models-base.json"
+ if base_file_path.exists():
+ with open(base_file_path, 'r') as f:
+ base_data = json.load(f)
+ if "models" in base_data:
+ base_models = base_data["models"]
+ logger.info(f"Loaded {len(base_models)} models from base file")
+
+ # Process base models to ensure they have proper format
+ for model in base_models:
+ # Make sure they have model_family
+ if "model_family" not in model and "name" in model:
+ name = model["name"].lower()
+ if "llama" in name:
+ model["model_family"] = "Llama"
+ elif "mistral" in name:
+ model["model_family"] = "Mistral"
+ elif "phi" in name:
+ model["model_family"] = "Phi"
+ elif "gemma" in name:
+ model["model_family"] = "Gemma"
+ elif "qwen" in name:
+ model["model_family"] = "Qwen"
+ else:
+ # Try to extract family from name (before any colon)
+ base_name = name.split(":")[0]
+ model["model_family"] = base_name.capitalize()
+
+ # If no cache yet but base file exists, use base models and trigger update
+ if not models_from_cache and base_models:
+ models_from_cache = base_models
+ logger.info(f"Using {len(base_models)} models from base file while cache updates")
+
+ # Start cache update in background
+ asyncio.create_task(self._fetch_and_cache_models())
+ need_cache_update = False
+ except Exception as e:
+ logger.warning(f"Error loading base models file: {str(e)}")
+
+ # If we need to update the cache, do it now
+ if need_cache_update:
+ # Run the cache update in the background if we have cached data
+ if models_from_cache:
+ # We can use cached data for now but update in background
+ asyncio.create_task(self._fetch_and_cache_models())
+ else:
+ # We need to wait for the cache update
+ models_from_cache = await self._fetch_and_cache_models()
+
+ # Always make sure base models are included
+ if base_models:
+ # Create a set of existing model names
+ existing_names = set(model.get("name", "") for model in models_from_cache)
+
+ # Add base models if not already in cache
+ for model in base_models:
+ if model.get("name") and model["name"] not in existing_names:
+ models_from_cache.append(model)
+ existing_names.add(model["name"])
+
+ logger.info(f"Combined total: {len(models_from_cache)} models")
+
+ # Log the number of models available
+ logger.info(f"Total available models: {len(models_from_cache)}")
+
+ # No filtering here - the UI will handle filtering
+ return models_from_cache
+
+ async def get_registry_models(self, query: str = "") -> List[Dict[str, Any]]:
+ """Get a curated list of popular Ollama models"""
+ logger.info("Returning a curated list of popular Ollama models (query: {})".format(query or "none"))
+
+ # Provide a curated list of popular models as fallback
+ models = [
+ # Llama 3 models
+ {
+ "name": "llama3",
+ "description": "Meta's Llama 3 8B model",
+ "model_family": "Llama",
+ "size": 4500000000,
+ "parameter_size": "8B"
+ },
+ {
+ "name": "llama3:8b",
+ "description": "Meta's Llama 3 8B parameter model",
+ "model_family": "Llama",
+ "size": 4500000000,
+ "parameter_size": "8B"
+ },
+ {
+ "name": "llama3:70b",
+ "description": "Meta's Llama 3 70B parameter model",
+ "model_family": "Llama",
+ "size": 40000000000,
+ "parameter_size": "70B"
+ },
+ # Llama 3.1 models
+ {
+ "name": "llama3.1:8b",
+ "description": "Meta's Llama 3.1 8B parameter model",
+ "model_family": "Llama",
+ "size": 4500000000
+ },
+ {
+ "name": "llama3.1:70b",
+ "description": "Meta's Llama 3.1 70B parameter model",
+ "model_family": "Llama",
+ "size": 40000000000
+ },
+ {
+ "name": "llama3.1:405b",
+ "description": "Meta's Llama 3.1 405B parameter model",
+ "model_family": "Llama",
+ "size": 200000000000
+ },
+ # Gemma models
+ {
+ "name": "gemma:2b",
+ "description": "Google's Gemma 2B parameter model",
+ "model_family": "Gemma",
+ "size": 1500000000
+ },
+ {
+ "name": "gemma:7b",
+ "description": "Google's Gemma 7B parameter model",
+ "model_family": "Gemma",
+ "size": 4000000000
+ },
+ {
+ "name": "gemma2:9b",
+ "description": "Google's Gemma 2 9B parameter model",
+ "model_family": "Gemma",
+ "size": 5000000000
+ },
+ {
+ "name": "gemma2:27b",
+ "description": "Google's Gemma 2 27B parameter model",
+ "model_family": "Gemma",
+ "size": 15000000000
+ },
+ # Mistral models
+ {
+ "name": "mistral",
+ "description": "Mistral 7B model - balanced performance",
+ "model_family": "Mistral",
+ "size": 4200000000
+ },
+ {
+ "name": "mistral:7b",
+ "description": "Mistral 7B model - balanced performance",
+ "model_family": "Mistral",
+ "size": 4200000000
+ },
+ {
+ "name": "mistral:8x7b",
+ "description": "Mistral 8x7B mixture of experts model",
+ "model_family": "Mistral",
+ "size": 15000000000
+ },
+ # Phi models
+ {
+ "name": "phi3:mini",
+ "description": "Microsoft's Phi-3 Mini model",
+ "model_family": "Phi",
+ "size": 3500000000
+ },
+ {
+ "name": "phi3:small",
+ "description": "Microsoft's Phi-3 Small model",
+ "model_family": "Phi",
+ "size": 7000000000
+ },
+ {
+ "name": "phi3:medium",
+ "description": "Microsoft's Phi-3 Medium model",
+ "model_family": "Phi",
+ "size": 14000000000
+ },
+ {
+ "name": "phi2",
+ "description": "Microsoft's Phi-2 model, small but capable",
+ "model_family": "Phi",
+ "size": 2800000000
+ },
+ # Orca models
+ {
+ "name": "orca-mini",
+ "description": "Small, fast model optimized for chat",
+ "model_family": "Orca",
+ "size": 2000000000
+ },
+ {
+ "name": "orca-mini:3b",
+ "description": "Small 3B parameter model optimized for chat",
+ "model_family": "Orca",
+ "size": 2000000000
+ },
+ {
+ "name": "orca-mini:7b",
+ "description": "Medium 7B parameter model optimized for chat",
+ "model_family": "Orca",
+ "size": 4000000000
+ },
+ # Llava models (multimodal)
+ {
+ "name": "llava",
+ "description": "Multimodal model with vision capabilities",
+ "model_family": "LLaVA",
+ "size": 4700000000
+ },
+ {
+ "name": "llava:13b",
+ "description": "Multimodal model with vision capabilities (13B)",
+ "model_family": "LLaVA",
+ "size": 8000000000
+ },
+ {
+ "name": "llava:34b",
+ "description": "Multimodal model with vision capabilities (34B)",
+ "model_family": "LLaVA",
+ "size": 20000000000
+ },
+ # CodeLlama models
+ {
+ "name": "codellama",
+ "description": "Llama model fine-tuned for code generation",
+ "model_family": "CodeLlama",
+ "size": 4200000000
+ },
+ {
+ "name": "codellama:7b",
+ "description": "7B parameter Llama model for code generation",
+ "model_family": "CodeLlama",
+ "size": 4200000000
+ },
+ {
+ "name": "codellama:13b",
+ "description": "13B parameter Llama model for code generation",
+ "model_family": "CodeLlama",
+ "size": 8000000000
+ },
+ {
+ "name": "codellama:34b",
+ "description": "34B parameter Llama model for code generation",
+ "model_family": "CodeLlama",
+ "size": 20000000000
+ },
+ # Other models
+ {
+ "name": "neural-chat",
+ "description": "Intel's Neural Chat model",
+ "model_family": "Neural Chat",
+ "size": 4200000000
+ },
+ {
+ "name": "wizard-math",
+ "description": "Specialized for math problem solving",
+ "model_family": "Wizard",
+ "size": 4200000000
+ },
+ {
+ "name": "yi",
+ "description": "01AI's Yi model, high performance",
+ "model_family": "Yi",
+ "size": 4500000000
+ },
+ {
+ "name": "yi:6b",
+ "description": "01AI's Yi 6B parameter model",
+ "model_family": "Yi",
+ "size": 3500000000
+ },
+ {
+ "name": "yi:9b",
+ "description": "01AI's Yi 9B parameter model",
+ "model_family": "Yi",
+ "size": 5000000000
+ },
+ {
+ "name": "yi:34b",
+ "description": "01AI's Yi 34B parameter model, excellent performance",
+ "model_family": "Yi",
+ "size": 20000000000
+ },
+ {
+ "name": "stable-code",
+ "description": "Stability AI's code generation model",
+ "model_family": "StableCode",
+ "size": 4200000000
+ },
+ {
+ "name": "llama2",
+ "description": "Meta's Llama 2 model",
+ "model_family": "Llama",
+ "size": 4200000000
+ },
+ {
+ "name": "llama2:7b",
+ "description": "Meta's Llama 2 7B parameter model",
+ "model_family": "Llama",
+ "size": 4200000000
+ },
+ {
+ "name": "llama2:13b",
+ "description": "Meta's Llama 2 13B parameter model",
+ "model_family": "Llama",
+ "size": 8000000000
+ },
+ {
+ "name": "llama2:70b",
+ "description": "Meta's Llama 2 70B parameter model",
+ "model_family": "Llama",
+ "size": 40000000000
+ },
+ {
+ "name": "deepseek-coder",
+ "description": "DeepSeek's code generation model",
+ "model_family": "DeepSeek",
+ "size": 4200000000
+ },
+ {
+ "name": "falcon:40b",
+ "description": "TII's Falcon 40B, very capable model",
+ "model_family": "Falcon",
+ "size": 25000000000
+ },
+ {
+ "name": "qwen:14b",
+ "description": "Alibaba's Qwen 14B model",
+ "model_family": "Qwen",
+ "size": 9000000000
+ }
+ ]
+
+ # Filter by query if provided
+ query = query.lower() if query else ""
+ if query:
+ filtered_models = []
+ for model in models:
+ if (query in model["name"].lower() or
+ query in model["description"].lower() or
+ query in model["model_family"].lower()):
+ filtered_models.append(model)
+ return filtered_models
+
+ return models
+
+ async def pull_model(self, model_id: str) -> AsyncGenerator[Dict[str, Any], None]:
+ """Pull a model from Ollama registry with progress updates"""
+ logger.info(f"Pulling model: {model_id}")
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(
+ f"{self.base_url}/api/pull",
+ json={"name": model_id},
+ timeout=3600 # 1 hour timeout for large models
+ ) as response:
+ response.raise_for_status()
+ async for line in response.content:
+ if line:
+ chunk = line.decode().strip()
+ try:
+ data = json.loads(chunk)
+ yield data
+ except json.JSONDecodeError:
+ continue
+ except Exception as e:
+ logger.error(f"Error pulling model: {str(e)}")
+ raise Exception(f"Failed to pull model: {str(e)}")
+
+ async def delete_model(self, model_id: str) -> None:
+ """Delete a model from Ollama"""
+ logger.info(f"Deleting model: {model_id}")
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.delete(
+ f"{self.base_url}/api/delete",
+ json={"name": model_id},
+ timeout=30
+ ) as response:
+ response.raise_for_status()
+ logger.info(f"Model {model_id} deleted successfully")
+ except Exception as e:
+ logger.error(f"Error deleting model: {str(e)}")
+ raise Exception(f"Failed to delete model: {str(e)}")