chat-console 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app/__init__.py CHANGED
@@ -3,4 +3,4 @@ Chat CLI
  A command-line interface for chatting with various LLM providers like ChatGPT and Claude.
  """
 
- __version__ = "0.4.0"
+ __version__ = "0.4.2"
app/api/ollama.py CHANGED
@@ -31,8 +31,96 @@ class OllamaClient(BaseModelClient):
  # Track model loading state
  self._model_loading = False
 
+ # Track preloaded models and their last use timestamp
+ self._preloaded_models = {}
+
+ # Default timeout values (in seconds)
+ self.DEFAULT_TIMEOUT = 30
+ self.MODEL_LOAD_TIMEOUT = 120
+ self.MODEL_PULL_TIMEOUT = 3600 # 1 hour for large models
+
  # Path to the cached models file
  self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
+ def get_timeout_for_model(self, model_id: str, operation: str = "generate") -> int:
+ """
+ Calculate an appropriate timeout based on model size
+
+ Parameters:
+ - model_id: The model identifier
+ - operation: The operation type ('generate', 'load', 'pull')
+
+ Returns:
+ - Timeout in seconds
+ """
+ # Default timeouts by operation
+ default_timeouts = {
+ "generate": self.DEFAULT_TIMEOUT, # 30s
+ "load": self.MODEL_LOAD_TIMEOUT, # 2min
+ "pull": self.MODEL_PULL_TIMEOUT, # 1h
+ "list": 5, # 5s
+ "test": 2 # 2s
+ }
+
+ # Parameter size multipliers
+ size_multipliers = {
+ # For models < 3B
+ "1b": 0.5,
+ "2b": 0.7,
+ "3b": 1.0,
+ # For models 3B-10B
+ "5b": 1.2,
+ "6b": 1.3,
+ "7b": 1.5,
+ "8b": 1.7,
+ "9b": 1.8,
+ # For models 10B-20B
+ "13b": 2.0,
+ "14b": 2.0,
+ # For models 20B-50B
+ "27b": 3.0,
+ "34b": 3.5,
+ "40b": 4.0,
+ # For models 50B+
+ "70b": 5.0,
+ "80b": 6.0,
+ "100b": 7.0,
+ "400b": 10.0,
+ "405b": 10.0,
+ }
+
+ # Get the base timeout for the operation
+ base_timeout = default_timeouts.get(operation, self.DEFAULT_TIMEOUT)
+
+ # Try to determine the model size from the model ID
+ model_size = "7b" # Default assumption is 7B parameters
+ model_lower = model_id.lower()
+
+ # Check for size indicators in the model name
+ for size in size_multipliers.keys():
+ if size in model_lower:
+ model_size = size
+ break
+
+ # If it's a known large model without size in name
+ if "llama3.1" in model_lower and not any(size in model_lower for size in size_multipliers.keys()):
+ model_size = "8b" # Default for llama3.1 without size specified
+
+ # For first generation after model selection, if preloaded, use shorter timeout
+ if operation == "generate" and model_id in self._preloaded_models:
+ # For preloaded models, use a shorter timeout
+ return max(int(base_timeout * 0.7), 20) # Min 20 seconds
+
+ # Calculate final timeout with multiplier
+ multiplier = size_multipliers.get(model_size, 1.0)
+ timeout = int(base_timeout * multiplier)
+
+ # For pull operation, ensure we have a reasonable maximum
+ if operation == "pull":
+ return min(timeout, 7200) # Max 2 hours
+
+ logger.info(f"Calculated timeout for {model_id} ({operation}): {timeout}s (base: {base_timeout}s, multiplier: {multiplier})")
+ return timeout
 
  @classmethod
  async def create(cls) -> 'OllamaClient':
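Reviewer note: the new get_timeout_for_model scales a per-operation base timeout by a factor inferred from the parameter-count marker in the model ID. A minimal standalone sketch of that scaling (illustrative only, using a subset of the multipliers; the names below are local to this sketch, not part of the package):

    # Hypothetical illustration of the 0.4.2 timeout scaling for the "generate" operation.
    DEFAULT_TIMEOUT = 30
    SIZE_MULTIPLIERS = {"2b": 0.7, "7b": 1.5, "13b": 2.0, "70b": 5.0}

    def estimate_generate_timeout(model_id: str) -> int:
        """Approximate the 'generate' timeout for a model ID."""
        model_lower = model_id.lower()
        # Fall back to the 7B multiplier when no size marker is found,
        # mirroring the method's default assumption.
        multiplier = next(
            (m for size, m in SIZE_MULTIPLIERS.items() if size in model_lower), 1.5
        )
        return int(DEFAULT_TIMEOUT * multiplier)

    print(estimate_generate_timeout("gemma:2b"))    # 21
    print(estimate_generate_timeout("llama3:70b"))  # 150
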
@@ -61,7 +149,29 @@ class OllamaClient(BaseModelClient):
  style_instructions = self._get_style_instructions(style)
  debug_log(f"Adding style instructions: {style_instructions[:50]}...")
  formatted_messages.append(style_instructions)
+
+ # Special case for title generation - check if this is a title generation message
+ is_title_generation = False
+ for msg in messages:
+ if msg.get("role") == "system" and "generate a brief, descriptive title" in msg.get("content", "").lower():
+ is_title_generation = True
+ debug_log("Detected title generation prompt")
+ break
+
+ # For title generation, use a direct approach
+ if is_title_generation:
+ debug_log("Using specialized formatting for title generation")
+ # Find the user message containing the input for title generation
+ user_msg = next((msg for msg in messages if msg.get("role") == "user"), None)
+ if user_msg and "content" in user_msg:
+ # Create a direct prompt
+ prompt = "Generate a short descriptive title (maximum 40 characters) for this conversation. ONLY RESPOND WITH THE TITLE FOR THE FOLLOWING MESSAGE:\n\n" + user_msg["content"]
+ debug_log(f"Created title generation prompt: {prompt[:100]}...")
+ return prompt
+ else:
+ debug_log("Could not find user message for title generation, using standard formatting")
 
+ # Standard processing for normal chat messages
  # Add message content, preserving conversation flow
  for i, msg in enumerate(messages):
  try:
@@ -185,6 +295,7 @@ class OllamaClient(BaseModelClient):
  try:
  async with aiohttp.ClientSession() as session:
  logger.debug(f"Sending request to {self.base_url}/api/generate")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  async with session.post(
  f"{self.base_url}/api/generate",
  json={
@@ -193,12 +304,16 @@ class OllamaClient(BaseModelClient):
  "temperature": temperature,
  "stream": False
  },
- timeout=30
+ timeout=gen_timeout
  ) as response:
  response.raise_for_status()
  data = await response.json()
  if "response" not in data:
  raise Exception("Invalid response format from Ollama server")
+
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  return data["response"]
 
  except aiohttp.ClientConnectorError:
@@ -324,10 +439,11 @@ class OllamaClient(BaseModelClient):
  "stream": False
  }
 
+ test_timeout = self.get_timeout_for_model(model, "test")
  async with session.post(
  f"{self.base_url}/api/generate",
  json=test_payload,
- timeout=2
+ timeout=test_timeout
  ) as response:
  if response.status != 200:
  logger.warning(f"Model test request failed with status {response.status}")
@@ -361,10 +477,11 @@ class OllamaClient(BaseModelClient):
  debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
  pull_payload = {"name": "gemma:2b"} # Safe default
 
+ pull_timeout = self.get_timeout_for_model(model, "pull")
  async with session.post(
  f"{self.base_url}/api/pull",
  json=pull_payload,
- timeout=60
+ timeout=pull_timeout
  ) as pull_response:
  if pull_response.status != 200:
  logger.error("Failed to pull model")
@@ -415,10 +532,11 @@ class OllamaClient(BaseModelClient):
  }
 
  debug_log(f"Sending request to Ollama API")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  response = await session.post(
  f"{self.base_url}/api/generate",
  json=request_payload,
- timeout=60 # Longer timeout for actual generation
+ timeout=gen_timeout
  )
  response.raise_for_status()
  debug_log(f"Response status: {response.status}")
@@ -426,6 +544,9 @@ class OllamaClient(BaseModelClient):
  # Use a simpler async iteration pattern that's less error-prone
  debug_log("Starting to process response stream")
 
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  # Set a flag to track if we've yielded any content
  has_yielded_content = False
 
@@ -535,6 +656,123 @@ class OllamaClient(BaseModelClient):
  def is_loading_model(self) -> bool:
  """Check if Ollama is currently loading a model"""
  return self._model_loading
+
+ async def preload_model(self, model_id: str) -> bool:
+ """
+ Preload a model to keep it hot/ready for use
+ Returns True if successful, False otherwise
+ """
+ from datetime import datetime
+ import asyncio
+
+ logger.info(f"Preloading model: {model_id}")
+
+ # First, check if the model is already preloaded
+ if model_id in self._preloaded_models:
+ # Update timestamp if already preloaded
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Model {model_id} already preloaded, updated timestamp")
+ return True
+
+ try:
+ # We'll use a minimal prompt to load the model
+ warm_up_prompt = "hello"
+
+ # Set model loading state
+ old_loading_state = self._model_loading
+ self._model_loading = True
+
+ async with aiohttp.ClientSession() as session:
+ # First try pulling the model if needed
+ try:
+ logger.info(f"Ensuring model {model_id} is pulled")
+ pull_payload = {"name": model_id}
+ pull_timeout = self.get_timeout_for_model(model_id, "pull")
+ async with session.post(
+ f"{self.base_url}/api/pull",
+ json=pull_payload,
+ timeout=pull_timeout
+ ) as pull_response:
+ # We don't need to process the full pull, just initiate it
+ if pull_response.status != 200:
+ logger.warning(f"Pull request for model {model_id} failed with status {pull_response.status}")
+ except Exception as e:
+ logger.warning(f"Error during model pull check: {str(e)}")
+
+ # Now send a small generation request to load the model into memory
+ logger.info(f"Sending warm-up request for model {model_id}")
+ gen_timeout = self.get_timeout_for_model(model_id, "load")
+ async with session.post(
+ f"{self.base_url}/api/generate",
+ json={
+ "model": model_id,
+ "prompt": warm_up_prompt,
+ "temperature": 0.7,
+ "stream": False
+ },
+ timeout=gen_timeout
+ ) as response:
+ if response.status != 200:
+ logger.error(f"Failed to preload model {model_id}, status: {response.status}")
+ self._model_loading = old_loading_state
+ return False
+
+ # Read the response to ensure the model is fully loaded
+ await response.json()
+
+ # Update preloaded models with timestamp
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Successfully preloaded model {model_id}")
+ return True
+ except Exception as e:
+ logger.error(f"Error preloading model {model_id}: {str(e)}")
+ return False
+ finally:
+ # Reset model loading state
+ self._model_loading = old_loading_state
+
+ def get_preloaded_models(self) -> Dict[str, datetime]:
+ """Return the dict of preloaded models and their last use times"""
+ return self._preloaded_models
+
+ def update_model_usage(self, model_id: str) -> None:
+ """Update the timestamp for a model that is being used"""
+ if model_id and model_id in self._preloaded_models:
+ from datetime import datetime
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Updated usage timestamp for model {model_id}")
+
+ async def release_inactive_models(self, max_inactive_minutes: int = 30) -> List[str]:
+ """
+ Release models that have been inactive for more than the specified time
+ Returns a list of model IDs that were released
+ """
+ from datetime import datetime, timedelta
+
+ if not self._preloaded_models:
+ return []
+
+ now = datetime.now()
+ inactive_threshold = timedelta(minutes=max_inactive_minutes)
+ models_to_release = []
+
+ # Find models that have been inactive for too long
+ for model_id, last_used in list(self._preloaded_models.items()):
+ if now - last_used > inactive_threshold:
+ models_to_release.append(model_id)
+
+ # Release the models
+ released_models = []
+ for model_id in models_to_release:
+ try:
+ logger.info(f"Releasing inactive model: {model_id} (inactive for {(now - self._preloaded_models[model_id]).total_seconds() / 60:.1f} minutes)")
+ # We don't have an explicit "unload" API in Ollama, but we can remove it from our tracking
+ del self._preloaded_models[model_id]
+ released_models.append(model_id)
+ except Exception as e:
+ logger.error(f"Error releasing model {model_id}: {str(e)}")
+
+ return released_models
 
  async def get_model_details(self, model_id: str) -> Dict[str, Any]:
  """Get detailed information about a specific Ollama model"""
app/config.py CHANGED
@@ -151,7 +151,9 @@ DEFAULT_CONFIG = {
  "max_history_items": 100,
  "highlight_code": True,
  "auto_save": True,
- "generate_dynamic_titles": True
+ "generate_dynamic_titles": True,
+ "ollama_model_preload": True,
+ "ollama_inactive_timeout_minutes": 30
  }
 
  def validate_config(config):
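Reviewer note: the two new defaults are read back in app/main.py via CONFIG.get. A minimal sketch of that usage (CONFIG here is a stand-in dict; the config loading and merge mechanism is not shown in this diff):

    # Stand-in for the merged application config; only the new keys are shown.
    CONFIG = {
        "ollama_model_preload": True,            # preload Ollama models on selection
        "ollama_inactive_timeout_minutes": 30,   # release models idle this long
    }

    preload_enabled = CONFIG.get("ollama_model_preload", True)
    inactive_minutes = CONFIG.get("ollama_inactive_timeout_minutes", 30)
    print(preload_enabled, inactive_minutes)  # True 30
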
app/main.py CHANGED
@@ -363,7 +363,13 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  self.selected_model = resolve_model_id(default_model_from_config)
  self.selected_style = CONFIG["default_style"] # Keep SimpleChatApp __init__
  self.initial_text = initial_text # Keep SimpleChatApp __init__
- # Removed self.input_widget instance variable
+
+ # Task for model cleanup
+ self._model_cleanup_task = None
+
+ # Inactivity threshold in minutes before releasing model resources
+ # Read from config, default to 30 minutes
+ self.MODEL_INACTIVITY_THRESHOLD = CONFIG.get("ollama_inactive_timeout_minutes", 30)
 
  def compose(self) -> ComposeResult: # Modify SimpleChatApp compose
  """Create the simplified application layout."""
@@ -420,6 +426,11 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  pass # Silently ignore if widget not found yet
 
  self.update_app_info() # Update the model info
+
+ # Start the background task for model cleanup if model preloading is enabled
+ if CONFIG.get("ollama_model_preload", True):
+ self._model_cleanup_task = asyncio.create_task(self._check_inactive_models())
+ debug_log("Started background task for model cleanup")
 
  # Check API keys and services # Keep SimpleChatApp on_mount
  api_issues = [] # Keep SimpleChatApp on_mount
@@ -675,29 +686,87 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
  # Determine title client and model based on available keys
  if OPENAI_API_KEY:
+ # For highest success rate, use OpenAI for title generation when available
  from app.api.openai import OpenAIClient
  title_client = await OpenAIClient.create()
  title_model = "gpt-3.5-turbo"
  debug_log("Using OpenAI for background title generation")
  elif ANTHROPIC_API_KEY:
+ # Next best option is Anthropic
  from app.api.anthropic import AnthropicClient
  title_client = await AnthropicClient.create()
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for background title generation")
  else:
  # Fallback to the currently selected model's client if no API keys
+ # Get client type first to ensure we correctly identify Ollama models
+ from app.api.ollama import OllamaClient
  selected_model_resolved = resolve_model_id(self.selected_model)
- title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
- title_model = selected_model_resolved
- debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
+ client_type = BaseModelClient.get_client_type_for_model(selected_model_resolved)
+
+ # For Ollama models, special handling is required
+ if client_type == OllamaClient:
+ debug_log(f"Title generation with Ollama model detected: {selected_model_resolved}")
+
+ # Try common small/fast models first if they exist
+ try:
+ # Check if we have any smaller models available for faster title generation
+ ollama_client = await OllamaClient.create()
+ available_models = await ollama_client.get_available_models()
+ small_model_options = ["gemma:2b", "phi3:mini", "llama3:8b", "orca-mini:3b", "phi2"]
+
+ small_model_found = False
+ for model_name in small_model_options:
+ if any(model["id"] == model_name for model in available_models):
+ debug_log(f"Found smaller Ollama model for title generation: {model_name}")
+ title_model = model_name
+ small_model_found = True
+ break
+
+ if not small_model_found:
+ # Use the current model if no smaller models found
+ title_model = selected_model_resolved
+ debug_log(f"No smaller models found, using current model: {title_model}")
+
+ # Always create a fresh client instance to avoid interference with model preloading
+ title_client = ollama_client
+ debug_log(f"Created dedicated Ollama client for title generation with model: {title_model}")
+ except Exception as e:
+ debug_log(f"Error finding optimized Ollama model for title generation: {str(e)}")
+ # Fallback to standard approach
+ title_client = await OllamaClient.create()
+ title_model = selected_model_resolved
+ else:
+ # For other providers, use normal client acquisition
+ title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
+ title_model = selected_model_resolved
+ debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
 
  if not title_client or not title_model:
  raise Exception("Could not determine a client/model for title generation.")
 
  # Call the utility function
  from app.utils import generate_conversation_title # Import locally if needed
- new_title = await generate_conversation_title(content, title_model, title_client)
- debug_log(f"Background generated title: {new_title}")
+
+ # Add timeout handling for title generation to prevent hangs
+ try:
+ # Create a task with timeout
+ import asyncio
+ title_generation_task = asyncio.create_task(
+ generate_conversation_title(content, title_model, title_client)
+ )
+
+ # Wait for completion with timeout (30 seconds)
+ new_title = await asyncio.wait_for(title_generation_task, timeout=30)
+ debug_log(f"Background generated title: {new_title}")
+ except asyncio.TimeoutError:
+ debug_log("Title generation timed out after 30 seconds")
+ # Use default title in case of timeout
+ new_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
+ # Try to cancel the task
+ if not title_generation_task.done():
+ title_generation_task.cancel()
+ debug_log("Cancelled timed out title generation task")
 
  # Check if title generation returned the default or a real title
  if new_title and not new_title.startswith("Conversation ("):
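Reviewer note: the timeout guard added above follows the standard asyncio.wait_for pattern (which itself cancels the wrapped task on timeout, so the explicit cancel is defensive). A minimal self-contained sketch of that pattern, with a stand-in coroutine in place of generate_conversation_title:

    import asyncio
    from datetime import datetime

    async def slow_title() -> str:
        # Stand-in for generate_conversation_title(...); deliberately slower than the timeout.
        await asyncio.sleep(60)
        return "A real title"

    async def main() -> None:
        task = asyncio.create_task(slow_title())
        try:
            title = await asyncio.wait_for(task, timeout=1)
        except asyncio.TimeoutError:
            # Fall back to a dated default, as the app does after 30 seconds.
            title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
        print(title)

    asyncio.run(main())
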
@@ -718,8 +787,8 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  title_widget.update(new_title)
  self.current_conversation.title = new_title # Update local object too
  log(f"Background title update successful: {new_title}")
- # Maybe a subtle notification? Optional.
- # self.notify(f"Title set: {new_title}", severity="information", timeout=2)
+ # Subtle notification to show title was updated
+ self.notify(f"Conversation titled: {new_title}", severity="information", timeout=2)
  else:
  log("Conversation changed before background title update could apply.")
  else:
@@ -1226,6 +1295,94 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  log(f"Stored selected provider: {self.selected_provider} for model: {self.selected_model}")
 
  self.update_app_info() # Update the displayed model info
+
+ # Preload the model if it's an Ollama model and preloading is enabled
+ if self.selected_provider == "ollama" and CONFIG.get("ollama_model_preload", True):
+ # Start the background task to preload the model
+ debug_log(f"Starting background task to preload Ollama model: {self.selected_model}")
+ asyncio.create_task(self._preload_ollama_model(self.selected_model))
+
+ async def _preload_ollama_model(self, model_id: str) -> None:
+ """Preload an Ollama model in the background"""
+ from app.api.ollama import OllamaClient
+
+ debug_log(f"Preloading Ollama model: {model_id}")
+ # Show a subtle notification to the user
+ self.notify("Preparing model for use...", severity="information", timeout=3)
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Update the loading indicator to show model loading
+ loading = self.query_one("#loading-indicator")
+ loading.remove_class("hidden")
+ loading.add_class("model-loading")
+ loading.update(f"⚙️ Loading Ollama model...")
+
+ # Preload the model
+ success = await client.preload_model(model_id)
+
+ # Hide the loading indicator
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+
+ if success:
+ debug_log(f"Successfully preloaded model: {model_id}")
+ self.notify(f"Model ready for use", severity="success", timeout=2)
+ else:
+ debug_log(f"Failed to preload model: {model_id}")
+ # No need to notify the user about failure - will happen naturally on first use
+ except Exception as e:
+ debug_log(f"Error preloading model: {str(e)}")
+ # Make sure to hide the loading indicator
+ try:
+ loading = self.query_one("#loading-indicator")
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+ except Exception:
+ pass
+
+ async def _check_inactive_models(self) -> None:
+ """Background task to check for and release inactive models"""
+ from app.api.ollama import OllamaClient
+
+ # How often to check for inactive models (in seconds)
+ CHECK_INTERVAL = 600 # 10 minutes
+
+ debug_log(f"Starting inactive model check task with interval {CHECK_INTERVAL}s")
+
+ try:
+ while True:
+ await asyncio.sleep(CHECK_INTERVAL)
+
+ debug_log("Checking for inactive models...")
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Get the threshold from instance variable
+ threshold = getattr(self, "MODEL_INACTIVITY_THRESHOLD", 30)
+
+ # Check and release inactive models
+ released_models = await client.release_inactive_models(threshold)
+
+ if released_models:
+ debug_log(f"Released {len(released_models)} inactive models: {released_models}")
+ else:
+ debug_log("No inactive models to release")
+
+ except Exception as e:
+ debug_log(f"Error checking for inactive models: {str(e)}")
+ # Continue loop even if this check fails
+
+ except asyncio.CancelledError:
+ debug_log("Model cleanup task cancelled")
+ # Normal task cancellation, clean exit
+ except Exception as e:
+ debug_log(f"Unexpected error in model cleanup task: {str(e)}")
+ # Log but don't crash
 
  def on_style_selector_style_selected(self, event: StyleSelector.StyleSelected) -> None: # Keep SimpleChatApp on_style_selector_style_selected
  """Handle style selection""" # Keep SimpleChatApp on_style_selector_style_selected docstring
app/utils.py CHANGED
@@ -32,6 +32,11 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
 
  # Try-except the entire function to ensure we always return a title
  try:
+ # Check if we're using an Ollama client
+ from app.api.ollama import OllamaClient
+ is_ollama_client = isinstance(client, OllamaClient)
+ debug_log(f"Client is Ollama: {is_ollama_client}")
+
  # Pick a reliable title generation model - prefer OpenAI if available
  from app.config import OPENAI_API_KEY, ANTHROPIC_API_KEY
 
@@ -46,10 +51,16 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for title generation")
  else:
- # Use the passed client if no API keys available
- title_client = client
- title_model = model
- debug_log(f"Using provided {type(client).__name__} for title generation")
+ # For Ollama clients, ensure we have a clean instance to avoid conflicts with preloaded models
+ if is_ollama_client:
+ debug_log("Creating fresh Ollama client instance for title generation")
+ title_client = await OllamaClient.create()
+ title_model = model
+ else:
+ # Use the passed client for other providers
+ title_client = client
+ title_model = model
+ debug_log(f"Using {type(title_client).__name__} for title generation with model {title_model}")
 
  # Create a special prompt for title generation
  title_prompt = [
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chat-console
- Version: 0.4.0
+ Version: 0.4.2
  Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
  Home-page: https://github.com/wazacraftrfid/chat-console
  Author: Johnathan Greenaway
@@ -1,13 +1,13 @@
- app/__init__.py,sha256=wlOzzHK2mYbpcY6MSNp2l_vlKYdCrr8nzakp8kl3ffU,130
- app/config.py,sha256=tuTdJfmRsGx2-6PwkpbIsuP2hnB3ZPXRJdCvgzFd8PQ,8391
+ app/__init__.py,sha256=aSLqazfzD7omcg24nSmExHXGmkwbhjI84zBzS-AL-To,130
+ app/config.py,sha256=F-0hO3NT5kRJxZelGLxaeUmnwx8i0LPHzYtNftL6CwM,8468
  app/database.py,sha256=nt8CVuDpy6zw8mOYqDcfUmNw611t7Ln7pz22M0b6-MI,9967
- app/main.py,sha256=WOcMP6yRwoEzftTSHf0e3zVK1aEuBgKMAsNbzHyKgiA,77427
+ app/main.py,sha256=lOJiPpBE2F8PQlraVeRHGKS_C0Zka8n__lcE5LW3Gvo,85554
  app/models.py,sha256=4-y9Lytay2exWPFi0FDlVeRL3K2-I7E-jBqNzTfokqY,2644
- app/utils.py,sha256=G8e7ucCuIa-M8tpPDEd9PeWKIb8hN1FPpZnk_RHeRKo,40283
+ app/utils.py,sha256=RtyW_kK3M8WWSDfkuH6G2OuLcInIXfPG4Nv_xijf-gs,40877
  app/api/__init__.py,sha256=A8UL84ldYlv8l7O-yKzraVFcfww86SgWfpl4p7R03-w,62
  app/api/anthropic.py,sha256=uInwNvGLJ_iPUs4BjdwaqXTU6NfmK1SzX7498Pt44fI,10667
  app/api/base.py,sha256=valBWV5So76r8tjrgU5-sLfY73WaViTrszdCy8Rimjo,10314
- app/api/ollama.py,sha256=eFG24nI2MlF57z9EHiA97v02NgFJ0kxaPUX26xAXFsg,66154
+ app/api/ollama.py,sha256=bVG2UPkpA2arHKyRVLwHt2Q9Lth_qdZLfijiNXvPuYA,76771
  app/api/openai.py,sha256=XuHJHpD7tN_ZHLkRpNUcL1VxTtsXOVk1hDPXX8JnBxQ,15322
  app/ui/__init__.py,sha256=RndfbQ1Tv47qdSiuQzvWP96lPS547SDaGE-BgOtiP_w,55
  app/ui/chat_interface.py,sha256=oSDZi0Jgj_L8WnBh1RuJpIeIcN-RQ38CNejwsXiWTVg,18267
@@ -16,9 +16,9 @@ app/ui/model_browser.py,sha256=pdblLVkdyVF0_Bo02bqbErGAtieyH-y6IfhMOPEqIso,71124
  app/ui/model_selector.py,sha256=2G0TOXfcNodrXZOhLeaJJ2iG3Nck4c_NN1AvUAmaF3M,19172
  app/ui/search.py,sha256=b-m14kG3ovqW1-i0qDQ8KnAqFJbi5b1FLM9dOnbTyIs,9763
  app/ui/styles.py,sha256=04AhPuLrOd2yenfRySFRestPeuTPeMLzhmMB67NdGvw,5615
- chat_console-0.4.0.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
- chat_console-0.4.0.dist-info/METADATA,sha256=5vwV6wZ4lkp7Puo12-JP6Do3Ap3ZG7Z75lnsn7VGqnI,3810
- chat_console-0.4.0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- chat_console-0.4.0.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
- chat_console-0.4.0.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
- chat_console-0.4.0.dist-info/RECORD,,
+ chat_console-0.4.2.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
+ chat_console-0.4.2.dist-info/METADATA,sha256=_-h1KJkkcpmExhmDOmF-4P2Qj8iEAm-RiziHBX-_11Q,3810
+ chat_console-0.4.2.dist-info/WHEEL,sha256=7ciDxtlje1X8OhobNuGgi1t-ACdFSelPnSmDPrtlobY,91
+ chat_console-0.4.2.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
+ chat_console-0.4.2.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
+ chat_console-0.4.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.1.0)
+ Generator: setuptools (80.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 