chat-console 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app/__init__.py CHANGED
@@ -3,4 +3,4 @@ Chat CLI
  A command-line interface for chatting with various LLM providers like ChatGPT and Claude.
  """
 
- __version__ = "0.4.0"
+ __version__ = "0.4.3"
app/api/ollama.py CHANGED
@@ -31,8 +31,96 @@ class OllamaClient(BaseModelClient):
  # Track model loading state
  self._model_loading = False
 
+ # Track preloaded models and their last use timestamp
+ self._preloaded_models = {}
+
+ # Default timeout values (in seconds)
+ self.DEFAULT_TIMEOUT = 30
+ self.MODEL_LOAD_TIMEOUT = 120
+ self.MODEL_PULL_TIMEOUT = 3600 # 1 hour for large models
+
  # Path to the cached models file
  self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
+ def get_timeout_for_model(self, model_id: str, operation: str = "generate") -> int:
+ """
+ Calculate an appropriate timeout based on model size
+
+ Parameters:
+ - model_id: The model identifier
+ - operation: The operation type ('generate', 'load', 'pull')
+
+ Returns:
+ - Timeout in seconds
+ """
+ # Default timeouts by operation
+ default_timeouts = {
+ "generate": self.DEFAULT_TIMEOUT, # 30s
+ "load": self.MODEL_LOAD_TIMEOUT, # 2min
+ "pull": self.MODEL_PULL_TIMEOUT, # 1h
+ "list": 5, # 5s
+ "test": 2 # 2s
+ }
+
+ # Parameter size multipliers
+ size_multipliers = {
+ # For models < 3B
+ "1b": 0.5,
+ "2b": 0.7,
+ "3b": 1.0,
+ # For models 3B-10B
+ "5b": 1.2,
+ "6b": 1.3,
+ "7b": 1.5,
+ "8b": 1.7,
+ "9b": 1.8,
+ # For models 10B-20B
+ "13b": 2.0,
+ "14b": 2.0,
+ # For models 20B-50B
+ "27b": 3.0,
+ "34b": 3.5,
+ "40b": 4.0,
+ # For models 50B+
+ "70b": 5.0,
+ "80b": 6.0,
+ "100b": 7.0,
+ "400b": 10.0,
+ "405b": 10.0,
+ }
+
+ # Get the base timeout for the operation
+ base_timeout = default_timeouts.get(operation, self.DEFAULT_TIMEOUT)
+
+ # Try to determine the model size from the model ID
+ model_size = "7b" # Default assumption is 7B parameters
+ model_lower = model_id.lower()
+
+ # Check for size indicators in the model name
+ for size in size_multipliers.keys():
+ if size in model_lower:
+ model_size = size
+ break
+
+ # If it's a known large model without size in name
+ if "llama3.1" in model_lower and not any(size in model_lower for size in size_multipliers.keys()):
+ model_size = "8b" # Default for llama3.1 without size specified
+
+ # For first generation after model selection, if preloaded, use shorter timeout
+ if operation == "generate" and model_id in self._preloaded_models:
+ # For preloaded models, use a shorter timeout
+ return max(int(base_timeout * 0.7), 20) # Min 20 seconds
+
+ # Calculate final timeout with multiplier
+ multiplier = size_multipliers.get(model_size, 1.0)
+ timeout = int(base_timeout * multiplier)
+
+ # For pull operation, ensure we have a reasonable maximum
+ if operation == "pull":
+ return min(timeout, 7200) # Max 2 hours
+
+ logger.info(f"Calculated timeout for {model_id} ({operation}): {timeout}s (base: {base_timeout}s, multiplier: {multiplier})")
+ return timeout
 
  @classmethod
  async def create(cls) -> 'OllamaClient':
@@ -61,7 +149,29 @@ class OllamaClient(BaseModelClient):
  style_instructions = self._get_style_instructions(style)
  debug_log(f"Adding style instructions: {style_instructions[:50]}...")
  formatted_messages.append(style_instructions)
+
+ # Special case for title generation - check if this is a title generation message
+ is_title_generation = False
+ for msg in messages:
+ if msg.get("role") == "system" and "generate a brief, descriptive title" in msg.get("content", "").lower():
+ is_title_generation = True
+ debug_log("Detected title generation prompt")
+ break
+
+ # For title generation, use a direct approach
+ if is_title_generation:
+ debug_log("Using specialized formatting for title generation")
+ # Find the user message containing the input for title generation
+ user_msg = next((msg for msg in messages if msg.get("role") == "user"), None)
+ if user_msg and "content" in user_msg:
+ # Create a direct prompt
+ prompt = "You must generate a short, descriptive title (maximum 40 characters) for this conversation. ONLY output the title with no additional text, no quotes, and no explanation. Do not start with phrases like 'Here's a title' or 'Title:'. RESPOND ONLY WITH THE TITLE TEXT for the following message:\n\n" + user_msg["content"]
+ debug_log(f"Created title generation prompt: {prompt[:100]}...")
+ return prompt
+ else:
+ debug_log("Could not find user message for title generation, using standard formatting")
 
+ # Standard processing for normal chat messages
  # Add message content, preserving conversation flow
  for i, msg in enumerate(messages):
  try:
@@ -185,6 +295,7 @@ class OllamaClient(BaseModelClient):
  try:
  async with aiohttp.ClientSession() as session:
  logger.debug(f"Sending request to {self.base_url}/api/generate")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  async with session.post(
  f"{self.base_url}/api/generate",
  json={
@@ -193,12 +304,16 @@ class OllamaClient(BaseModelClient):
  "temperature": temperature,
  "stream": False
  },
- timeout=30
+ timeout=gen_timeout
  ) as response:
  response.raise_for_status()
  data = await response.json()
  if "response" not in data:
  raise Exception("Invalid response format from Ollama server")
+
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  return data["response"]
 
  except aiohttp.ClientConnectorError:
@@ -324,10 +439,11 @@ class OllamaClient(BaseModelClient):
  "stream": False
  }
 
+ test_timeout = self.get_timeout_for_model(model, "test")
  async with session.post(
  f"{self.base_url}/api/generate",
  json=test_payload,
- timeout=2
+ timeout=test_timeout
  ) as response:
  if response.status != 200:
  logger.warning(f"Model test request failed with status {response.status}")
@@ -361,10 +477,11 @@ class OllamaClient(BaseModelClient):
  debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
  pull_payload = {"name": "gemma:2b"} # Safe default
 
+ pull_timeout = self.get_timeout_for_model(model, "pull")
  async with session.post(
  f"{self.base_url}/api/pull",
  json=pull_payload,
- timeout=60
+ timeout=pull_timeout
  ) as pull_response:
  if pull_response.status != 200:
  logger.error("Failed to pull model")
@@ -415,10 +532,11 @@ class OllamaClient(BaseModelClient):
  }
 
  debug_log(f"Sending request to Ollama API")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  response = await session.post(
  f"{self.base_url}/api/generate",
  json=request_payload,
- timeout=60 # Longer timeout for actual generation
+ timeout=gen_timeout
  )
  response.raise_for_status()
  debug_log(f"Response status: {response.status}")
@@ -426,6 +544,9 @@ class OllamaClient(BaseModelClient):
  # Use a simpler async iteration pattern that's less error-prone
  debug_log("Starting to process response stream")
 
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  # Set a flag to track if we've yielded any content
  has_yielded_content = False
 
@@ -535,6 +656,123 @@ class OllamaClient(BaseModelClient):
  def is_loading_model(self) -> bool:
  """Check if Ollama is currently loading a model"""
  return self._model_loading
+
+ async def preload_model(self, model_id: str) -> bool:
+ """
+ Preload a model to keep it hot/ready for use
+ Returns True if successful, False otherwise
+ """
+ from datetime import datetime
+ import asyncio
+
+ logger.info(f"Preloading model: {model_id}")
+
+ # First, check if the model is already preloaded
+ if model_id in self._preloaded_models:
+ # Update timestamp if already preloaded
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Model {model_id} already preloaded, updated timestamp")
+ return True
+
+ try:
+ # We'll use a minimal prompt to load the model
+ warm_up_prompt = "hello"
+
+ # Set model loading state
+ old_loading_state = self._model_loading
+ self._model_loading = True
+
+ async with aiohttp.ClientSession() as session:
+ # First try pulling the model if needed
+ try:
+ logger.info(f"Ensuring model {model_id} is pulled")
+ pull_payload = {"name": model_id}
+ pull_timeout = self.get_timeout_for_model(model_id, "pull")
+ async with session.post(
+ f"{self.base_url}/api/pull",
+ json=pull_payload,
+ timeout=pull_timeout
+ ) as pull_response:
+ # We don't need to process the full pull, just initiate it
+ if pull_response.status != 200:
+ logger.warning(f"Pull request for model {model_id} failed with status {pull_response.status}")
+ except Exception as e:
+ logger.warning(f"Error during model pull check: {str(e)}")
+
+ # Now send a small generation request to load the model into memory
+ logger.info(f"Sending warm-up request for model {model_id}")
+ gen_timeout = self.get_timeout_for_model(model_id, "load")
+ async with session.post(
+ f"{self.base_url}/api/generate",
+ json={
+ "model": model_id,
+ "prompt": warm_up_prompt,
+ "temperature": 0.7,
+ "stream": False
+ },
+ timeout=gen_timeout
+ ) as response:
+ if response.status != 200:
+ logger.error(f"Failed to preload model {model_id}, status: {response.status}")
+ self._model_loading = old_loading_state
+ return False
+
+ # Read the response to ensure the model is fully loaded
+ await response.json()
+
+ # Update preloaded models with timestamp
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Successfully preloaded model {model_id}")
+ return True
+ except Exception as e:
+ logger.error(f"Error preloading model {model_id}: {str(e)}")
+ return False
+ finally:
+ # Reset model loading state
+ self._model_loading = old_loading_state
+
+ def get_preloaded_models(self) -> Dict[str, datetime]:
+ """Return the dict of preloaded models and their last use times"""
+ return self._preloaded_models
+
+ def update_model_usage(self, model_id: str) -> None:
+ """Update the timestamp for a model that is being used"""
+ if model_id and model_id in self._preloaded_models:
+ from datetime import datetime
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Updated usage timestamp for model {model_id}")
+
+ async def release_inactive_models(self, max_inactive_minutes: int = 30) -> List[str]:
+ """
+ Release models that have been inactive for more than the specified time
+ Returns a list of model IDs that were released
+ """
+ from datetime import datetime, timedelta
+
+ if not self._preloaded_models:
+ return []
+
+ now = datetime.now()
+ inactive_threshold = timedelta(minutes=max_inactive_minutes)
+ models_to_release = []
+
+ # Find models that have been inactive for too long
+ for model_id, last_used in list(self._preloaded_models.items()):
+ if now - last_used > inactive_threshold:
+ models_to_release.append(model_id)
+
+ # Release the models
+ released_models = []
+ for model_id in models_to_release:
+ try:
+ logger.info(f"Releasing inactive model: {model_id} (inactive for {(now - self._preloaded_models[model_id]).total_seconds() / 60:.1f} minutes)")
+ # We don't have an explicit "unload" API in Ollama, but we can remove it from our tracking
+ del self._preloaded_models[model_id]
+ released_models.append(model_id)
+ except Exception as e:
+ logger.error(f"Error releasing model {model_id}: {str(e)}")
+
+ return released_models
 
  async def get_model_details(self, model_id: str) -> Dict[str, Any]:
  """Get detailed information about a specific Ollama model"""
app/config.py CHANGED
@@ -151,7 +151,9 @@ DEFAULT_CONFIG = {
  "max_history_items": 100,
  "highlight_code": True,
  "auto_save": True,
- "generate_dynamic_titles": True
+ "generate_dynamic_titles": True,
+ "ollama_model_preload": True,
+ "ollama_inactive_timeout_minutes": 30
  }
 
  def validate_config(config):
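
The two new defaults enable preloading and set a 30-minute idle window; downstream code reads them back with CONFIG.get, as in this sketch (assuming CONFIG is the merged settings mapping exposed by app.config):

    from app.config import CONFIG

    preload_enabled = CONFIG.get("ollama_model_preload", True)
    idle_minutes = CONFIG.get("ollama_inactive_timeout_minutes", 30)
    # Override either key in the user config to disable preloading or change the idle window.
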
app/main.py CHANGED
@@ -363,7 +363,13 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  self.selected_model = resolve_model_id(default_model_from_config)
  self.selected_style = CONFIG["default_style"] # Keep SimpleChatApp __init__
  self.initial_text = initial_text # Keep SimpleChatApp __init__
- # Removed self.input_widget instance variable
+
+ # Task for model cleanup
+ self._model_cleanup_task = None
+
+ # Inactivity threshold in minutes before releasing model resources
+ # Read from config, default to 30 minutes
+ self.MODEL_INACTIVITY_THRESHOLD = CONFIG.get("ollama_inactive_timeout_minutes", 30)
 
  def compose(self) -> ComposeResult: # Modify SimpleChatApp compose
  """Create the simplified application layout."""
@@ -420,6 +426,11 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  pass # Silently ignore if widget not found yet
 
  self.update_app_info() # Update the model info
+
+ # Start the background task for model cleanup if model preloading is enabled
+ if CONFIG.get("ollama_model_preload", True):
+ self._model_cleanup_task = asyncio.create_task(self._check_inactive_models())
+ debug_log("Started background task for model cleanup")
 
  # Check API keys and services # Keep SimpleChatApp on_mount
  api_issues = [] # Keep SimpleChatApp on_mount
@@ -675,29 +686,98 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
  # Determine title client and model based on available keys
  if OPENAI_API_KEY:
+ # For highest success rate, use OpenAI for title generation when available
  from app.api.openai import OpenAIClient
  title_client = await OpenAIClient.create()
  title_model = "gpt-3.5-turbo"
  debug_log("Using OpenAI for background title generation")
  elif ANTHROPIC_API_KEY:
+ # Next best option is Anthropic
  from app.api.anthropic import AnthropicClient
  title_client = await AnthropicClient.create()
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for background title generation")
  else:
  # Fallback to the currently selected model's client if no API keys
+ # Get client type first to ensure we correctly identify Ollama models
+ from app.api.ollama import OllamaClient
  selected_model_resolved = resolve_model_id(self.selected_model)
- title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
- title_model = selected_model_resolved
- debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
+ client_type = BaseModelClient.get_client_type_for_model(selected_model_resolved)
+
+ # For Ollama models, special handling is required
+ if client_type == OllamaClient:
+ debug_log(f"Title generation with Ollama model detected: {selected_model_resolved}")
+
+ # Always try to use smalllm2:135m first, then fall back to other small models
+ try:
+ # Check if we have smalllm2:135m or other smaller models available
+ ollama_client = await OllamaClient.create()
+ available_models = await ollama_client.get_available_models()
+
+ # Use smalllm2:135m if available (extremely small and fast)
+ preferred_model = "smalllm2:135m"
+ fallback_models = ["tinyllama", "gemma:2b", "phi3:mini", "llama3:8b", "orca-mini:3b", "phi2"]
+
+ # First check for our preferred smallest model
+ small_model_found = False
+ if any(model["id"] == preferred_model for model in available_models):
+ debug_log(f"Found optimal small model for title generation: {preferred_model}")
+ title_model = preferred_model
+ small_model_found = True
+
+ # If not found, try fallbacks in order
+ if not small_model_found:
+ for model_name in fallback_models:
+ if any(model["id"] == model_name for model in available_models):
+ debug_log(f"Found alternative small model for title generation: {model_name}")
+ title_model = model_name
+ small_model_found = True
+ break
+
+ if not small_model_found:
+ # Use the current model if no smaller models found
+ title_model = selected_model_resolved
+ debug_log(f"No smaller models found, using current model: {title_model}")
+
+ # Always create a fresh client instance to avoid interference with model preloading
+ title_client = ollama_client
+ debug_log(f"Created dedicated Ollama client for title generation with model: {title_model}")
+ except Exception as e:
+ debug_log(f"Error finding optimized Ollama model for title generation: {str(e)}")
+ # Fallback to standard approach
+ title_client = await OllamaClient.create()
+ title_model = selected_model_resolved
+ else:
+ # For other providers, use normal client acquisition
+ title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
+ title_model = selected_model_resolved
+ debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
 
  if not title_client or not title_model:
  raise Exception("Could not determine a client/model for title generation.")
 
  # Call the utility function
  from app.utils import generate_conversation_title # Import locally if needed
- new_title = await generate_conversation_title(content, title_model, title_client)
- debug_log(f"Background generated title: {new_title}")
+
+ # Add timeout handling for title generation to prevent hangs
+ try:
+ # Create a task with timeout
+ import asyncio
+ title_generation_task = asyncio.create_task(
+ generate_conversation_title(content, title_model, title_client)
+ )
+
+ # Wait for completion with timeout (30 seconds)
+ new_title = await asyncio.wait_for(title_generation_task, timeout=30)
+ debug_log(f"Background generated title: {new_title}")
+ except asyncio.TimeoutError:
+ debug_log("Title generation timed out after 30 seconds")
+ # Use default title in case of timeout
+ new_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
+ # Try to cancel the task
+ if not title_generation_task.done():
+ title_generation_task.cancel()
+ debug_log("Cancelled timed out title generation task")
 
  # Check if title generation returned the default or a real title
  if new_title and not new_title.startswith("Conversation ("):
@@ -718,8 +798,8 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  title_widget.update(new_title)
  self.current_conversation.title = new_title # Update local object too
  log(f"Background title update successful: {new_title}")
- # Maybe a subtle notification? Optional.
- # self.notify(f"Title set: {new_title}", severity="information", timeout=2)
+ # Subtle notification to show title was updated
+ self.notify(f"Conversation titled: {new_title}", severity="information", timeout=2)
  else:
  log("Conversation changed before background title update could apply.")
  else:
@@ -1226,6 +1306,94 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  log(f"Stored selected provider: {self.selected_provider} for model: {self.selected_model}")
 
  self.update_app_info() # Update the displayed model info
+
+ # Preload the model if it's an Ollama model and preloading is enabled
+ if self.selected_provider == "ollama" and CONFIG.get("ollama_model_preload", True):
+ # Start the background task to preload the model
+ debug_log(f"Starting background task to preload Ollama model: {self.selected_model}")
+ asyncio.create_task(self._preload_ollama_model(self.selected_model))
+
+ async def _preload_ollama_model(self, model_id: str) -> None:
+ """Preload an Ollama model in the background"""
+ from app.api.ollama import OllamaClient
+
+ debug_log(f"Preloading Ollama model: {model_id}")
+ # Show a subtle notification to the user
+ self.notify("Preparing model for use...", severity="information", timeout=3)
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Update the loading indicator to show model loading
+ loading = self.query_one("#loading-indicator")
+ loading.remove_class("hidden")
+ loading.add_class("model-loading")
+ loading.update(f"⚙️ Loading Ollama model...")
+
+ # Preload the model
+ success = await client.preload_model(model_id)
+
+ # Hide the loading indicator
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+
+ if success:
+ debug_log(f"Successfully preloaded model: {model_id}")
+ self.notify(f"Model ready for use", severity="success", timeout=2)
+ else:
+ debug_log(f"Failed to preload model: {model_id}")
+ # No need to notify the user about failure - will happen naturally on first use
+ except Exception as e:
+ debug_log(f"Error preloading model: {str(e)}")
+ # Make sure to hide the loading indicator
+ try:
+ loading = self.query_one("#loading-indicator")
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+ except Exception:
+ pass
+
+ async def _check_inactive_models(self) -> None:
+ """Background task to check for and release inactive models"""
+ from app.api.ollama import OllamaClient
+
+ # How often to check for inactive models (in seconds)
+ CHECK_INTERVAL = 600 # 10 minutes
+
+ debug_log(f"Starting inactive model check task with interval {CHECK_INTERVAL}s")
+
+ try:
+ while True:
+ await asyncio.sleep(CHECK_INTERVAL)
+
+ debug_log("Checking for inactive models...")
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Get the threshold from instance variable
+ threshold = getattr(self, "MODEL_INACTIVITY_THRESHOLD", 30)
+
+ # Check and release inactive models
+ released_models = await client.release_inactive_models(threshold)
+
+ if released_models:
+ debug_log(f"Released {len(released_models)} inactive models: {released_models}")
+ else:
+ debug_log("No inactive models to release")
+
+ except Exception as e:
+ debug_log(f"Error checking for inactive models: {str(e)}")
+ # Continue loop even if this check fails
+
+ except asyncio.CancelledError:
+ debug_log("Model cleanup task cancelled")
+ # Normal task cancellation, clean exit
+ except Exception as e:
+ debug_log(f"Unexpected error in model cleanup task: {str(e)}")
+ # Log but don't crash
 
  def on_style_selector_style_selected(self, event: StyleSelector.StyleSelected) -> None: # Keep SimpleChatApp on_style_selector_style_selected
  """Handle style selection""" # Keep SimpleChatApp on_style_selector_style_selected docstring
app/utils.py CHANGED
@@ -32,6 +32,11 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
 
  # Try-except the entire function to ensure we always return a title
  try:
+ # Check if we're using an Ollama client
+ from app.api.ollama import OllamaClient
+ is_ollama_client = isinstance(client, OllamaClient)
+ debug_log(f"Client is Ollama: {is_ollama_client}")
+
  # Pick a reliable title generation model - prefer OpenAI if available
  from app.config import OPENAI_API_KEY, ANTHROPIC_API_KEY
 
@@ -46,16 +51,22 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for title generation")
  else:
- # Use the passed client if no API keys available
- title_client = client
- title_model = model
- debug_log(f"Using provided {type(client).__name__} for title generation")
+ # For Ollama clients, ensure we have a clean instance to avoid conflicts with preloaded models
+ if is_ollama_client:
+ debug_log("Creating fresh Ollama client instance for title generation")
+ title_client = await OllamaClient.create()
+ title_model = model
+ else:
+ # Use the passed client for other providers
+ title_client = client
+ title_model = model
+ debug_log(f"Using {type(title_client).__name__} for title generation with model {title_model}")
 
  # Create a special prompt for title generation
  title_prompt = [
  {
  "role": "system",
- "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. Return only the title text with no additional explanation or formatting."
+ "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. ONLY output the title text. DO NOT include phrases like 'Sure, here's a title' or any additional formatting, explanation, or quotes."
  },
  {
  "role": "user",
@@ -85,12 +96,31 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
  max_tokens=60
  )
 
- # Sanitize the title
+ # Sanitize the title - remove quotes, extra spaces and unwanted prefixes
  title = title.strip().strip('"\'').strip()
+
+ # Remove common LLM prefixes like "Title:", "Sure, here's a title:", etc.
+ prefixes_to_remove = [
+ "title:", "here's a title:", "here is a title:",
+ "a title for this conversation:", "sure,", "certainly,",
+ "the title is:", "suggested title:"
+ ]
+
+ # Case-insensitive prefix removal
+ title_lower = title.lower()
+ for prefix in prefixes_to_remove:
+ if title_lower.startswith(prefix):
+ title = title[len(prefix):].strip()
+ title_lower = title.lower() # Update lowercase version after removal
+
+ # Remove any remaining quotes
+ title = title.strip('"\'').strip()
+
+ # Enforce length limit
  if len(title) > 40:
  title = title[:37] + "..."
 
- debug_log(f"Generated title: {title}")
+ debug_log(f"Generated title (after sanitization): {title}")
  return title
 
  except Exception as e:
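
Walking an invented raw reply through the new sanitization pass illustrates the intended effect (the input string is an example, not package output):

    raw = '"Title: Debugging Ollama timeouts"'
    # strip() plus quote stripping leaves: Title: Debugging Ollama timeouts
    # the case-insensitive prefix pass removes "title:", leaving: Debugging Ollama timeouts
    # 25 characters, so the 40-character truncation never triggers
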
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chat-console
- Version: 0.4.0
+ Version: 0.4.3
  Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
  Home-page: https://github.com/wazacraftrfid/chat-console
  Author: Johnathan Greenaway
@@ -1,13 +1,13 @@
- app/__init__.py,sha256=wlOzzHK2mYbpcY6MSNp2l_vlKYdCrr8nzakp8kl3ffU,130
- app/config.py,sha256=tuTdJfmRsGx2-6PwkpbIsuP2hnB3ZPXRJdCvgzFd8PQ,8391
+ app/__init__.py,sha256=T3d41tTB1sDy6ix7bg43dp4zvNqqJku0JGlwMGKFGHo,130
+ app/config.py,sha256=F-0hO3NT5kRJxZelGLxaeUmnwx8i0LPHzYtNftL6CwM,8468
  app/database.py,sha256=nt8CVuDpy6zw8mOYqDcfUmNw611t7Ln7pz22M0b6-MI,9967
- app/main.py,sha256=WOcMP6yRwoEzftTSHf0e3zVK1aEuBgKMAsNbzHyKgiA,77427
+ app/main.py,sha256=8UU9GcPJINu_TmbKKKFBZXIgLHNDf6vabyupKjj3Img,86297
  app/models.py,sha256=4-y9Lytay2exWPFi0FDlVeRL3K2-I7E-jBqNzTfokqY,2644
- app/utils.py,sha256=G8e7ucCuIa-M8tpPDEd9PeWKIb8hN1FPpZnk_RHeRKo,40283
+ app/utils.py,sha256=-L38KGP8TlVl5vtZl5QgTiEAdhLcDsIXm7e62nnXgP8,41765
  app/api/__init__.py,sha256=A8UL84ldYlv8l7O-yKzraVFcfww86SgWfpl4p7R03-w,62
  app/api/anthropic.py,sha256=uInwNvGLJ_iPUs4BjdwaqXTU6NfmK1SzX7498Pt44fI,10667
  app/api/base.py,sha256=valBWV5So76r8tjrgU5-sLfY73WaViTrszdCy8Rimjo,10314
- app/api/ollama.py,sha256=eFG24nI2MlF57z9EHiA97v02NgFJ0kxaPUX26xAXFsg,66154
+ app/api/ollama.py,sha256=364PcXoPLJq9jLMF-HhPyQvaBp87U6FzNHDWx4g_Cvc,76925
  app/api/openai.py,sha256=XuHJHpD7tN_ZHLkRpNUcL1VxTtsXOVk1hDPXX8JnBxQ,15322
  app/ui/__init__.py,sha256=RndfbQ1Tv47qdSiuQzvWP96lPS547SDaGE-BgOtiP_w,55
  app/ui/chat_interface.py,sha256=oSDZi0Jgj_L8WnBh1RuJpIeIcN-RQ38CNejwsXiWTVg,18267
@@ -16,9 +16,9 @@ app/ui/model_browser.py,sha256=pdblLVkdyVF0_Bo02bqbErGAtieyH-y6IfhMOPEqIso,71124
  app/ui/model_selector.py,sha256=2G0TOXfcNodrXZOhLeaJJ2iG3Nck4c_NN1AvUAmaF3M,19172
  app/ui/search.py,sha256=b-m14kG3ovqW1-i0qDQ8KnAqFJbi5b1FLM9dOnbTyIs,9763
  app/ui/styles.py,sha256=04AhPuLrOd2yenfRySFRestPeuTPeMLzhmMB67NdGvw,5615
- chat_console-0.4.0.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
- chat_console-0.4.0.dist-info/METADATA,sha256=5vwV6wZ4lkp7Puo12-JP6Do3Ap3ZG7Z75lnsn7VGqnI,3810
- chat_console-0.4.0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- chat_console-0.4.0.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
- chat_console-0.4.0.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
- chat_console-0.4.0.dist-info/RECORD,,
+ chat_console-0.4.3.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
+ chat_console-0.4.3.dist-info/METADATA,sha256=y01SPyzTsYIyCbc5gEpCx6dlQ_ROG16T1gTZPOScia8,3810
+ chat_console-0.4.3.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
+ chat_console-0.4.3.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
+ chat_console-0.4.3.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
+ chat_console-0.4.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.1.0)
+ Generator: setuptools (80.3.0)
  Root-Is-Purelib: true
  Tag: py3-none-any