chat-console 0.4.0__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app/__init__.py CHANGED
@@ -3,4 +3,4 @@ Chat CLI
  A command-line interface for chatting with various LLM providers like ChatGPT and Claude.
  """
 
- __version__ = "0.4.0"
+ __version__ = "0.4.3"
app/api/ollama.py CHANGED
@@ -31,8 +31,96 @@ class OllamaClient(BaseModelClient):
  # Track model loading state
  self._model_loading = False
 
+ # Track preloaded models and their last use timestamp
+ self._preloaded_models = {}
+
+ # Default timeout values (in seconds)
+ self.DEFAULT_TIMEOUT = 30
+ self.MODEL_LOAD_TIMEOUT = 120
+ self.MODEL_PULL_TIMEOUT = 3600 # 1 hour for large models
+
  # Path to the cached models file
  self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
+ def get_timeout_for_model(self, model_id: str, operation: str = "generate") -> int:
+ """
+ Calculate an appropriate timeout based on model size
+
+ Parameters:
+ - model_id: The model identifier
+ - operation: The operation type ('generate', 'load', 'pull')
+
+ Returns:
+ - Timeout in seconds
+ """
+ # Default timeouts by operation
+ default_timeouts = {
+ "generate": self.DEFAULT_TIMEOUT, # 30s
+ "load": self.MODEL_LOAD_TIMEOUT, # 2min
+ "pull": self.MODEL_PULL_TIMEOUT, # 1h
+ "list": 5, # 5s
+ "test": 2 # 2s
+ }
+
+ # Parameter size multipliers
+ size_multipliers = {
+ # For models < 3B
+ "1b": 0.5,
+ "2b": 0.7,
+ "3b": 1.0,
+ # For models 3B-10B
+ "5b": 1.2,
+ "6b": 1.3,
+ "7b": 1.5,
+ "8b": 1.7,
+ "9b": 1.8,
+ # For models 10B-20B
+ "13b": 2.0,
+ "14b": 2.0,
+ # For models 20B-50B
+ "27b": 3.0,
+ "34b": 3.5,
+ "40b": 4.0,
+ # For models 50B+
+ "70b": 5.0,
+ "80b": 6.0,
+ "100b": 7.0,
+ "400b": 10.0,
+ "405b": 10.0,
+ }
+
+ # Get the base timeout for the operation
+ base_timeout = default_timeouts.get(operation, self.DEFAULT_TIMEOUT)
+
+ # Try to determine the model size from the model ID
+ model_size = "7b" # Default assumption is 7B parameters
+ model_lower = model_id.lower()
+
+ # Check for size indicators in the model name
+ for size in size_multipliers.keys():
+ if size in model_lower:
+ model_size = size
+ break
+
+ # If it's a known large model without size in name
+ if "llama3.1" in model_lower and not any(size in model_lower for size in size_multipliers.keys()):
+ model_size = "8b" # Default for llama3.1 without size specified
+
+ # For first generation after model selection, if preloaded, use shorter timeout
+ if operation == "generate" and model_id in self._preloaded_models:
+ # For preloaded models, use a shorter timeout
+ return max(int(base_timeout * 0.7), 20) # Min 20 seconds
+
+ # Calculate final timeout with multiplier
+ multiplier = size_multipliers.get(model_size, 1.0)
+ timeout = int(base_timeout * multiplier)
+
+ # For pull operation, ensure we have a reasonable maximum
+ if operation == "pull":
+ return min(timeout, 7200) # Max 2 hours
+
+ logger.info(f"Calculated timeout for {model_id} ({operation}): {timeout}s (base: {base_timeout}s, multiplier: {multiplier})")
+ return timeout
 
  @classmethod
  async def create(cls) -> 'OllamaClient':
@@ -61,7 +149,29 @@ class OllamaClient(BaseModelClient):
  style_instructions = self._get_style_instructions(style)
  debug_log(f"Adding style instructions: {style_instructions[:50]}...")
  formatted_messages.append(style_instructions)
+
+ # Special case for title generation - check if this is a title generation message
+ is_title_generation = False
+ for msg in messages:
+ if msg.get("role") == "system" and "generate a brief, descriptive title" in msg.get("content", "").lower():
+ is_title_generation = True
+ debug_log("Detected title generation prompt")
+ break
+
+ # For title generation, use a direct approach
+ if is_title_generation:
+ debug_log("Using specialized formatting for title generation")
+ # Find the user message containing the input for title generation
+ user_msg = next((msg for msg in messages if msg.get("role") == "user"), None)
+ if user_msg and "content" in user_msg:
+ # Create a direct prompt
+ prompt = "You must generate a short, descriptive title (maximum 40 characters) for this conversation. ONLY output the title with no additional text, no quotes, and no explanation. Do not start with phrases like 'Here's a title' or 'Title:'. RESPOND ONLY WITH THE TITLE TEXT for the following message:\n\n" + user_msg["content"]
+ debug_log(f"Created title generation prompt: {prompt[:100]}...")
+ return prompt
+ else:
+ debug_log("Could not find user message for title generation, using standard formatting")
 
+ # Standard processing for normal chat messages
  # Add message content, preserving conversation flow
  for i, msg in enumerate(messages):
  try:
@@ -185,6 +295,7 @@ class OllamaClient(BaseModelClient):
  try:
  async with aiohttp.ClientSession() as session:
  logger.debug(f"Sending request to {self.base_url}/api/generate")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  async with session.post(
  f"{self.base_url}/api/generate",
  json={
@@ -193,12 +304,16 @@ class OllamaClient(BaseModelClient):
  "temperature": temperature,
  "stream": False
  },
- timeout=30
+ timeout=gen_timeout
  ) as response:
  response.raise_for_status()
  data = await response.json()
  if "response" not in data:
  raise Exception("Invalid response format from Ollama server")
+
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  return data["response"]
 
  except aiohttp.ClientConnectorError:
@@ -324,10 +439,11 @@ class OllamaClient(BaseModelClient):
  "stream": False
  }
 
+ test_timeout = self.get_timeout_for_model(model, "test")
  async with session.post(
  f"{self.base_url}/api/generate",
  json=test_payload,
- timeout=2
+ timeout=test_timeout
  ) as response:
  if response.status != 200:
  logger.warning(f"Model test request failed with status {response.status}")
@@ -361,10 +477,11 @@ class OllamaClient(BaseModelClient):
  debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
  pull_payload = {"name": "gemma:2b"} # Safe default
 
+ pull_timeout = self.get_timeout_for_model(model, "pull")
  async with session.post(
  f"{self.base_url}/api/pull",
  json=pull_payload,
- timeout=60
+ timeout=pull_timeout
  ) as pull_response:
  if pull_response.status != 200:
  logger.error("Failed to pull model")
@@ -415,10 +532,11 @@ class OllamaClient(BaseModelClient):
  }
 
  debug_log(f"Sending request to Ollama API")
+ gen_timeout = self.get_timeout_for_model(model, "generate")
  response = await session.post(
  f"{self.base_url}/api/generate",
  json=request_payload,
- timeout=60 # Longer timeout for actual generation
+ timeout=gen_timeout
  )
  response.raise_for_status()
  debug_log(f"Response status: {response.status}")
@@ -426,6 +544,9 @@ class OllamaClient(BaseModelClient):
  # Use a simpler async iteration pattern that's less error-prone
  debug_log("Starting to process response stream")
 
+ # Update the model usage timestamp to keep it hot
+ self.update_model_usage(model)
+
  # Set a flag to track if we've yielded any content
  has_yielded_content = False
 
@@ -535,6 +656,123 @@ class OllamaClient(BaseModelClient):
  def is_loading_model(self) -> bool:
  """Check if Ollama is currently loading a model"""
  return self._model_loading
+
+ async def preload_model(self, model_id: str) -> bool:
+ """
+ Preload a model to keep it hot/ready for use
+ Returns True if successful, False otherwise
+ """
+ from datetime import datetime
+ import asyncio
+
+ logger.info(f"Preloading model: {model_id}")
+
+ # First, check if the model is already preloaded
+ if model_id in self._preloaded_models:
+ # Update timestamp if already preloaded
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Model {model_id} already preloaded, updated timestamp")
+ return True
+
+ try:
+ # We'll use a minimal prompt to load the model
+ warm_up_prompt = "hello"
+
+ # Set model loading state
+ old_loading_state = self._model_loading
+ self._model_loading = True
+
+ async with aiohttp.ClientSession() as session:
+ # First try pulling the model if needed
+ try:
+ logger.info(f"Ensuring model {model_id} is pulled")
+ pull_payload = {"name": model_id}
+ pull_timeout = self.get_timeout_for_model(model_id, "pull")
+ async with session.post(
+ f"{self.base_url}/api/pull",
+ json=pull_payload,
+ timeout=pull_timeout
+ ) as pull_response:
+ # We don't need to process the full pull, just initiate it
+ if pull_response.status != 200:
+ logger.warning(f"Pull request for model {model_id} failed with status {pull_response.status}")
+ except Exception as e:
+ logger.warning(f"Error during model pull check: {str(e)}")
+
+ # Now send a small generation request to load the model into memory
+ logger.info(f"Sending warm-up request for model {model_id}")
+ gen_timeout = self.get_timeout_for_model(model_id, "load")
+ async with session.post(
+ f"{self.base_url}/api/generate",
+ json={
+ "model": model_id,
+ "prompt": warm_up_prompt,
+ "temperature": 0.7,
+ "stream": False
+ },
+ timeout=gen_timeout
+ ) as response:
+ if response.status != 200:
+ logger.error(f"Failed to preload model {model_id}, status: {response.status}")
+ self._model_loading = old_loading_state
+ return False
+
+ # Read the response to ensure the model is fully loaded
+ await response.json()
+
+ # Update preloaded models with timestamp
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Successfully preloaded model {model_id}")
+ return True
+ except Exception as e:
+ logger.error(f"Error preloading model {model_id}: {str(e)}")
+ return False
+ finally:
+ # Reset model loading state
+ self._model_loading = old_loading_state
+
+ def get_preloaded_models(self) -> Dict[str, datetime]:
+ """Return the dict of preloaded models and their last use times"""
+ return self._preloaded_models
+
+ def update_model_usage(self, model_id: str) -> None:
+ """Update the timestamp for a model that is being used"""
+ if model_id and model_id in self._preloaded_models:
+ from datetime import datetime
+ self._preloaded_models[model_id] = datetime.now()
+ logger.info(f"Updated usage timestamp for model {model_id}")
+
+ async def release_inactive_models(self, max_inactive_minutes: int = 30) -> List[str]:
+ """
+ Release models that have been inactive for more than the specified time
+ Returns a list of model IDs that were released
+ """
+ from datetime import datetime, timedelta
+
+ if not self._preloaded_models:
+ return []
+
+ now = datetime.now()
+ inactive_threshold = timedelta(minutes=max_inactive_minutes)
+ models_to_release = []
+
+ # Find models that have been inactive for too long
+ for model_id, last_used in list(self._preloaded_models.items()):
+ if now - last_used > inactive_threshold:
+ models_to_release.append(model_id)
+
+ # Release the models
+ released_models = []
+ for model_id in models_to_release:
+ try:
+ logger.info(f"Releasing inactive model: {model_id} (inactive for {(now - self._preloaded_models[model_id]).total_seconds() / 60:.1f} minutes)")
+ # We don't have an explicit "unload" API in Ollama, but we can remove it from our tracking
+ del self._preloaded_models[model_id]
+ released_models.append(model_id)
+ except Exception as e:
+ logger.error(f"Error releasing model {model_id}: {str(e)}")
+
+ return released_models
 
  async def get_model_details(self, model_id: str) -> Dict[str, Any]:
  """Get detailed information about a specific Ollama model"""
app/config.py CHANGED
@@ -151,7 +151,9 @@ DEFAULT_CONFIG = {
  "max_history_items": 100,
  "highlight_code": True,
  "auto_save": True,
- "generate_dynamic_titles": True
+ "generate_dynamic_titles": True,
+ "ollama_model_preload": True,
+ "ollama_inactive_timeout_minutes": 30
  }
 
  def validate_config(config):
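
The two new defaults enable preloading and set a 30-minute idle window; downstream code reads them back with CONFIG.get, as in this sketch (assuming CONFIG is the merged settings mapping exposed by app.config):

    from app.config import CONFIG

    preload_enabled = CONFIG.get("ollama_model_preload", True)
    idle_minutes = CONFIG.get("ollama_inactive_timeout_minutes", 30)
    # Override either key in the user config to disable preloading or change the idle window.
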
app/main.py CHANGED
@@ -363,7 +363,13 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  self.selected_model = resolve_model_id(default_model_from_config)
  self.selected_style = CONFIG["default_style"] # Keep SimpleChatApp __init__
  self.initial_text = initial_text # Keep SimpleChatApp __init__
- # Removed self.input_widget instance variable
+
+ # Task for model cleanup
+ self._model_cleanup_task = None
+
+ # Inactivity threshold in minutes before releasing model resources
+ # Read from config, default to 30 minutes
+ self.MODEL_INACTIVITY_THRESHOLD = CONFIG.get("ollama_inactive_timeout_minutes", 30)
 
  def compose(self) -> ComposeResult: # Modify SimpleChatApp compose
  """Create the simplified application layout."""
@@ -420,6 +426,11 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  pass # Silently ignore if widget not found yet
 
  self.update_app_info() # Update the model info
+
+ # Start the background task for model cleanup if model preloading is enabled
+ if CONFIG.get("ollama_model_preload", True):
+ self._model_cleanup_task = asyncio.create_task(self._check_inactive_models())
+ debug_log("Started background task for model cleanup")
 
  # Check API keys and services # Keep SimpleChatApp on_mount
  api_issues = [] # Keep SimpleChatApp on_mount
@@ -675,29 +686,98 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
  # Determine title client and model based on available keys
  if OPENAI_API_KEY:
+ # For highest success rate, use OpenAI for title generation when available
  from app.api.openai import OpenAIClient
  title_client = await OpenAIClient.create()
  title_model = "gpt-3.5-turbo"
  debug_log("Using OpenAI for background title generation")
  elif ANTHROPIC_API_KEY:
+ # Next best option is Anthropic
  from app.api.anthropic import AnthropicClient
  title_client = await AnthropicClient.create()
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for background title generation")
  else:
  # Fallback to the currently selected model's client if no API keys
+ # Get client type first to ensure we correctly identify Ollama models
+ from app.api.ollama import OllamaClient
  selected_model_resolved = resolve_model_id(self.selected_model)
- title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
- title_model = selected_model_resolved
- debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
+ client_type = BaseModelClient.get_client_type_for_model(selected_model_resolved)
+
+ # For Ollama models, special handling is required
+ if client_type == OllamaClient:
+ debug_log(f"Title generation with Ollama model detected: {selected_model_resolved}")
+
+ # Always try to use smalllm2:135m first, then fall back to other small models
+ try:
+ # Check if we have smalllm2:135m or other smaller models available
+ ollama_client = await OllamaClient.create()
+ available_models = await ollama_client.get_available_models()
+
+ # Use smalllm2:135m if available (extremely small and fast)
+ preferred_model = "smalllm2:135m"
+ fallback_models = ["tinyllama", "gemma:2b", "phi3:mini", "llama3:8b", "orca-mini:3b", "phi2"]
+
+ # First check for our preferred smallest model
+ small_model_found = False
+ if any(model["id"] == preferred_model for model in available_models):
+ debug_log(f"Found optimal small model for title generation: {preferred_model}")
+ title_model = preferred_model
+ small_model_found = True
+
+ # If not found, try fallbacks in order
+ if not small_model_found:
+ for model_name in fallback_models:
+ if any(model["id"] == model_name for model in available_models):
+ debug_log(f"Found alternative small model for title generation: {model_name}")
+ title_model = model_name
+ small_model_found = True
+ break
+
+ if not small_model_found:
+ # Use the current model if no smaller models found
+ title_model = selected_model_resolved
+ debug_log(f"No smaller models found, using current model: {title_model}")
+
+ # Always create a fresh client instance to avoid interference with model preloading
+ title_client = ollama_client
+ debug_log(f"Created dedicated Ollama client for title generation with model: {title_model}")
+ except Exception as e:
+ debug_log(f"Error finding optimized Ollama model for title generation: {str(e)}")
+ # Fallback to standard approach
+ title_client = await OllamaClient.create()
+ title_model = selected_model_resolved
+ else:
+ # For other providers, use normal client acquisition
+ title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
+ title_model = selected_model_resolved
+ debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
 
  if not title_client or not title_model:
  raise Exception("Could not determine a client/model for title generation.")
 
  # Call the utility function
  from app.utils import generate_conversation_title # Import locally if needed
- new_title = await generate_conversation_title(content, title_model, title_client)
- debug_log(f"Background generated title: {new_title}")
+
+ # Add timeout handling for title generation to prevent hangs
+ try:
+ # Create a task with timeout
+ import asyncio
+ title_generation_task = asyncio.create_task(
+ generate_conversation_title(content, title_model, title_client)
+ )
+
+ # Wait for completion with timeout (30 seconds)
+ new_title = await asyncio.wait_for(title_generation_task, timeout=30)
+ debug_log(f"Background generated title: {new_title}")
+ except asyncio.TimeoutError:
+ debug_log("Title generation timed out after 30 seconds")
+ # Use default title in case of timeout
+ new_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
+ # Try to cancel the task
+ if not title_generation_task.done():
+ title_generation_task.cancel()
+ debug_log("Cancelled timed out title generation task")
 
  # Check if title generation returned the default or a real title
  if new_title and not new_title.startswith("Conversation ("):
@@ -718,8 +798,8 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  title_widget.update(new_title)
  self.current_conversation.title = new_title # Update local object too
  log(f"Background title update successful: {new_title}")
- # Maybe a subtle notification? Optional.
- # self.notify(f"Title set: {new_title}", severity="information", timeout=2)
+ # Subtle notification to show title was updated
+ self.notify(f"Conversation titled: {new_title}", severity="information", timeout=2)
  else:
  log("Conversation changed before background title update could apply.")
  else:
@@ -1226,6 +1306,94 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
  log(f"Stored selected provider: {self.selected_provider} for model: {self.selected_model}")
 
  self.update_app_info() # Update the displayed model info
+
+ # Preload the model if it's an Ollama model and preloading is enabled
+ if self.selected_provider == "ollama" and CONFIG.get("ollama_model_preload", True):
+ # Start the background task to preload the model
+ debug_log(f"Starting background task to preload Ollama model: {self.selected_model}")
+ asyncio.create_task(self._preload_ollama_model(self.selected_model))
+
+ async def _preload_ollama_model(self, model_id: str) -> None:
+ """Preload an Ollama model in the background"""
+ from app.api.ollama import OllamaClient
+
+ debug_log(f"Preloading Ollama model: {model_id}")
+ # Show a subtle notification to the user
+ self.notify("Preparing model for use...", severity="information", timeout=3)
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Update the loading indicator to show model loading
+ loading = self.query_one("#loading-indicator")
+ loading.remove_class("hidden")
+ loading.add_class("model-loading")
+ loading.update(f"⚙️ Loading Ollama model...")
+
+ # Preload the model
+ success = await client.preload_model(model_id)
+
+ # Hide the loading indicator
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+
+ if success:
+ debug_log(f"Successfully preloaded model: {model_id}")
+ self.notify(f"Model ready for use", severity="success", timeout=2)
+ else:
+ debug_log(f"Failed to preload model: {model_id}")
+ # No need to notify the user about failure - will happen naturally on first use
+ except Exception as e:
+ debug_log(f"Error preloading model: {str(e)}")
+ # Make sure to hide the loading indicator
+ try:
+ loading = self.query_one("#loading-indicator")
+ loading.add_class("hidden")
+ loading.remove_class("model-loading")
+ except Exception:
+ pass
+
+ async def _check_inactive_models(self) -> None:
+ """Background task to check for and release inactive models"""
+ from app.api.ollama import OllamaClient
+
+ # How often to check for inactive models (in seconds)
+ CHECK_INTERVAL = 600 # 10 minutes
+
+ debug_log(f"Starting inactive model check task with interval {CHECK_INTERVAL}s")
+
+ try:
+ while True:
+ await asyncio.sleep(CHECK_INTERVAL)
+
+ debug_log("Checking for inactive models...")
+
+ try:
+ # Initialize the client
+ client = await OllamaClient.create()
+
+ # Get the threshold from instance variable
+ threshold = getattr(self, "MODEL_INACTIVITY_THRESHOLD", 30)
+
+ # Check and release inactive models
+ released_models = await client.release_inactive_models(threshold)
+
+ if released_models:
+ debug_log(f"Released {len(released_models)} inactive models: {released_models}")
+ else:
+ debug_log("No inactive models to release")
+
+ except Exception as e:
+ debug_log(f"Error checking for inactive models: {str(e)}")
+ # Continue loop even if this check fails
+
+ except asyncio.CancelledError:
+ debug_log("Model cleanup task cancelled")
+ # Normal task cancellation, clean exit
+ except Exception as e:
+ debug_log(f"Unexpected error in model cleanup task: {str(e)}")
+ # Log but don't crash
 
  def on_style_selector_style_selected(self, event: StyleSelector.StyleSelected) -> None: # Keep SimpleChatApp on_style_selector_style_selected
  """Handle style selection""" # Keep SimpleChatApp on_style_selector_style_selected docstring
app/utils.py CHANGED
@@ -32,6 +32,11 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
 
  # Try-except the entire function to ensure we always return a title
  try:
+ # Check if we're using an Ollama client
+ from app.api.ollama import OllamaClient
+ is_ollama_client = isinstance(client, OllamaClient)
+ debug_log(f"Client is Ollama: {is_ollama_client}")
+
  # Pick a reliable title generation model - prefer OpenAI if available
  from app.config import OPENAI_API_KEY, ANTHROPIC_API_KEY
 
@@ -46,16 +51,22 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
  title_model = "claude-3-haiku-20240307"
  debug_log("Using Anthropic for title generation")
  else:
- # Use the passed client if no API keys available
- title_client = client
- title_model = model
- debug_log(f"Using provided {type(client).__name__} for title generation")
+ # For Ollama clients, ensure we have a clean instance to avoid conflicts with preloaded models
+ if is_ollama_client:
+ debug_log("Creating fresh Ollama client instance for title generation")
+ title_client = await OllamaClient.create()
+ title_model = model
+ else:
+ # Use the passed client for other providers
+ title_client = client
+ title_model = model
+ debug_log(f"Using {type(title_client).__name__} for title generation with model {title_model}")
 
  # Create a special prompt for title generation
  title_prompt = [
  {
  "role": "system",
- "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. Return only the title text with no additional explanation or formatting."
+ "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. ONLY output the title text. DO NOT include phrases like 'Sure, here's a title' or any additional formatting, explanation, or quotes."
  },
  {
  "role": "user",
@@ -85,12 +96,31 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
  max_tokens=60
  )
 
- # Sanitize the title
+ # Sanitize the title - remove quotes, extra spaces and unwanted prefixes
  title = title.strip().strip('"\'').strip()
+
+ # Remove common LLM prefixes like "Title:", "Sure, here's a title:", etc.
+ prefixes_to_remove = [
+ "title:", "here's a title:", "here is a title:",
+ "a title for this conversation:", "sure,", "certainly,",
+ "the title is:", "suggested title:"
+ ]
+
+ # Case-insensitive prefix removal
+ title_lower = title.lower()
+ for prefix in prefixes_to_remove:
+ if title_lower.startswith(prefix):
+ title = title[len(prefix):].strip()
+ title_lower = title.lower() # Update lowercase version after removal
+
+ # Remove any remaining quotes
+ title = title.strip('"\'').strip()
+
+ # Enforce length limit
  if len(title) > 40:
  title = title[:37] + "..."
 
- debug_log(f"Generated title: {title}")
+ debug_log(f"Generated title (after sanitization): {title}")
  return title
 
  except Exception as e:
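
Walking an invented raw reply through the new sanitization pass illustrates the intended effect (the input string is an example, not package output):

    raw = '"Title: Debugging Ollama timeouts"'
    # strip() plus quote stripping leaves: Title: Debugging Ollama timeouts
    # the case-insensitive prefix pass removes "title:", leaving: Debugging Ollama timeouts
    # 25 characters, so the 40-character truncation never triggers
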
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chat-console
- Version: 0.4.0
+ Version: 0.4.3
  Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
  Home-page: https://github.com/wazacraftrfid/chat-console
  Author: Johnathan Greenaway
@@ -1,13 +1,13 @@
- app/__init__.py,sha256=wlOzzHK2mYbpcY6MSNp2l_vlKYdCrr8nzakp8kl3ffU,130
- app/config.py,sha256=tuTdJfmRsGx2-6PwkpbIsuP2hnB3ZPXRJdCvgzFd8PQ,8391
+ app/__init__.py,sha256=T3d41tTB1sDy6ix7bg43dp4zvNqqJku0JGlwMGKFGHo,130
+ app/config.py,sha256=F-0hO3NT5kRJxZelGLxaeUmnwx8i0LPHzYtNftL6CwM,8468
  app/database.py,sha256=nt8CVuDpy6zw8mOYqDcfUmNw611t7Ln7pz22M0b6-MI,9967
- app/main.py,sha256=WOcMP6yRwoEzftTSHf0e3zVK1aEuBgKMAsNbzHyKgiA,77427
+ app/main.py,sha256=8UU9GcPJINu_TmbKKKFBZXIgLHNDf6vabyupKjj3Img,86297
  app/models.py,sha256=4-y9Lytay2exWPFi0FDlVeRL3K2-I7E-jBqNzTfokqY,2644
- app/utils.py,sha256=G8e7ucCuIa-M8tpPDEd9PeWKIb8hN1FPpZnk_RHeRKo,40283
+ app/utils.py,sha256=-L38KGP8TlVl5vtZl5QgTiEAdhLcDsIXm7e62nnXgP8,41765
  app/api/__init__.py,sha256=A8UL84ldYlv8l7O-yKzraVFcfww86SgWfpl4p7R03-w,62
  app/api/anthropic.py,sha256=uInwNvGLJ_iPUs4BjdwaqXTU6NfmK1SzX7498Pt44fI,10667
  app/api/base.py,sha256=valBWV5So76r8tjrgU5-sLfY73WaViTrszdCy8Rimjo,10314
- app/api/ollama.py,sha256=eFG24nI2MlF57z9EHiA97v02NgFJ0kxaPUX26xAXFsg,66154
+ app/api/ollama.py,sha256=364PcXoPLJq9jLMF-HhPyQvaBp87U6FzNHDWx4g_Cvc,76925
  app/api/openai.py,sha256=XuHJHpD7tN_ZHLkRpNUcL1VxTtsXOVk1hDPXX8JnBxQ,15322
  app/ui/__init__.py,sha256=RndfbQ1Tv47qdSiuQzvWP96lPS547SDaGE-BgOtiP_w,55
  app/ui/chat_interface.py,sha256=oSDZi0Jgj_L8WnBh1RuJpIeIcN-RQ38CNejwsXiWTVg,18267
@@ -16,9 +16,9 @@ app/ui/model_browser.py,sha256=pdblLVkdyVF0_Bo02bqbErGAtieyH-y6IfhMOPEqIso,71124
  app/ui/model_selector.py,sha256=2G0TOXfcNodrXZOhLeaJJ2iG3Nck4c_NN1AvUAmaF3M,19172
  app/ui/search.py,sha256=b-m14kG3ovqW1-i0qDQ8KnAqFJbi5b1FLM9dOnbTyIs,9763
  app/ui/styles.py,sha256=04AhPuLrOd2yenfRySFRestPeuTPeMLzhmMB67NdGvw,5615
- chat_console-0.4.0.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
- chat_console-0.4.0.dist-info/METADATA,sha256=5vwV6wZ4lkp7Puo12-JP6Do3Ap3ZG7Z75lnsn7VGqnI,3810
- chat_console-0.4.0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- chat_console-0.4.0.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
- chat_console-0.4.0.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
- chat_console-0.4.0.dist-info/RECORD,,
+ chat_console-0.4.3.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
+ chat_console-0.4.3.dist-info/METADATA,sha256=y01SPyzTsYIyCbc5gEpCx6dlQ_ROG16T1gTZPOScia8,3810
+ chat_console-0.4.3.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
+ chat_console-0.4.3.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
+ chat_console-0.4.3.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
+ chat_console-0.4.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.1.0)
+ Generator: setuptools (80.3.0)
  Root-Is-Purelib: true
  Tag: py3-none-any