chat-console 0.2.6__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -1
- app/api/ollama.py +46 -14
- app/main.py +172 -168
- app/ui/chat_interface.py +22 -3
- app/utils.py +79 -18
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/METADATA +1 -1
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/RECORD +11 -11
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/WHEEL +0 -0
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/entry_points.txt +0 -0
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/top_level.txt +0 -0
app/__init__.py
CHANGED
app/api/ollama.py
CHANGED
@@ -22,6 +22,9 @@ class OllamaClient(BaseModelClient):
         # Track active stream session
         self._active_stream_session = None
 
+        # Track model loading state
+        self._model_loading = False
+
         # Path to the cached models file
         self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
 
@@ -191,6 +194,10 @@ class OllamaClient(BaseModelClient):
                         raise aiohttp.ClientError("Model not ready")
             except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                 logger.info(f"Model cold start detected: {str(e)}")
+                # Set model loading flag
+                self._model_loading = True
+                logger.info("Setting model_loading state to True")
+
                 # Model might need loading, try pulling it
                 async with session.post(
                     f"{self.base_url}/api/pull",
@@ -199,8 +206,10 @@ class OllamaClient(BaseModelClient):
                 ) as pull_response:
                     if pull_response.status != 200:
                         logger.error("Failed to pull model")
+                        self._model_loading = False  # Reset flag on failure
                         raise Exception("Failed to pull model")
                     logger.info("Model pulled successfully")
+                    self._model_loading = False  # Reset flag after successful pull
 
         # Now proceed with actual generation
         session = aiohttp.ClientSession()
@@ -208,7 +217,7 @@ class OllamaClient(BaseModelClient):
 
         try:
             logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
-
+            response = await session.post(
                 f"{self.base_url}/api/generate",
                 json={
                     "model": model,
@@ -217,19 +226,36 @@ class OllamaClient(BaseModelClient):
                     "stream": True
                 },
                 timeout=60  # Longer timeout for actual generation
-            )
-
-
-
-
-
-
-
-
-
-
-
-
+            )
+            response.raise_for_status()
+
+            # Process the response stream
+            while True:
+                if not self._active_stream_session:
+                    logger.info("Stream session was closed externally")
+                    break
+
+                try:
+                    line = await asyncio.wait_for(response.content.readline(), timeout=0.5)
+                    if not line:  # End of stream
+                        break
+
+                    chunk = line.decode().strip()
+                    try:
+                        data = json.loads(chunk)
+                        if "response" in data:
+                            yield data["response"]
+                    except json.JSONDecodeError:
+                        continue
+                except asyncio.TimeoutError:
+                    # This allows checking for cancellation regularly
+                    continue
+                except asyncio.CancelledError:
+                    logger.info("Stream processing was cancelled")
+                    raise
+
+            logger.info("Streaming completed successfully")
+            return
         finally:
             self._active_stream_session = None  # Clear reference when done
             await session.close()  # Ensure session is closed
@@ -260,6 +286,12 @@ class OllamaClient(BaseModelClient):
             logger.info("Cancelling active stream session")
             await self._active_stream_session.close()
             self._active_stream_session = None
+            self._model_loading = False
+            logger.info("Stream session closed successfully")
+
+    def is_loading_model(self) -> bool:
+        """Check if Ollama is currently loading a model"""
+        return self._model_loading
 
     async def get_model_details(self, model_id: str) -> Dict[str, Any]:
         """Get detailed information about a specific Ollama model"""
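The ollama.py changes fit together as three pieces: a `_model_loading` flag surfaced through the new `is_loading_model()` accessor, a `cancel_stream()` that closes the active aiohttp session, and a read loop that polls `readline()` with a 0.5-second timeout so an external close or task cancellation is noticed between chunks. Below is a minimal, self-contained sketch of that consumption pattern against an Ollama-style `/api/generate` NDJSON stream; the class name and simplified signatures are illustrative and not the package's actual `OllamaClient`.

```python
import asyncio
import json
import logging

import aiohttp

logger = logging.getLogger(__name__)


class StreamingClientSketch:
    """Hypothetical, trimmed-down stand-in for the OllamaClient behaviour above."""

    def __init__(self, base_url: str = "http://localhost:11434"):
        self.base_url = base_url
        self._active_stream_session = None  # set while a stream is open
        self._model_loading = False         # the flag introduced in 0.2.9

    def is_loading_model(self) -> bool:
        """True while a model pull/load is believed to be in flight."""
        return self._model_loading

    async def cancel_stream(self) -> None:
        """Close the HTTP session; the read loop below notices and stops."""
        if self._active_stream_session:
            await self._active_stream_session.close()
            self._active_stream_session = None
            self._model_loading = False

    async def generate_stream(self, model: str, prompt: str):
        session = aiohttp.ClientSession()
        self._active_stream_session = session
        try:
            response = await session.post(
                f"{self.base_url}/api/generate",
                json={"model": model, "prompt": prompt, "stream": True},
                timeout=aiohttp.ClientTimeout(total=60),
            )
            response.raise_for_status()
            while True:
                if not self._active_stream_session:
                    break  # cancel_stream() was called externally
                try:
                    # Short timeout so cancellation is checked at least twice a second
                    line = await asyncio.wait_for(response.content.readline(), timeout=0.5)
                except asyncio.TimeoutError:
                    continue
                if not line:
                    break  # end of stream
                try:
                    data = json.loads(line.decode().strip())
                except json.JSONDecodeError:
                    continue
                if "response" in data:
                    yield data["response"]
        finally:
            self._active_stream_session = None
            await session.close()
```

Because `cancel_stream()` only closes the session and flips the flags, it can be called from the UI's Escape handler while the generator is still being consumed elsewhere.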
app/main.py
CHANGED
@@ -302,7 +302,7 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         Binding("q", "quit", "Quit", show=True, key_display="q"),
         # Removed binding for "n" (new chat) since there's a dedicated button
         Binding("c", "action_new_conversation", "New Chat", show=False, key_display="c", priority=True), # Keep alias with priority
-        Binding("escape", "
+        Binding("escape", "action_escape", "Cancel / Stop", show=True, key_display="esc"), # Updated to call our async method
         Binding("ctrl+c", "quit", "Quit", show=False),
         Binding("h", "view_history", "History", show=True, key_display="h", priority=True), # Add priority
         Binding("s", "settings", "Settings", show=True, key_display="s", priority=True), # Add priority
@@ -463,7 +463,7 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         await self.create_new_conversation() # Keep SimpleChatApp action_new_conversation
         log("action_new_conversation finished") # Added log
 
-    def action_escape(self) -> None:
+    async def action_escape(self) -> None:
         """Handle escape key globally."""
         log("action_escape triggered")
         settings_panel = self.query_one("#settings-panel")
@@ -477,18 +477,45 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
             log("Attempting to cancel generation task")
             if self.current_generation_task and not self.current_generation_task.done():
                 log("Cancelling active generation task.")
-
-                #
-
+
+                # Get the client for the current model first and cancel the connection
+                try:
+                    model = self.selected_model
+                    client = BaseModelClient.get_client_for_model(model)
+
+                    # Call the client's cancel method if it's supported
+                    if hasattr(client, 'cancel_stream'):
+                        log("Calling client.cancel_stream() to terminate API session")
+                        try:
+                            # This will close the HTTP connection to Ollama server
+                            await client.cancel_stream()
+                            log("Client stream cancelled successfully")
+                        except Exception as e:
+                            log.error(f"Error in client.cancel_stream(): {str(e)}")
+                except Exception as e:
+                    log.error(f"Error setting up client cancellation: {str(e)}")
+
+                # Now cancel the asyncio task - this should raise CancelledError in the task
+                try:
+                    log("Cancelling asyncio task")
+                    self.current_generation_task.cancel()
+                    # Give a moment for cancellation to propagate
+                    await asyncio.sleep(0.1)
+                    log(f"Task cancelled. Task done: {self.current_generation_task.done()}")
+                except Exception as e:
+                    log.error(f"Error cancelling task: {str(e)}")
+
+                # Notify user that we're stopping
+                self.notify("Stopping generation...", severity="warning", timeout=2)
             else:
-                # This
-
+                # This happens if is_generating is True, but no active task found to cancel
+                log("No active generation task found, but is_generating=True. Resetting state.")
+                self.is_generating = False
             loading = self.query_one("#loading-indicator")
             loading.add_class("hidden")
         else:
             log("Escape pressed, but settings not visible and not actively generating.")
-            # Optionally add other escape behaviors here if needed
-            # e.g., clear input, deselect item, etc.
+            # Optionally add other escape behaviors here if needed
 
     def update_app_info(self) -> None:
         """Update the displayed app information."""
@@ -608,166 +635,143 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         # Focus back on input
         input_widget.focus()
 
-    async def generate_response(self) -> None:
-        """Generate an AI response."""
-        if not self.current_conversation or not self.messages:
-            return
-
-        self.is_generating = True
-        log(
-        loading = self.query_one("#loading-indicator")
-        loading.remove_class("hidden")
-
-        try:
-            # Get conversation parameters
-            model = self.selected_model
-            style = self.selected_style
-
-            # Convert messages to API format
-            api_messages = []
-            for msg in self.messages:
-                api_messages.append({
-                    "role": msg.role,
-                    "content": msg.content
-                })
-
-            # Get appropriate client
-            try: # Keep SimpleChatApp generate_response
-                client = BaseModelClient.get_client_for_model(model) # Keep SimpleChatApp generate_response
-                if client is None: # Keep SimpleChatApp generate_response
-                    raise Exception(f"No client available for model: {model}") # Keep SimpleChatApp generate_response
-            except Exception as e: # Keep SimpleChatApp generate_response
-                self.notify(f"Failed to initialize model client: {str(e)}", severity="error") # Keep SimpleChatApp generate_response
-                return # Keep SimpleChatApp generate_response
-
-            # Start streaming response # Keep SimpleChatApp generate_response
-            assistant_message = Message(role="assistant", content="Thinking...") # Keep SimpleChatApp generate_response
-            self.messages.append(assistant_message) # Keep SimpleChatApp generate_response
-            messages_container = self.query_one("#messages-container") # Keep SimpleChatApp generate_response
-            message_display = MessageDisplay(assistant_message, highlight_code=CONFIG["highlight_code"]) # Keep SimpleChatApp generate_response
-            messages_container.mount(message_display) # Keep SimpleChatApp generate_response
-            messages_container.scroll_end(animate=False) # Keep SimpleChatApp generate_response
-
-            # Add small delay to show thinking state # Keep SimpleChatApp generate_response
-            await asyncio.sleep(0.5) # Keep SimpleChatApp generate_response
-
-            # Stream chunks to the UI with synchronization # Keep SimpleChatApp generate_response
-            update_lock = asyncio.Lock() # Keep SimpleChatApp generate_response
-
-            async def update_ui(content: str): # Keep SimpleChatApp generate_response
-                if not self.is_generating: # Keep SimpleChatApp generate_response
-                    log("update_ui called but is_generating is False, returning.") # Added log
-                    return # Keep SimpleChatApp generate_response
-
-                async with update_lock: # Keep SimpleChatApp generate_response
-                    try: # Keep SimpleChatApp generate_response
-                        # Clear thinking indicator on first content # Keep SimpleChatApp generate_response
-                        if assistant_message.content == "Thinking...": # Keep SimpleChatApp generate_response
-                            assistant_message.content = "" # Keep SimpleChatApp generate_response
-
-                        # Update message with full content so far # Keep SimpleChatApp generate_response
-                        assistant_message.content = content # Keep SimpleChatApp generate_response
-                        # Update UI with full content # Keep SimpleChatApp generate_response
-                        await message_display.update_content(content) # Keep SimpleChatApp generate_response
-                        # Force a refresh and scroll # Keep SimpleChatApp generate_response
-                        self.refresh(layout=True) # Keep SimpleChatApp generate_response
-                        await asyncio.sleep(0.05) # Longer delay for UI stability # Keep SimpleChatApp generate_response
-                        messages_container.scroll_end(animate=False) # Keep SimpleChatApp generate_response
-                        # Force another refresh to ensure content is visible # Keep SimpleChatApp generate_response
-                        self.refresh(layout=True) # Keep SimpleChatApp generate_response
-                    except Exception as e: # Keep SimpleChatApp generate_response
-                        log.error(f"Error updating UI: {str(e)}") # Use log instead of logger
-
-            # Generate the response with timeout and cleanup # Keep SimpleChatApp generate_response
-            self.current_generation_task = None # Clear previous task reference
-            try: # Keep SimpleChatApp generate_response
-                # Create a task for the response generation # Keep SimpleChatApp generate_response
-                self.current_generation_task = asyncio.create_task( # Keep SimpleChatApp generate_response
-                    generate_streaming_response( # Keep SimpleChatApp generate_response
-                        self, # Pass the app instance
-                        api_messages, # Keep SimpleChatApp generate_response
-                        model, # Keep SimpleChatApp generate_response
-                        style, # Keep SimpleChatApp generate_response
-                        client, # Keep SimpleChatApp generate_response
-                        update_ui # Keep SimpleChatApp generate_response
-                    ) # Keep SimpleChatApp generate_response
-                ) # Keep SimpleChatApp generate_response
-
-                # Wait for response with timeout # Keep SimpleChatApp generate_response
-                log.info(f"Waiting for generation task {self.current_generation_task} with timeout...") # Add log
-                full_response = await asyncio.wait_for(self.current_generation_task, timeout=60) # Longer timeout # Keep SimpleChatApp generate_response
-                log.info(f"Generation task {self.current_generation_task} completed. Full response length: {len(full_response) if full_response else 0}") # Add log
-
-                # Save to database only if we got a complete response and weren't cancelled
-                if self.is_generating and full_response: # Check is_generating flag here
-                    log("Generation finished normally, saving full response to DB") # Added log
-                    self.db.add_message( # Keep SimpleChatApp generate_response
-                        self.current_conversation.id, # Keep SimpleChatApp generate_response
-                        "assistant", # Keep SimpleChatApp generate_response
-                        full_response # Keep SimpleChatApp generate_response
-                    ) # Keep SimpleChatApp generate_response
-                    # Force a final refresh # Keep SimpleChatApp generate_response
-                    self.refresh(layout=True) # Keep SimpleChatApp generate_response
-                    await asyncio.sleep(0.1) # Wait for UI to update # Keep SimpleChatApp generate_response
-                elif not full_response and self.is_generating: # Only log if not cancelled
-                    log("Generation finished but full_response is empty/None") # Added log
-                else:
-                    # This case handles cancellation where full_response might be partial or None
-                    log("Generation was cancelled or finished without a full response.")
-
-            except asyncio.CancelledError: # Handle cancellation explicitly
-                log.warning("Generation task was cancelled.")
-                self.notify("Generation stopped by user.", severity="warning")
-                # Remove the potentially incomplete message from UI state
-                if self.messages and self.messages[-1].role == "assistant":
-                    self.messages.pop()
-                await self.update_messages_ui() # Update UI to remove partial message
-
-            except asyncio.TimeoutError: # Keep SimpleChatApp generate_response
-                log.error(f"Response generation timed out waiting for task {self.current_generation_task}") # Use log instead of logger
-                # Log state at timeout
-                log.error(f"Timeout state: is_generating={self.is_generating}, task_done={self.current_generation_task.done() if self.current_generation_task else 'N/A'}")
-                error_msg = "Response generation timed out. The model may be busy or unresponsive. Please try again." # Keep SimpleChatApp generate_response
-                self.notify(error_msg, severity="error") # Keep SimpleChatApp generate_response
-
-                # Remove the incomplete message # Keep SimpleChatApp generate_response
-                if self.messages and self.messages[-1].role == "assistant": # Keep SimpleChatApp generate_response
-                    self.messages.pop() # Keep SimpleChatApp generate_response
-
-                # Update UI to remove the incomplete message # Keep SimpleChatApp generate_response
-                await self.update_messages_ui() # Keep SimpleChatApp generate_response
-
-            finally: # Keep SimpleChatApp generate_response
-                # Ensure flag is reset and task reference is cleared
-                log(f"Setting is_generating to False in finally block") # Added log
-                self.is_generating = False # Keep SimpleChatApp generate_response
-                self.current_generation_task = None # Clear task reference
-                loading = self.query_one("#loading-indicator") # Keep SimpleChatApp generate_response
-                loading.add_class("hidden") # Keep SimpleChatApp generate_response
-                # Force a final UI refresh # Keep SimpleChatApp generate_response
-                self.refresh(layout=True) # Keep SimpleChatApp generate_response
-
-        except Exception as e: # Keep SimpleChatApp generate_response
-            # Catch any other unexpected errors during generation setup/handling
-            log.error(f"Unexpected exception during generate_response: {str(e)}") # Added log
-            self.notify(f"Error generating response: {str(e)}", severity="error") # Keep SimpleChatApp generate_response
-            # Add error message to UI # Keep SimpleChatApp generate_response
-            error_msg = f"Error: {str(e)}" # Keep SimpleChatApp generate_response
-            self.messages.append(Message(role="assistant", content=error_msg)) # Keep SimpleChatApp generate_response
-            await self.update_messages_ui() # Keep SimpleChatApp generate_response
-            # The finally block below will handle resetting is_generating and hiding loading
-
-        finally: # Keep SimpleChatApp generate_response - This finally block now primarily handles cleanup
-            log(f"Ensuring is_generating is False and task is cleared in outer finally block") # Added log
-            self.is_generating = False # Ensure flag is always reset
-            self.current_generation_task = None # Ensure task ref is cleared
-            loading = self.query_one("#loading-indicator") # Keep SimpleChatApp generate_response
-            loading.add_class("hidden") # Ensure loading indicator is hidden
-            # Re-focus input after generation attempt (success, failure, or cancel)
+    async def generate_response(self) -> None:
+        """Generate an AI response using a non-blocking worker."""
+        if not self.current_conversation or not self.messages:
+            return
+
+        self.is_generating = True
+        log("Setting is_generating to True")
+        loading = self.query_one("#loading-indicator")
+        loading.remove_class("hidden")
+
+        try:
+            # Get conversation parameters
+            model = self.selected_model
+            style = self.selected_style
+
+            # Convert messages to API format
+            api_messages = []
+            for msg in self.messages:
+                api_messages.append({
+                    "role": msg.role,
+                    "content": msg.content
+                })
+
+            # Get appropriate client
             try:
-
-
-
+                client = BaseModelClient.get_client_for_model(model)
+                if client is None:
+                    raise Exception(f"No client available for model: {model}")
+            except Exception as e:
+                self.notify(f"Failed to initialize model client: {str(e)}", severity="error")
+                self.is_generating = False
+                loading.add_class("hidden")
+                return
+
+            # Start streaming response
+            assistant_message = Message(role="assistant", content="Thinking...")
+            self.messages.append(assistant_message)
+            messages_container = self.query_one("#messages-container")
+            message_display = MessageDisplay(assistant_message, highlight_code=CONFIG["highlight_code"])
+            messages_container.mount(message_display)
+            messages_container.scroll_end(animate=False)
+
+            # Add small delay to show thinking state
+            await asyncio.sleep(0.5)
+
+            # Stream chunks to the UI with synchronization
+            update_lock = asyncio.Lock()
+
+            async def update_ui(content: str):
+                if not self.is_generating:
+                    log("update_ui called but is_generating is False, returning.")
+                    return
+
+                async with update_lock:
+                    try:
+                        # Clear thinking indicator on first content
+                        if assistant_message.content == "Thinking...":
+                            assistant_message.content = ""
+
+                        # Update message with full content so far
+                        assistant_message.content = content
+                        # Update UI with full content
+                        await message_display.update_content(content)
+                        # Force a refresh and scroll
+                        self.refresh(layout=True)
+                        await asyncio.sleep(0.05) # Longer delay for UI stability
+                        messages_container.scroll_end(animate=False)
+                        # Force another refresh to ensure content is visible
+                        self.refresh(layout=True)
+                    except Exception as e:
+                        log.error(f"Error updating UI: {str(e)}")
+
+            # Define worker for background processing
+            @work(exit_on_error=True)
+            async def run_generation_worker():
+                try:
+                    # Generate the response in background
+                    full_response = await generate_streaming_response(
+                        self,
+                        api_messages,
+                        model,
+                        style,
+                        client,
+                        update_ui
+                    )
+
+                    # Save complete response to database
+                    if self.is_generating and full_response:
+                        log("Generation completed normally, saving to database")
+                        self.db.add_message(
+                            self.current_conversation.id,
+                            "assistant",
+                            full_response
+                        )
+
+                    # Final UI refresh
+                    self.refresh(layout=True)
+
+                except asyncio.CancelledError:
+                    log.warning("Generation worker was cancelled")
+                    # Remove the incomplete message
+                    if self.messages and self.messages[-1].role == "assistant":
+                        self.messages.pop()
+                    await self.update_messages_ui()
+                    self.notify("Generation stopped by user", severity="warning", timeout=2)
+
+                except Exception as e:
+                    log.error(f"Error in generation worker: {str(e)}")
+                    self.notify(f"Generation error: {str(e)}", severity="error", timeout=5)
+                    # Add error message to UI
+                    if self.messages and self.messages[-1].role == "assistant":
+                        self.messages.pop() # Remove thinking message
+                    error_msg = f"Error: {str(e)}"
+                    self.messages.append(Message(role="assistant", content=error_msg))
+                    await self.update_messages_ui()
+
+                finally:
+                    # Always clean up state and UI
+                    log("Generation worker completed, resetting state")
+                    self.is_generating = False
+                    self.current_generation_task = None
+                    loading = self.query_one("#loading-indicator")
+                    loading.add_class("hidden")
+                    self.refresh(layout=True)
+                    self.query_one("#message-input").focus()
+
+            # Start the worker and keep a reference to it
+            worker = run_generation_worker()
+            self.current_generation_task = worker
+
+        except Exception as e:
+            log.error(f"Error setting up generation: {str(e)}")
+            self.notify(f"Error: {str(e)}", severity="error")
+            self.is_generating = False
+            loading = self.query_one("#loading-indicator")
+            loading.add_class("hidden")
+            self.query_one("#message-input").focus()
 
     def on_model_selector_model_selected(self, event: ModelSelector.ModelSelected) -> None: # Keep SimpleChatApp on_model_selector_model_selected
         """Handle model selection""" # Keep SimpleChatApp on_model_selector_model_selected docstring
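The main.py rewrite changes the cancellation order as much as the execution model: `action_escape` is now async so it can first ask the client to close its HTTP stream via `cancel_stream()` and only then cancel the asyncio task, and generation itself is started through a Textual `@work` worker instead of `asyncio.wait_for` on a bare task. A small sketch of that two-step cancellation, with `task` and `client` as stand-ins for `current_generation_task` and the active model client:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def cancel_generation(task: asyncio.Task, client) -> None:
    """Sketch of the Escape-key flow: close the network stream, then cancel the task."""
    # 1. Tear down the HTTP session first so the server-side stream is dropped.
    if hasattr(client, "cancel_stream"):
        try:
            await client.cancel_stream()
        except Exception as exc:
            logger.error("cancel_stream failed: %s", exc)

    # 2. Cancel the asyncio task and give the CancelledError a moment to propagate.
    if task and not task.done():
        task.cancel()
        await asyncio.sleep(0.1)
```

Closing the session explicitly makes sure the connection to the Ollama server is released even if the task delays handling the CancelledError.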
app/ui/chat_interface.py
CHANGED
@@ -204,6 +204,11 @@ class ChatInterface(Container):
         display: none;
         padding: 0 1;
     }
+
+    #loading-indicator.model-loading {
+        background: $warning;
+        color: $text;
+    }
     """
 
     class MessageSent(Message):
@@ -238,7 +243,7 @@ class ChatInterface(Container):
                 yield MessageDisplay(message, highlight_code=CONFIG["highlight_code"])
         with Container(id="input-area"):
             yield Container(
-                Label("Generating response...", id="loading-text"),
+                Label("▪▪▪ Generating response...", id="loading-text", markup=True),
                 id="loading-indicator"
             )
             with Container(id="controls"):
@@ -328,16 +333,30 @@ class ChatInterface(Container):
         if input_widget.has_focus:
             input_widget.focus()
 
-    def start_loading(self) -> None:
-        """Show loading indicator
+    def start_loading(self, model_loading: bool = False) -> None:
+        """Show loading indicator
+
+        Args:
+            model_loading: If True, indicates Ollama is loading a model
+        """
         self.is_loading = True
         loading = self.query_one("#loading-indicator")
+        loading_text = self.query_one("#loading-text")
+
+        if model_loading:
+            loading.add_class("model-loading")
+            loading_text.update("⚙️ Loading Ollama model...")
+        else:
+            loading.remove_class("model-loading")
+            loading_text.update("▪▪▪ Generating response...")
+
         loading.display = True
 
     def stop_loading(self) -> None:
         """Hide loading indicator"""
         self.is_loading = False
         loading = self.query_one("#loading-indicator")
+        loading.remove_class("model-loading")
         loading.display = False
 
     def clear_messages(self) -> None:
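The chat_interface.py changes distinguish two loading states with a single CSS class: `start_loading(model_loading=True)` adds `model-loading` and swaps the label text, and `stop_loading()` removes it again. A minimal Textual sketch of that toggle (not the package's actual `ChatInterface`; the widget id and styling are reused from the diff purely for illustration):

```python
from textual.app import App, ComposeResult
from textual.widgets import Label


class LoadingIndicatorDemo(App):
    """Tiny demo of the generating vs. model-loading indicator states."""

    CSS = """
    #loading-indicator.model-loading {
        background: $warning;
        color: $text;
    }
    """

    def compose(self) -> ComposeResult:
        yield Label("▪▪▪ Generating response...", id="loading-indicator")

    def start_loading(self, model_loading: bool = False) -> None:
        loading = self.query_one("#loading-indicator", Label)
        if model_loading:
            # Ollama is pulling/loading a model: highlight the indicator
            loading.add_class("model-loading")
            loading.update("⚙️ Loading Ollama model...")
        else:
            loading.remove_class("model-loading")
            loading.update("▪▪▪ Generating response...")


if __name__ == "__main__":
    LoadingIndicatorDemo().run()
```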
app/utils.py
CHANGED
@@ -86,12 +86,71 @@ async def generate_streaming_response(app: 'SimpleChatApp', messages: List[Dict]
     buffer = []
     last_update = time.time()
     update_interval = 0.1 # Update UI every 100ms
-    generation_task = None
 
     try:
-        #
-
-
+        # Update UI with model loading state if it's an Ollama client
+        if hasattr(client, 'is_loading_model'):
+            # Send signal to update UI for model loading if needed
+            try:
+                # The client might be in model loading state even before generating
+                model_loading = client.is_loading_model()
+                logger.info(f"Initial model loading state: {model_loading}")
+
+                # Get the chat interface and update loading indicator
+                if hasattr(app, 'query_one'):
+                    loading = app.query_one("#loading-indicator")
+                    if model_loading:
+                        loading.add_class("model-loading")
+                        app.query_one("#loading-text").update("Loading Ollama model...")
+                    else:
+                        loading.remove_class("model-loading")
+            except Exception as e:
+                logger.error(f"Error setting initial loading state: {str(e)}")
+
+        stream_generator = client.generate_stream(messages, model, style)
+
+        # Check if we just entered model loading state
+        if hasattr(client, 'is_loading_model') and client.is_loading_model():
+            logger.info("Model loading started during generation")
+            try:
+                if hasattr(app, 'query_one'):
+                    loading = app.query_one("#loading-indicator")
+                    loading.add_class("model-loading")
+                    app.query_one("#loading-text").update("Loading Ollama model...")
+            except Exception as e:
+                logger.error(f"Error updating UI for model loading: {str(e)}")
+
+        # Use asyncio.shield to ensure we can properly interrupt the stream processing
+        async for chunk in stream_generator:
+            # Check for cancellation frequently
+            if asyncio.current_task().cancelled():
+                logger.info("Task cancellation detected during chunk processing")
+                # Close the client stream if possible
+                if hasattr(client, 'cancel_stream'):
+                    await client.cancel_stream()
+                raise asyncio.CancelledError()
+
+            # Check if model loading state changed
+            if hasattr(client, 'is_loading_model'):
+                model_loading = client.is_loading_model()
+                try:
+                    if hasattr(app, 'query_one'):
+                        loading = app.query_one("#loading-indicator")
+                        loading_text = app.query_one("#loading-text")
+
+                        if model_loading and not loading.has_class("model-loading"):
+                            # Model loading started
+                            logger.info("Model loading started during streaming")
+                            loading.add_class("model-loading")
+                            loading_text.update("⚙️ Loading Ollama model...")
+                        elif not model_loading and loading.has_class("model-loading"):
+                            # Model loading finished
+                            logger.info("Model loading finished during streaming")
+                            loading.remove_class("model-loading")
+                            loading_text.update("▪▪▪ Generating response...")
+                except Exception as e:
+                    logger.error(f"Error updating loading state during streaming: {str(e)}")
+
             if chunk: # Only process non-empty chunks
                 buffer.append(chunk)
                 current_time = time.time()
@@ -100,7 +159,7 @@ async def generate_streaming_response(app: 'SimpleChatApp', messages: List[Dict]
                 if current_time - last_update >= update_interval or len(''.join(buffer)) > 100:
                     new_content = ''.join(buffer)
                     full_response += new_content
-                    #
+                    # Send content to UI
                     await callback(full_response)
                     buffer = []
                     last_update = current_time
@@ -114,23 +173,25 @@ async def generate_streaming_response(app: 'SimpleChatApp', messages: List[Dict]
             full_response += new_content
             await callback(full_response)
 
-        logger.info("Streaming response
-        # Add log before returning
-        logger.info(f"generate_streaming_response returning normally. Full response length: {len(full_response)}")
+        logger.info(f"Streaming response completed successfully. Response length: {len(full_response)}")
         return full_response
+
     except asyncio.CancelledError:
         # This is expected when the user cancels via Escape
-        logger.info("Streaming response task cancelled.
-        #
-
-
-
+        logger.info(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
+        # Ensure the client stream is closed
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        # Return whatever was collected so far
+        return full_response
+
     except Exception as e:
-        logger.error(f"Error during streaming response: {str(e)}")
-        #
-        if
-
-
+        logger.error(f"Error during streaming response: {str(e)}")
+        # Close the client stream if possible
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        # Re-raise the exception for the caller to handle
+        raise
 
 def ensure_ollama_running() -> bool:
     """
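The utils.py loop keeps the existing buffering policy: chunks accumulate and are flushed to the UI callback every 0.1 s, or sooner once more than 100 characters are pending, and on cancellation the stream is closed and the partial text is returned rather than thrown away. A standalone sketch of that policy with illustrative names (`stream_with_buffering` is not a function in the package):

```python
import asyncio
import time
from typing import AsyncIterator, Awaitable, Callable


async def stream_with_buffering(
    chunks: AsyncIterator[str],
    callback: Callable[[str], Awaitable[None]],
    update_interval: float = 0.1,  # flush to the UI every 100 ms...
    max_pending: int = 100,        # ...or sooner once 100+ chars are buffered
) -> str:
    buffer = []
    full_response = ""
    last_update = time.time()

    try:
        async for chunk in chunks:
            if not chunk:
                continue  # skip empty chunks
            buffer.append(chunk)
            now = time.time()
            if now - last_update >= update_interval or len("".join(buffer)) > max_pending:
                full_response += "".join(buffer)
                await callback(full_response)  # UI always receives the full text so far
                buffer = []
                last_update = now

        # Flush whatever is left at the end of the stream
        if buffer:
            full_response += "".join(buffer)
            await callback(full_response)
        return full_response
    except asyncio.CancelledError:
        # Mirrors the 0.2.9 behaviour: hand back the partial text instead of re-raising
        return full_response
```

Passing the accumulated text (rather than the delta) to the callback is what lets the UI simply replace the message body on every update.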
{chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.2.6
+Version: 0.2.9
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway
{chat_console-0.2.6.dist-info → chat_console-0.2.9.dist-info}/RECORD
CHANGED
@@ -1,24 +1,24 @@
-app/__init__.py,sha256=
+app/__init__.py,sha256=g2BzewDN5X96Dl5Zzw8uag1TBEdPIU1ceTm7u-BJrjM,130
 app/config.py,sha256=sKNp6Za4ZfW-CZBOvEv0TncAS77AnKi86hTM51C4KQ4,5227
 app/database.py,sha256=nt8CVuDpy6zw8mOYqDcfUmNw611t7Ln7pz22M0b6-MI,9967
-app/main.py,sha256=
+app/main.py,sha256=k726xRBcuPgbUsUg4s-REhtaljccjDLNzA_C-fPkQk4,48866
 app/models.py,sha256=4-y9Lytay2exWPFi0FDlVeRL3K2-I7E-jBqNzTfokqY,2644
-app/utils.py,sha256=
+app/utils.py,sha256=IyINMrM6oGXtN5HRPuKoFEyfKg0fR4FVXIi_0e2KxI0,11798
 app/api/__init__.py,sha256=A8UL84ldYlv8l7O-yKzraVFcfww86SgWfpl4p7R03-w,62
 app/api/anthropic.py,sha256=x5PmBXEKe_ow2NWk8XdqSPR0hLOdCc_ypY5QAySeA78,4234
 app/api/base.py,sha256=-6RSxSpqe-OMwkaq1wVWbu3pVkte-ZYy8rmdvt-Qh48,3953
-app/api/ollama.py,sha256=
+app/api/ollama.py,sha256=FTIlgZmvpZd6K4HL2nUD19-p9Xb1TA859LfnCgewpcU,51354
 app/api/openai.py,sha256=1fYgFXXL6yj_7lQ893Yj28RYG4M8d6gt_q1gzhhjcig,3641
 app/ui/__init__.py,sha256=RndfbQ1Tv47qdSiuQzvWP96lPS547SDaGE-BgOtiP_w,55
-app/ui/chat_interface.py,sha256=
+app/ui/chat_interface.py,sha256=R8tdy72TcT7veemUzcJOjbPY32WizBdNHgfmq69EFfA,14275
 app/ui/chat_list.py,sha256=WQTYVNSSXlx_gQal3YqILZZKL9UiTjmNMIDX2I9pAMM,11205
 app/ui/model_browser.py,sha256=5h3gVsuGIUrXjYVF-QclZFhYtX2kH14LvT22Ufm9etg,49453
 app/ui/model_selector.py,sha256=Aj1irAs9DQMn8wfcPsFZGxWmx0JTzHjSe7pVdDMwqTQ,13182
 app/ui/search.py,sha256=b-m14kG3ovqW1-i0qDQ8KnAqFJbi5b1FLM9dOnbTyIs,9763
 app/ui/styles.py,sha256=04AhPuLrOd2yenfRySFRestPeuTPeMLzhmMB67NdGvw,5615
-chat_console-0.2.
-chat_console-0.2.
-chat_console-0.2.
-chat_console-0.2.
-chat_console-0.2.
-chat_console-0.2.
+chat_console-0.2.9.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
+chat_console-0.2.9.dist-info/METADATA,sha256=zTSJePqMsi0n6fEz8s4gtLwHe_726-ijfTjPwH_Mumw,2921
+chat_console-0.2.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+chat_console-0.2.9.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
+chat_console-0.2.9.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
+chat_console-0.2.9.dist-info/RECORD,,