chat-console 0.3.0__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chat_console-0.3.0 → chat_console-0.3.4}/PKG-INFO +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/app/__init__.py +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/openai.py +17 -7
- {chat_console-0.3.0 → chat_console-0.3.4}/app/main.py +63 -53
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_interface.py +24 -29
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_selector.py +28 -3
- {chat_console-0.3.0 → chat_console-0.3.4}/app/utils.py +98 -113
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/PKG-INFO +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/LICENSE +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/README.md +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/__init__.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/anthropic.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/base.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/ollama.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/config.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/database.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/models.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/__init__.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_list.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_browser.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/search.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/styles.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/SOURCES.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/dependency_links.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/entry_points.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/requires.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/top_level.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/setup.cfg +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/setup.py +0 -0
{chat_console-0.3.0 → chat_console-0.3.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.3.0
+Version: 0.3.4
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway
{chat_console-0.3.0 → chat_console-0.3.4}/app/api/openai.py
@@ -168,10 +168,20 @@ class OpenAIClient(BaseModelClient):
             yield f"Error: {str(e)}"
             raise Exception(f"OpenAI streaming error: {str(e)}")
 
-    def get_available_models(self) -> List[Dict[str, Any]]:
-        """
-
-
-
-
-
+    async def get_available_models(self) -> List[Dict[str, Any]]:
+        """Fetch list of available OpenAI models from the /models endpoint"""
+        try:
+            models_response = await self.client.models.list()
+            # Each model has an 'id' and possibly other metadata
+            models = []
+            for model in models_response.data:
+                # Use 'id' as both id and name for now; can enhance with more info if needed
+                models.append({"id": model.id, "name": model.id})
+            return models
+        except Exception as e:
+            # Fallback to a static list if API call fails
+            return [
+                {"id": "gpt-3.5-turbo", "name": "gpt-3.5-turbo"},
+                {"id": "gpt-4", "name": "gpt-4"},
+                {"id": "gpt-4-turbo", "name": "gpt-4-turbo"}
+            ]
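Because get_available_models is now a coroutine, any caller that previously invoked it synchronously must be updated to await it. A minimal sketch of the new calling convention, assuming the async OpenAIClient.create() factory that the model selector hunks below also use (the standalone caller itself is illustrative, not code from the package):

    import asyncio
    from app.api.openai import OpenAIClient

    async def list_openai_models() -> None:
        # Hypothetical standalone caller, not part of chat-console
        client = await OpenAIClient.create()
        models = await client.get_available_models()  # [{"id": ..., "name": ...}, ...]
        for model in models:
            print(model["id"])

    if __name__ == "__main__":
        asyncio.run(list_openai_models())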
{chat_console-0.3.0 → chat_console-0.3.4}/app/main.py
@@ -647,14 +647,18 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         # Only attempt title generation if the message has sufficient content (at least 3 characters)
         if is_first_message and self.current_conversation and CONFIG.get("generate_dynamic_titles", True) and len(content) >= 3:
             log("First message detected, generating title...")
+            print(f"First message detected, generating conversation title for: {content[:30]}...")
             debug_log(f"First message detected with length {len(content)}, generating conversation title")
-
+
+            # Show loading indicator for title generation
             loading = self.query_one("#loading-indicator")
-            loading.remove_class("hidden")
+            loading.remove_class("hidden")
+            loading.update("🔤 Generating title...")
 
             try:
                 # Get appropriate client
                 model = self.selected_model
+                print(f"Using model for title generation: {model}")
                 debug_log(f"Selected model for title generation: '{model}'")
 
                 # Check if model is valid
@@ -665,24 +669,12 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                         model = "gpt-3.5-turbo"
                         debug_log("Falling back to OpenAI gpt-3.5-turbo for title generation")
                     elif ANTHROPIC_API_KEY:
-                        model = "claude-
-                        debug_log("Falling back to Anthropic
+                        model = "claude-3-haiku-20240307" # Updated to newer Claude model
+                        debug_log("Falling back to Anthropic Claude 3 Haiku for title generation")
                     else:
-                        # Last resort -
-
-
-                            ollama = await OllamaClient.create()
-                            models = await ollama.get_available_models()
-                            if models and len(models) > 0:
-                                debug_log(f"Found {len(models)} Ollama models, using first one")
-                                model = models[0].get("id", "llama3")
-                            else:
-                                model = "llama3" # Common default
-                            debug_log(f"Falling back to Ollama model: {model}")
-                        except Exception as ollama_err:
-                            debug_log(f"Error getting Ollama models: {str(ollama_err)}")
-                            model = "llama3" # Final fallback
-                            debug_log("Final fallback to llama3")
+                        # Last resort - use a common Ollama model
+                        model = "llama3" # Common default
+                        debug_log("Falling back to Ollama model: llama3")
 
                     debug_log(f"Getting client for model: {model}")
                     client = await BaseModelClient.get_client_for_model(model)
@@ -710,17 +702,19 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                     elif ANTHROPIC_API_KEY:
                         from app.api.anthropic import AnthropicClient
                         client = await AnthropicClient.create()
-                        model = "claude-
+                        model = "claude-3-haiku-20240307" # Updated to newer Claude model
                         debug_log("Falling back to Anthropic for title generation")
                     else:
                         raise Exception("No valid API clients available for title generation")
 
                 # Generate title
+                print(f"Calling generate_conversation_title with model: {model}")
                 log(f"Calling generate_conversation_title with model: {model}")
                 debug_log(f"Calling generate_conversation_title with model: {model}")
                 title = await generate_conversation_title(content, model, client)
                 debug_log(f"Generated title: {title}")
                 log(f"Generated title: {title}")
+                print(f"Generated title: {title}")
 
                 # Update conversation title in database
                 self.db.update_conversation(
@@ -746,11 +740,10 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
             except Exception as e:
                 debug_log(f"Failed to generate title: {str(e)}")
                 log.error(f"Failed to generate title: {str(e)}")
+                print(f"Failed to generate title: {str(e)}")
                 self.notify(f"Failed to generate title: {str(e)}", severity="warning")
             finally:
-                title_generation_in_progress = False
                 # Hide loading indicator *only if* AI response generation isn't about to start
-                # This check might be redundant if generate_response always shows it anyway
                 if not self.is_generating:
                     loading.add_class("hidden")
 
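Taken together, the title-generation hunks above establish a provider fallback order: keep the selected model when it is valid, otherwise gpt-3.5-turbo when OPENAI_API_KEY is set, then claude-3-haiku-20240307 when ANTHROPIC_API_KEY is set, and finally the local Ollama default llama3. A hypothetical standalone sketch of that ordering (the helper and its parameters are illustrative, not the package's code):

    from typing import Optional

    def pick_title_model(selected: Optional[str],
                         openai_key: Optional[str],
                         anthropic_key: Optional[str]) -> str:
        # Illustrative only: mirrors the fallback order in the hunks above
        if selected:
            return selected                      # use the already-selected chat model
        if openai_key:
            return "gpt-3.5-turbo"               # OpenAI fallback
        if anthropic_key:
            return "claude-3-haiku-20240307"     # newer Claude model replacing the old id
        return "llama3"                          # last resort: common local Ollama model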
@@ -910,13 +903,17 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
         # Start streaming response
         debug_log("Creating assistant message with 'Thinking...'")
+        print("Creating assistant message with 'Thinking...'")
         assistant_message = Message(role="assistant", content="Thinking...")
         self.messages.append(assistant_message)
         messages_container = self.query_one("#messages-container")
         message_display = MessageDisplay(assistant_message, highlight_code=CONFIG["highlight_code"])
         messages_container.mount(message_display)
+
+        # Force a layout refresh and scroll to end
+        self.refresh(layout=True)
         messages_container.scroll_end(animate=False)
-
+
         # Add small delay to show thinking state
         await asyncio.sleep(0.5)
 
@@ -943,58 +940,71 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                 # Update the message object with the full content
                 assistant_message.content = content
 
-                # Update UI with the content -
+                # Update UI with the content - the MessageDisplay will now handle its own refresh
+                # This is a critical change that ensures content is immediately visible
                 await message_display.update_content(content)
 
-                # Force
-                # This
-                self.refresh(layout=
-
-                #
+                # CRITICAL: Force immediate UI refresh after EVERY update
+                # This ensures we don't need a second Enter press to see content
+                self.refresh(layout=True)
+
+                # Always scroll after each update to ensure visibility
                 messages_container.scroll_end(animate=False)
 
-                #
-                #
-                # This improves stability at the cost of slightly choppier animations
+                # For longer responses, we can throttle the heavy refreshes
+                # to reduce visual jitter, but still do light refreshes for every update
                 content_length = len(content)
 
-                # Define
+                # Define key refresh points that require more thorough updates
                 new_paragraph = content.endswith("\n") and content.count("\n") > 0
                 code_block = "```" in content
-
-                    content_length <
-                    content_length %
-                    new_paragraph or
-                    code_block
+                needs_thorough_refresh = (
+                    content_length < 30 or # Very aggressive for short responses
+                    content_length % 16 == 0 or # More frequent periodic updates
+                    new_paragraph or # Refresh on paragraph breaks
+                    code_block # Refresh when code blocks are detected
                 )
 
-                # Check if it's been enough time since last refresh
+                # Check if it's been enough time since last heavy refresh
+                # Reduced from 200ms to 100ms for more responsive UI
                 current_time = time.time()
                 time_since_refresh = current_time - last_refresh_time
 
-                if
-                    # Store the time we did the refresh
+                if needs_thorough_refresh and time_since_refresh > 0.1:
+                    # Store the time we did the heavy refresh
                     last_refresh_time = current_time
-
+
+                    # Ensure content is visible with an aggressive, guaranteed update sequence
+                    # 1. Scroll to ensure visibility
                     messages_container.scroll_end(animate=False)
-
+
+                    # 2. Force a comprehensive refresh with layout recalculation
                     self.refresh(layout=True)
+
+                    # 3. Small delay for rendering
+                    await asyncio.sleep(0.01)
+
+                    # 4. Another scroll to account for any layout changes
+                    messages_container.scroll_end(animate=False)
+
             except Exception as e:
                 debug_log(f"Error updating UI: {str(e)}")
                 log.error(f"Error updating UI: {str(e)}")
 
         # --- Remove the inner run_generation_worker function ---
 
-        # Start the worker
-        debug_log("Starting generate_streaming_response worker")
-
-
-
-
-
-
-
-
+        # Start the worker using Textual's run_worker to ensure state tracking
+        debug_log("Starting generate_streaming_response worker with run_worker")
+        worker = self.run_worker(
+            generate_streaming_response(
+                self,
+                api_messages,
+                model,
+                style,
+                client,
+                update_ui # Pass the callback function
+            ),
+            name="generate_response"
         )
         self.current_generation_task = worker
         # Worker completion will be handled by on_worker_state_changed
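The last hunk above replaces the @work-decorated worker with an explicit self.run_worker(...) call, so the app now holds the Worker handle and observes completion through worker state events. A minimal, generic sketch of that pattern, assuming Textual's App.run_worker and Worker.StateChanged API (the app and task names here are illustrative, not the package's actual handler):

    from textual.app import App
    from textual.worker import Worker, WorkerState

    async def long_task() -> str:
        # Stand-in for generate_streaming_response(...)
        return "done"

    class WorkerDemo(App):
        def on_mount(self) -> None:
            # run_worker accepts a coroutine and returns a Worker handle
            self.generation_worker = self.run_worker(long_task(), name="generate_response")

        def on_worker_state_changed(self, event: Worker.StateChanged) -> None:
            # Completion, cancellation and errors arrive as state-change events
            if event.worker.name == "generate_response" and event.state == WorkerState.SUCCESS:
                self.log(f"worker finished: {event.worker.result!r}")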
{chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_interface.py
@@ -132,37 +132,29 @@ class MessageDisplay(Static): # Inherit from Static instead of RichLog
         # This avoids text reflowing as new tokens arrive
         formatted_content = self._format_content(content)
 
-        # Use
-        # This
-        self.update(formatted_content, refresh=
+        # Use a direct update that forces refresh - critical fix for streaming
+        # This ensures content is immediately visible
+        self.update(formatted_content, refresh=True)
 
-        #
-        # This is critical for streaming to work properly
-        self.refresh(layout=False)
-
-        # For Ollama responses, we need more aggressive refresh
-        # Check if this is likely an Ollama response by looking at the parent app
+        # Force app-level refresh and scroll to ensure visibility
         try:
-            app
-            if app
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        except Exception:
-            # Ignore any errors in this detection logic
-            pass
+            # Always force app refresh for every update
+            if self.app:
+                # Force a full layout refresh to ensure content is visible
+                self.app.refresh(layout=True)
+
+                # Find the messages container and scroll to end
+                containers = self.app.query("ScrollableContainer")
+                for container in containers:
+                    if hasattr(container, 'scroll_end'):
+                        container.scroll_end(animate=False)
+        except Exception as e:
+            # Log the error and fallback to local refresh
+            print(f"Error refreshing app: {str(e)}")
+            self.refresh(layout=True)
+
+        # Small delay to allow UI to update
+        await asyncio.sleep(0.02) # Increased delay for better rendering
 
     def _format_content(self, content: str) -> str:
         """Format message content with timestamp and handle markdown links"""
@@ -181,6 +173,9 @@ class MessageDisplay(Static): # Inherit from Static instead of RichLog
         # But keep our timestamp markup
         timestamp_markup = f"[dim]{timestamp}[/dim]"
 
+        # Debug print to verify content is being formatted
+        print(f"Formatting content: {len(content)} chars")
+
         return f"{timestamp_markup} {content}"
 
 class InputWithFocus(Input):
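The update pattern in these two hunks is: render the formatted content into the Static widget, force an app-level refresh, then scroll every ScrollableContainer to the end. The sketch below isolates that pattern in a simplified stand-in widget; it uses only the Textual calls that appear in the diff and is not the package's actual MessageDisplay:

    import asyncio
    from textual.containers import ScrollableContainer
    from textual.widgets import Static

    class StreamingMessage(Static):
        _buffer: str = ""

        async def append_chunk(self, chunk: str) -> None:
            self._buffer += chunk
            self.update(self._buffer)                    # re-render the widget content
            if self.app:
                self.app.refresh(layout=True)            # force a layout pass so new text shows
                for container in self.app.query(ScrollableContainer):
                    container.scroll_end(animate=False)  # keep the newest text in view
            await asyncio.sleep(0.02)                    # brief pause to let the UI repaint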
{chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_selector.py
@@ -162,14 +162,36 @@ class ModelSelector(Container):
         """Get model options for a specific provider"""
         logger = logging.getLogger(__name__)
         logger.info(f"Getting model options for provider: {provider}")
-
+
+        options = []
+
+        if provider == "openai":
+            try:
+                from ..api.openai import OpenAIClient
+                client = await OpenAIClient.create()
+                models = await client.get_available_models()
+                logger.info(f"Found {len(models)} models from OpenAI API")
+                for model in models:
+                    options.append((model["name"], model["id"]))
+            except Exception as e:
+                logger.error(f"Error getting OpenAI models: {str(e)}")
+                # Fallback to static list
+                options = [
+                    ("gpt-3.5-turbo", "gpt-3.5-turbo"),
+                    ("gpt-4", "gpt-4"),
+                    ("gpt-4-turbo", "gpt-4-turbo"),
+                ]
+            # Do NOT add custom model option for OpenAI
+            return options
+
+        # Default: config-based models
         options = [
             (model_info["display_name"], model_id)
             for model_id, model_info in CONFIG["available_models"].items()
             if model_info["provider"] == provider
         ]
         logger.info(f"Found {len(options)} models in config for {provider}")
-
+
         # Add available Ollama models
         if provider == "ollama":
             try:
@@ -214,7 +236,10 @@ class ModelSelector(Container):
                 ]
                 logger.info("Adding default Ollama models as fallback")
                 options.extend(default_models)
-
+            options.append(("Custom Model...", "custom"))
+            return options
+
+        # For Anthropic and others, allow custom model
         options.append(("Custom Model...", "custom"))
         return options
 
{chat_console-0.3.0 → chat_console-0.3.4}/app/utils.py
@@ -116,82 +116,68 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
         return f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
 
 # Make this the worker function directly
-@work(exit_on_error=True)
 async def generate_streaming_response(
     app: 'SimpleChatApp',
     messages: List[Dict],
     model: str,
     style: str,
     client: Any,
-    callback: Callable[[str], Awaitable[None]]
-) -> Optional[str]:
-    """
-
-
+    callback: Callable[[str], Awaitable[None]]
+) -> Optional[str]:
+    """
+    Generate a streaming response from the model (as a Textual worker).
+    Refactored to be a coroutine, not an async generator.
+    """
     try:
         from app.main import debug_log
     except ImportError:
-        debug_log = lambda msg: None
-
-    # Worker function needs to handle its own state and cleanup partially
-    # The main app will also need cleanup logic in generate_response
+        debug_log = lambda msg: None
 
     logger.info(f"Starting streaming response with model: {model}")
     debug_log(f"Starting streaming response with model: '{model}', client type: {type(client).__name__}")
-
-    # Very defensive check of messages format
+
     if not messages:
         debug_log("Error: messages list is empty")
         raise ValueError("Messages list cannot be empty")
-
+
     for i, msg in enumerate(messages):
         try:
             debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
-            # Ensure essential fields exist
            if 'role' not in msg:
                 debug_log(f"Adding missing 'role' to message {i}")
-                msg['role'] = 'user'
+                msg['role'] = 'user'
            if 'content' not in msg:
                 debug_log(f"Adding missing 'content' to message {i}")
-                msg['content'] = ''
+                msg['content'] = ''
         except Exception as e:
             debug_log(f"Error checking message {i}: {str(e)}")
-            # Try to repair the message
             messages[i] = {
                 'role': 'user',
                 'content': str(msg) if msg else ''
             }
             debug_log(f"Repaired message {i}")
-
-    debug_log(f"Messages validation complete: {len(messages)} total messages")
-
-    # Import time module within the worker function scope
+
     import time
-
+
     full_response = ""
     buffer = []
     last_update = time.time()
-    update_interval = 0.
-
+    update_interval = 0.05 # Reduced interval for more frequent updates
+
     try:
-        # Check that we have a valid client and model before proceeding
         if client is None:
             debug_log("Error: client is None, cannot proceed with streaming")
             raise ValueError("Model client is None, cannot proceed with streaming")
-
-        # Check if the client has the required generate_stream method
+
         if not hasattr(client, 'generate_stream'):
             debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
             raise ValueError(f"Client {type(client).__name__} does not support streaming")
-
-        # Set initial model loading state if using Ollama
-        # Always show the model loading indicator for Ollama until we confirm otherwise
+
         is_ollama = 'ollama' in str(type(client)).lower()
         debug_log(f"Is Ollama client: {is_ollama}")
-
+
         if is_ollama and hasattr(app, 'query_one'):
             try:
-                # Show model loading indicator by default for Ollama
                 debug_log("Showing initial model loading indicator for Ollama")
                 logger.info("Showing initial model loading indicator for Ollama")
                 loading = app.query_one("#loading-indicator")
@@ -200,12 +186,10 @@ async def generate_streaming_response(
             except Exception as e:
                 debug_log(f"Error setting initial Ollama loading state: {str(e)}")
                 logger.error(f"Error setting initial Ollama loading state: {str(e)}")
-
-        # Now proceed with streaming
+
         debug_log(f"Starting stream generation with messages length: {len(messages)}")
         logger.info(f"Starting stream generation for model: {model}")
-
-        # Defensive approach - wrap the stream generation in a try-except
+
         try:
             debug_log("Calling client.generate_stream()")
             stream_generator = client.generate_stream(messages, model, style)
@@ -213,9 +197,8 @@ async def generate_streaming_response(
         except Exception as stream_init_error:
             debug_log(f"Error initializing stream generator: {str(stream_init_error)}")
             logger.error(f"Error initializing stream generator: {str(stream_init_error)}")
-            raise
-
-        # After getting the generator, check if we're NOT in model loading state
+            raise
+
         if hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
             try:
                 debug_log("Model is ready for generation, updating UI")
@@ -226,42 +209,31 @@ async def generate_streaming_response(
             except Exception as e:
                 debug_log(f"Error updating UI after stream init: {str(e)}")
                 logger.error(f"Error updating UI after stream init: {str(e)}")
-
-        # Process the stream with careful error handling
+
         debug_log("Beginning to process stream chunks")
         try:
             async for chunk in stream_generator:
-                # Check for cancellation frequently
                 if asyncio.current_task().cancelled():
                     debug_log("Task cancellation detected during chunk processing")
                     logger.info("Task cancellation detected during chunk processing")
-                    # Close the client stream if possible
                     if hasattr(client, 'cancel_stream'):
                         debug_log("Calling client.cancel_stream() due to task cancellation")
                         await client.cancel_stream()
                     raise asyncio.CancelledError()
-
-                # Check if model loading state changed, but more safely
+
                 if hasattr(client, 'is_loading_model'):
                     try:
-                        # Get the model loading state
                         model_loading = client.is_loading_model()
                         debug_log(f"Model loading state: {model_loading}")
-
-                        # Safely update the UI elements if they exist
                         if hasattr(app, 'query_one'):
                             try:
                                 loading = app.query_one("#loading-indicator")
-
-                                # Check for class existence first
                                 if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
-                                    # Model loading started
                                     debug_log("Model loading started during streaming")
                                     logger.info("Model loading started during streaming")
                                     loading.add_class("model-loading")
                                     loading.update("⚙️ Loading Ollama model...")
                                 elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
-                                    # Model loading finished
                                     debug_log("Model loading finished during streaming")
                                     logger.info("Model loading finished during streaming")
                                     loading.remove_class("model-loading")
@@ -272,56 +244,51 @@ async def generate_streaming_response(
                     except Exception as e:
                         debug_log(f"Error checking model loading state: {str(e)}")
                         logger.error(f"Error checking model loading state: {str(e)}")
-
-
-                if chunk: # Only process non-empty chunks
-                    # Ensure chunk is a string - critical fix for providers returning other types
+
+                if chunk:
                     if not isinstance(chunk, str):
                         debug_log(f"WARNING: Received non-string chunk of type: {type(chunk).__name__}")
                         try:
-                            # Try to convert to string if possible
                             chunk = str(chunk)
                             debug_log(f"Successfully converted chunk to string, length: {len(chunk)}")
                         except Exception as e:
                             debug_log(f"Error converting chunk to string: {str(e)}")
-                            # Skip this chunk since it can't be converted
                             continue
-
+
                     debug_log(f"Received chunk of length: {len(chunk)}")
                     buffer.append(chunk)
                     current_time = time.time()
-
-                    #
-
-
-                        len(
-
-
+
+                    # Always update immediately for the first few chunks
+                    if (current_time - last_update >= update_interval or
+                        len(''.join(buffer)) > 5 or # Reduced buffer size threshold
+                        len(full_response) < 50): # More aggressive updates for early content
+
                         new_content = ''.join(buffer)
                         full_response += new_content
-                        # Send content to UI
                         debug_log(f"Updating UI with content length: {len(full_response)}")
+
+                        # Print to console for debugging
+                        print(f"Streaming update: +{len(new_content)} chars, total: {len(full_response)}")
+
                         try:
+                            # Call the UI callback with the full response so far
                            await callback(full_response)
                            debug_log("UI callback completed successfully")
+
+                            # Force app refresh after each update
+                            if hasattr(app, 'refresh'):
+                                app.refresh(layout=True) # Force layout refresh for all models
                         except Exception as callback_err:
                            debug_log(f"Error in UI callback: {str(callback_err)}")
                            logger.error(f"Error in UI callback: {str(callback_err)}")
+                            print(f"Error updating UI: {str(callback_err)}")
+
                         buffer = []
                         last_update = current_time
 
-                    #
-
-                    debug_log("Forcing UI refresh for Ollama response")
-                    try:
-                        # Ensure the app refreshes the UI
-                        if hasattr(app, 'refresh'):
-                            app.refresh(layout=False)
-                    except Exception as refresh_err:
-                        debug_log(f"Error forcing UI refresh: {str(refresh_err)}")
-
-                    # Small delay to let UI catch up
-                    await asyncio.sleep(0.05)
+                    # Shorter sleep between updates for more responsive streaming
+                    await asyncio.sleep(0.02)
         except asyncio.CancelledError:
             debug_log("CancelledError in stream processing")
             raise
@@ -330,7 +297,6 @@ async def generate_streaming_response(
             logger.error(f"Error processing stream chunks: {str(chunk_error)}")
             raise
 
-        # Send any remaining content if the loop finished normally
         if buffer:
             new_content = ''.join(buffer)
             full_response += new_content
@@ -338,29 +304,48 @@ async def generate_streaming_response(
             try:
                 await callback(full_response)
                 debug_log("Final UI callback completed successfully")
-
-
-
-
-
-
-
-                app.
-
-
+
+                debug_log("Forcing final UI refresh sequence for all models")
+                try:
+                    if hasattr(app, 'refresh'):
+                        app.refresh(layout=False)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container and hasattr(messages_container, 'scroll_end'):
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+                        app.refresh(layout=True)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container and hasattr(messages_container, 'scroll_end'):
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+                except Exception as refresh_err:
+                    debug_log(f"Error forcing final UI refresh: {str(refresh_err)}")
             except Exception as callback_err:
                 debug_log(f"Error in final UI callback: {str(callback_err)}")
                 logger.error(f"Error in final UI callback: {str(callback_err)}")
 
+        try:
+            await asyncio.sleep(0.05)
+            debug_log("Sending one final callback to ensure UI refresh")
+            await callback(full_response)
+            if hasattr(app, 'refresh'):
+                app.refresh(layout=True)
+        except Exception as final_err:
+            debug_log(f"Error in final extra callback: {str(final_err)}")
+
         debug_log(f"Streaming response completed successfully. Response length: {len(full_response)}")
         logger.info(f"Streaming response completed successfully. Response length: {len(full_response)}")
         return full_response
-
+
     except asyncio.CancelledError:
-        # This is expected when the user cancels via Escape
         debug_log(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
         logger.info(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
-        # Ensure the client stream is closed
         if hasattr(client, 'cancel_stream'):
             debug_log("Calling client.cancel_stream() after cancellation")
             try:
@@ -368,13 +353,11 @@ async def generate_streaming_response(
                 debug_log("Successfully cancelled client stream")
             except Exception as cancel_err:
                 debug_log(f"Error cancelling client stream: {str(cancel_err)}")
-        # Return whatever was collected so far
         return full_response
-
+
     except Exception as e:
         debug_log(f"Error during streaming response: {str(e)}")
         logger.error(f"Error during streaming response: {str(e)}")
-        # Close the client stream if possible
         if hasattr(client, 'cancel_stream'):
             debug_log("Attempting to cancel client stream after error")
             try:
@@ -382,21 +365,13 @@ async def generate_streaming_response(
                 debug_log("Successfully cancelled client stream after error")
             except Exception as cancel_err:
                 debug_log(f"Error cancelling client stream after error: {str(cancel_err)}")
-        # Re-raise the exception for the worker runner to handle
-        # The @work decorator might catch this depending on exit_on_error
         raise
+
     finally:
-        # Basic cleanup within the worker itself (optional, main cleanup in app)
         debug_log("generate_streaming_response worker finished or errored.")
-        # Return the full response if successful, otherwise error is raised or cancellation occurred
-        # Note: If cancelled, CancelledError is raised, and @work might handle it.
-        # If successful, return the response.
-        # If error, exception is raised.
-        # Let's explicitly return the response on success.
-        # If cancelled or error, this return might not be reached.
         if 'full_response' in locals():
-
-        return None
+            return full_response
+        return None
 
 async def ensure_ollama_running() -> bool:
     """
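The streaming loop in the hunks above coalesces incoming chunks into a buffer and flushes them to the UI callback whenever the update interval elapses, the buffer grows past a few characters, or the response is still short. A self-contained sketch of that coalescing strategy, detached from Textual (thresholds mirror the diff; the function name is illustrative, not the package's):

    import asyncio
    import time
    from typing import AsyncIterator, Awaitable, Callable

    async def stream_with_coalescing(
        chunks: AsyncIterator[str],
        callback: Callable[[str], Awaitable[None]],
        update_interval: float = 0.05,
    ) -> str:
        full_response = ""
        buffer: list[str] = []
        last_update = time.time()
        async for chunk in chunks:
            buffer.append(chunk)
            now = time.time()
            if (now - last_update >= update_interval
                    or len("".join(buffer)) > 5
                    or len(full_response) < 50):
                full_response += "".join(buffer)
                await callback(full_response)  # push the accumulated text to the UI
                buffer = []
                last_update = now
            await asyncio.sleep(0.02)          # brief yield so the UI can repaint
        if buffer:                             # flush whatever is left at the end
            full_response += "".join(buffer)
            await callback(full_response)
        return full_response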
@@ -463,6 +438,8 @@ def resolve_model_id(model_id_or_name: str) -> str:
     """
     Resolves a potentially short model ID or display name to the full model ID
     stored in the configuration. Tries multiple matching strategies.
+
+    Fix: Only apply dot-to-colon conversion for Ollama models, not for OpenAI/Anthropic/custom.
     """
     if not model_id_or_name:
         logger.warning("resolve_model_id called with empty input, returning empty string.")
@@ -476,17 +453,25 @@ def resolve_model_id(model_id_or_name: str) -> str:
         logger.warning("No available_models found in CONFIG to resolve against.")
         return model_id_or_name # Return original if no models to check
 
+    # Determine provider if possible
+    provider = None
+    if input_lower in available_models:
+        provider = available_models[input_lower].get("provider")
+    else:
+        # Try to find by display name
+        for model_info in available_models.values():
+            if model_info.get("display_name", "").lower() == input_lower:
+                provider = model_info.get("provider")
+                break
+
     # Special case for Ollama models with version format (model:version)
-    if ":" in input_lower and not input_lower.startswith("claude-"):
+    if provider == "ollama" and ":" in input_lower and not input_lower.startswith("claude-"):
         logger.info(f"Input '{input_lower}' appears to be an Ollama model with version, returning as-is")
         return model_id_or_name
 
-    #
-
-    if "." in input_lower and not input_lower.startswith("claude-"):
-        # This is likely an Ollama model with dot notation
+    # Only apply dot-to-colon for Ollama models
+    if provider == "ollama" and "." in input_lower and not input_lower.startswith("claude-"):
         logger.info(f"Input '{input_lower}' appears to be an Ollama model with dot notation")
-        # Convert dots to colons for Ollama format if needed
         if ":" not in input_lower:
             parts = input_lower.split(".")
             if len(parts) == 2:
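The fix keys the dot-to-colon rewrite on the provider recorded in CONFIG, so only Ollama-style ids are rewritten and OpenAI/Anthropic/custom ids that happen to contain a dot are left alone. A condensed standalone sketch of the provider-aware check (the sample AVAILABLE_MODELS content is invented for illustration and is not the package's real config):

    from typing import Dict

    AVAILABLE_MODELS: Dict[str, Dict[str, str]] = {
        "mymodel.latest": {"provider": "ollama", "display_name": "My Model (latest)"},
        "gpt-4-turbo": {"provider": "openai", "display_name": "GPT-4 Turbo"},
    }

    def resolve(model_id: str) -> str:
        input_lower = model_id.lower()
        provider = None
        if input_lower in AVAILABLE_MODELS:
            provider = AVAILABLE_MODELS[input_lower].get("provider")
        else:
            for info in AVAILABLE_MODELS.values():
                if info.get("display_name", "").lower() == input_lower:
                    provider = info.get("provider")
                    break
        # Only Ollama ids get the dot-to-colon version rewrite
        if provider == "ollama" and "." in input_lower and ":" not in input_lower:
            parts = input_lower.split(".")
            if len(parts) == 2:
                return f"{parts[0]}:{parts[1]}"
        return model_id

    # resolve("mymodel.latest") -> "mymodel:latest"
    # resolve("gpt-4-turbo")    -> "gpt-4-turbo" (unchanged)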
{chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.3.0
+Version: 0.3.4
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway