chat-console 0.3.0__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chat_console-0.3.0 → chat_console-0.3.4}/PKG-INFO +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/app/__init__.py +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/openai.py +17 -7
- {chat_console-0.3.0 → chat_console-0.3.4}/app/main.py +63 -53
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_interface.py +24 -29
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_selector.py +28 -3
- {chat_console-0.3.0 → chat_console-0.3.4}/app/utils.py +98 -113
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/PKG-INFO +1 -1
- {chat_console-0.3.0 → chat_console-0.3.4}/LICENSE +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/README.md +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/__init__.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/anthropic.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/base.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/api/ollama.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/config.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/database.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/models.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/__init__.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_list.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_browser.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/search.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/app/ui/styles.py +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/SOURCES.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/dependency_links.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/entry_points.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/requires.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/top_level.txt +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/setup.cfg +0 -0
- {chat_console-0.3.0 → chat_console-0.3.4}/setup.py +0 -0
{chat_console-0.3.0 → chat_console-0.3.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.3.0
+Version: 0.3.4
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway
{chat_console-0.3.0 → chat_console-0.3.4}/app/api/openai.py
@@ -168,10 +168,20 @@ class OpenAIClient(BaseModelClient):
             yield f"Error: {str(e)}"
             raise Exception(f"OpenAI streaming error: {str(e)}")
 
-    def get_available_models(self) -> List[Dict[str, Any]]:
-        """
-
-
-
-
-
+    async def get_available_models(self) -> List[Dict[str, Any]]:
+        """Fetch list of available OpenAI models from the /models endpoint"""
+        try:
+            models_response = await self.client.models.list()
+            # Each model has an 'id' and possibly other metadata
+            models = []
+            for model in models_response.data:
+                # Use 'id' as both id and name for now; can enhance with more info if needed
+                models.append({"id": model.id, "name": model.id})
+            return models
+        except Exception as e:
+            # Fallback to a static list if API call fails
+            return [
+                {"id": "gpt-3.5-turbo", "name": "gpt-3.5-turbo"},
+                {"id": "gpt-4", "name": "gpt-4"},
+                {"id": "gpt-4-turbo", "name": "gpt-4-turbo"}
+            ]
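Because get_available_models is now a coroutine, any caller that previously invoked it synchronously must be updated to await it. A minimal sketch of the new calling convention, assuming the async OpenAIClient.create() factory that the model selector hunks below also use (the standalone caller itself is illustrative, not code from the package):

    import asyncio
    from app.api.openai import OpenAIClient

    async def list_openai_models() -> None:
        # Hypothetical standalone caller, not part of chat-console
        client = await OpenAIClient.create()
        models = await client.get_available_models()  # [{"id": ..., "name": ...}, ...]
        for model in models:
            print(model["id"])

    if __name__ == "__main__":
        asyncio.run(list_openai_models())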
{chat_console-0.3.0 → chat_console-0.3.4}/app/main.py
@@ -647,14 +647,18 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         # Only attempt title generation if the message has sufficient content (at least 3 characters)
         if is_first_message and self.current_conversation and CONFIG.get("generate_dynamic_titles", True) and len(content) >= 3:
             log("First message detected, generating title...")
+            print(f"First message detected, generating conversation title for: {content[:30]}...")
             debug_log(f"First message detected with length {len(content)}, generating conversation title")
-
+
+            # Show loading indicator for title generation
             loading = self.query_one("#loading-indicator")
-            loading.remove_class("hidden")
+            loading.remove_class("hidden")
+            loading.update("🔤 Generating title...")
 
             try:
                 # Get appropriate client
                 model = self.selected_model
+                print(f"Using model for title generation: {model}")
                 debug_log(f"Selected model for title generation: '{model}'")
 
                 # Check if model is valid
@@ -665,24 +669,12 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                         model = "gpt-3.5-turbo"
                         debug_log("Falling back to OpenAI gpt-3.5-turbo for title generation")
                     elif ANTHROPIC_API_KEY:
-                        model = "claude-
-                        debug_log("Falling back to Anthropic
+                        model = "claude-3-haiku-20240307" # Updated to newer Claude model
+                        debug_log("Falling back to Anthropic Claude 3 Haiku for title generation")
                     else:
-                        # Last resort -
-
-
-                            ollama = await OllamaClient.create()
-                            models = await ollama.get_available_models()
-                            if models and len(models) > 0:
-                                debug_log(f"Found {len(models)} Ollama models, using first one")
-                                model = models[0].get("id", "llama3")
-                            else:
-                                model = "llama3" # Common default
-                            debug_log(f"Falling back to Ollama model: {model}")
-                        except Exception as ollama_err:
-                            debug_log(f"Error getting Ollama models: {str(ollama_err)}")
-                            model = "llama3" # Final fallback
-                            debug_log("Final fallback to llama3")
+                        # Last resort - use a common Ollama model
+                        model = "llama3" # Common default
+                        debug_log("Falling back to Ollama model: llama3")
 
                     debug_log(f"Getting client for model: {model}")
                     client = await BaseModelClient.get_client_for_model(model)
@@ -710,17 +702,19 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                     elif ANTHROPIC_API_KEY:
                         from app.api.anthropic import AnthropicClient
                         client = await AnthropicClient.create()
-                        model = "claude-
+                        model = "claude-3-haiku-20240307" # Updated to newer Claude model
                         debug_log("Falling back to Anthropic for title generation")
                     else:
                         raise Exception("No valid API clients available for title generation")
 
                 # Generate title
+                print(f"Calling generate_conversation_title with model: {model}")
                 log(f"Calling generate_conversation_title with model: {model}")
                 debug_log(f"Calling generate_conversation_title with model: {model}")
                 title = await generate_conversation_title(content, model, client)
                 debug_log(f"Generated title: {title}")
                 log(f"Generated title: {title}")
+                print(f"Generated title: {title}")
 
                 # Update conversation title in database
                 self.db.update_conversation(
@@ -746,11 +740,10 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
             except Exception as e:
                 debug_log(f"Failed to generate title: {str(e)}")
                 log.error(f"Failed to generate title: {str(e)}")
+                print(f"Failed to generate title: {str(e)}")
                 self.notify(f"Failed to generate title: {str(e)}", severity="warning")
             finally:
-                title_generation_in_progress = False
                 # Hide loading indicator *only if* AI response generation isn't about to start
-                # This check might be redundant if generate_response always shows it anyway
                 if not self.is_generating:
                     loading.add_class("hidden")
 
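Taken together, the title-generation hunks above establish a provider fallback order: keep the selected model when it is valid, otherwise gpt-3.5-turbo when OPENAI_API_KEY is set, then claude-3-haiku-20240307 when ANTHROPIC_API_KEY is set, and finally the local Ollama default llama3. A hypothetical standalone sketch of that ordering (the helper and its parameters are illustrative, not the package's code):

    from typing import Optional

    def pick_title_model(selected: Optional[str],
                         openai_key: Optional[str],
                         anthropic_key: Optional[str]) -> str:
        # Illustrative only: mirrors the fallback order in the hunks above
        if selected:
            return selected                      # use the already-selected chat model
        if openai_key:
            return "gpt-3.5-turbo"               # OpenAI fallback
        if anthropic_key:
            return "claude-3-haiku-20240307"     # newer Claude model replacing the old id
        return "llama3"                          # last resort: common local Ollama model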
@@ -910,13 +903,17 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
         # Start streaming response
         debug_log("Creating assistant message with 'Thinking...'")
+        print("Creating assistant message with 'Thinking...'")
         assistant_message = Message(role="assistant", content="Thinking...")
         self.messages.append(assistant_message)
         messages_container = self.query_one("#messages-container")
         message_display = MessageDisplay(assistant_message, highlight_code=CONFIG["highlight_code"])
         messages_container.mount(message_display)
+
+        # Force a layout refresh and scroll to end
+        self.refresh(layout=True)
         messages_container.scroll_end(animate=False)
-
+
         # Add small delay to show thinking state
         await asyncio.sleep(0.5)
 
@@ -943,58 +940,71 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                 # Update the message object with the full content
                 assistant_message.content = content
 
-                # Update UI with the content -
+                # Update UI with the content - the MessageDisplay will now handle its own refresh
+                # This is a critical change that ensures content is immediately visible
                 await message_display.update_content(content)
 
-                # Force
-                # This
-                self.refresh(layout=
-
-                #
+                # CRITICAL: Force immediate UI refresh after EVERY update
+                # This ensures we don't need a second Enter press to see content
+                self.refresh(layout=True)
+
+                # Always scroll after each update to ensure visibility
                 messages_container.scroll_end(animate=False)
 
-                #
-                #
-                # This improves stability at the cost of slightly choppier animations
+                # For longer responses, we can throttle the heavy refreshes
+                # to reduce visual jitter, but still do light refreshes for every update
                 content_length = len(content)
 
-                # Define
+                # Define key refresh points that require more thorough updates
                 new_paragraph = content.endswith("\n") and content.count("\n") > 0
                 code_block = "```" in content
-
-                    content_length <
-                    content_length %
-                    new_paragraph or
-                    code_block
+                needs_thorough_refresh = (
+                    content_length < 30 or # Very aggressive for short responses
+                    content_length % 16 == 0 or # More frequent periodic updates
+                    new_paragraph or # Refresh on paragraph breaks
+                    code_block # Refresh when code blocks are detected
                 )
 
-                # Check if it's been enough time since last refresh
+                # Check if it's been enough time since last heavy refresh
+                # Reduced from 200ms to 100ms for more responsive UI
                 current_time = time.time()
                 time_since_refresh = current_time - last_refresh_time
 
-                if
-                    # Store the time we did the refresh
+                if needs_thorough_refresh and time_since_refresh > 0.1:
+                    # Store the time we did the heavy refresh
                     last_refresh_time = current_time
-
+
+                    # Ensure content is visible with an aggressive, guaranteed update sequence
+                    # 1. Scroll to ensure visibility
                     messages_container.scroll_end(animate=False)
-
+
+                    # 2. Force a comprehensive refresh with layout recalculation
                     self.refresh(layout=True)
+
+                    # 3. Small delay for rendering
+                    await asyncio.sleep(0.01)
+
+                    # 4. Another scroll to account for any layout changes
+                    messages_container.scroll_end(animate=False)
+
             except Exception as e:
                 debug_log(f"Error updating UI: {str(e)}")
                 log.error(f"Error updating UI: {str(e)}")
 
         # --- Remove the inner run_generation_worker function ---
 
-        # Start the worker
-        debug_log("Starting generate_streaming_response worker")
-
-
-
-
-
-
-
-
+        # Start the worker using Textual's run_worker to ensure state tracking
+        debug_log("Starting generate_streaming_response worker with run_worker")
+        worker = self.run_worker(
+            generate_streaming_response(
+                self,
+                api_messages,
+                model,
+                style,
+                client,
+                update_ui # Pass the callback function
+            ),
+            name="generate_response"
         )
         self.current_generation_task = worker
         # Worker completion will be handled by on_worker_state_changed
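The last hunk above replaces the @work-decorated worker with an explicit self.run_worker(...) call, so the app now holds the Worker handle and observes completion through worker state events. A minimal, generic sketch of that pattern, assuming Textual's App.run_worker and Worker.StateChanged API (the app and task names here are illustrative, not the package's actual handler):

    from textual.app import App
    from textual.worker import Worker, WorkerState

    async def long_task() -> str:
        # Stand-in for generate_streaming_response(...)
        return "done"

    class WorkerDemo(App):
        def on_mount(self) -> None:
            # run_worker accepts a coroutine and returns a Worker handle
            self.generation_worker = self.run_worker(long_task(), name="generate_response")

        def on_worker_state_changed(self, event: Worker.StateChanged) -> None:
            # Completion, cancellation and errors arrive as state-change events
            if event.worker.name == "generate_response" and event.state == WorkerState.SUCCESS:
                self.log(f"worker finished: {event.worker.result!r}")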
{chat_console-0.3.0 → chat_console-0.3.4}/app/ui/chat_interface.py
@@ -132,37 +132,29 @@ class MessageDisplay(Static): # Inherit from Static instead of RichLog
         # This avoids text reflowing as new tokens arrive
         formatted_content = self._format_content(content)
 
-        # Use
-        # This
-        self.update(formatted_content, refresh=
+        # Use a direct update that forces refresh - critical fix for streaming
+        # This ensures content is immediately visible
+        self.update(formatted_content, refresh=True)
 
-        #
-        # This is critical for streaming to work properly
-        self.refresh(layout=False)
-
-        # For Ollama responses, we need more aggressive refresh
-        # Check if this is likely an Ollama response by looking at the parent app
+        # Force app-level refresh and scroll to ensure visibility
         try:
-            app
-            if app
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        except Exception:
-            # Ignore any errors in this detection logic
-            pass
+            # Always force app refresh for every update
+            if self.app:
+                # Force a full layout refresh to ensure content is visible
+                self.app.refresh(layout=True)
+
+                # Find the messages container and scroll to end
+                containers = self.app.query("ScrollableContainer")
+                for container in containers:
+                    if hasattr(container, 'scroll_end'):
+                        container.scroll_end(animate=False)
+        except Exception as e:
+            # Log the error and fallback to local refresh
+            print(f"Error refreshing app: {str(e)}")
+            self.refresh(layout=True)
+
+        # Small delay to allow UI to update
+        await asyncio.sleep(0.02) # Increased delay for better rendering
 
     def _format_content(self, content: str) -> str:
         """Format message content with timestamp and handle markdown links"""
@@ -181,6 +173,9 @@ class MessageDisplay(Static): # Inherit from Static instead of RichLog
         # But keep our timestamp markup
         timestamp_markup = f"[dim]{timestamp}[/dim]"
 
+        # Debug print to verify content is being formatted
+        print(f"Formatting content: {len(content)} chars")
+
         return f"{timestamp_markup} {content}"
 
 class InputWithFocus(Input):
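The update pattern in these two hunks is: render the formatted content into the Static widget, force an app-level refresh, then scroll every ScrollableContainer to the end. The sketch below isolates that pattern in a simplified stand-in widget; it uses only the Textual calls that appear in the diff and is not the package's actual MessageDisplay:

    import asyncio
    from textual.containers import ScrollableContainer
    from textual.widgets import Static

    class StreamingMessage(Static):
        _buffer: str = ""

        async def append_chunk(self, chunk: str) -> None:
            self._buffer += chunk
            self.update(self._buffer)                    # re-render the widget content
            if self.app:
                self.app.refresh(layout=True)            # force a layout pass so new text shows
                for container in self.app.query(ScrollableContainer):
                    container.scroll_end(animate=False)  # keep the newest text in view
            await asyncio.sleep(0.02)                    # brief pause to let the UI repaint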
{chat_console-0.3.0 → chat_console-0.3.4}/app/ui/model_selector.py
@@ -162,14 +162,36 @@ class ModelSelector(Container):
         """Get model options for a specific provider"""
         logger = logging.getLogger(__name__)
         logger.info(f"Getting model options for provider: {provider}")
-
+
+        options = []
+
+        if provider == "openai":
+            try:
+                from ..api.openai import OpenAIClient
+                client = await OpenAIClient.create()
+                models = await client.get_available_models()
+                logger.info(f"Found {len(models)} models from OpenAI API")
+                for model in models:
+                    options.append((model["name"], model["id"]))
+            except Exception as e:
+                logger.error(f"Error getting OpenAI models: {str(e)}")
+                # Fallback to static list
+                options = [
+                    ("gpt-3.5-turbo", "gpt-3.5-turbo"),
+                    ("gpt-4", "gpt-4"),
+                    ("gpt-4-turbo", "gpt-4-turbo"),
+                ]
+            # Do NOT add custom model option for OpenAI
+            return options
+
+        # Default: config-based models
         options = [
             (model_info["display_name"], model_id)
             for model_id, model_info in CONFIG["available_models"].items()
             if model_info["provider"] == provider
         ]
         logger.info(f"Found {len(options)} models in config for {provider}")
-
+
         # Add available Ollama models
         if provider == "ollama":
             try:
@@ -214,7 +236,10 @@ class ModelSelector(Container):
                 ]
                 logger.info("Adding default Ollama models as fallback")
                 options.extend(default_models)
-
+            options.append(("Custom Model...", "custom"))
+            return options
+
+        # For Anthropic and others, allow custom model
         options.append(("Custom Model...", "custom"))
         return options
 
{chat_console-0.3.0 → chat_console-0.3.4}/app/utils.py
@@ -116,82 +116,68 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
         return f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
 
 # Make this the worker function directly
-@work(exit_on_error=True)
 async def generate_streaming_response(
     app: 'SimpleChatApp',
     messages: List[Dict],
     model: str,
     style: str,
     client: Any,
-    callback: Callable[[str], Awaitable[None]]
-) -> Optional[str]:
-    """
-
-
+    callback: Callable[[str], Awaitable[None]]
+) -> Optional[str]:
+    """
+    Generate a streaming response from the model (as a Textual worker).
+    Refactored to be a coroutine, not an async generator.
+    """
     try:
         from app.main import debug_log
     except ImportError:
-        debug_log = lambda msg: None
-
-    # Worker function needs to handle its own state and cleanup partially
-    # The main app will also need cleanup logic in generate_response
+        debug_log = lambda msg: None
 
     logger.info(f"Starting streaming response with model: {model}")
     debug_log(f"Starting streaming response with model: '{model}', client type: {type(client).__name__}")
-
-    # Very defensive check of messages format
+
     if not messages:
         debug_log("Error: messages list is empty")
         raise ValueError("Messages list cannot be empty")
-
+
     for i, msg in enumerate(messages):
         try:
             debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
-            # Ensure essential fields exist
            if 'role' not in msg:
                 debug_log(f"Adding missing 'role' to message {i}")
-                msg['role'] = 'user'
+                msg['role'] = 'user'
            if 'content' not in msg:
                 debug_log(f"Adding missing 'content' to message {i}")
-                msg['content'] = ''
+                msg['content'] = ''
         except Exception as e:
             debug_log(f"Error checking message {i}: {str(e)}")
-            # Try to repair the message
             messages[i] = {
                 'role': 'user',
                 'content': str(msg) if msg else ''
             }
             debug_log(f"Repaired message {i}")
-
-    debug_log(f"Messages validation complete: {len(messages)} total messages")
-
-    # Import time module within the worker function scope
+
     import time
-
+
     full_response = ""
     buffer = []
     last_update = time.time()
-    update_interval = 0.
-
+    update_interval = 0.05 # Reduced interval for more frequent updates
+
     try:
-        # Check that we have a valid client and model before proceeding
         if client is None:
             debug_log("Error: client is None, cannot proceed with streaming")
             raise ValueError("Model client is None, cannot proceed with streaming")
-
-        # Check if the client has the required generate_stream method
+
         if not hasattr(client, 'generate_stream'):
             debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
             raise ValueError(f"Client {type(client).__name__} does not support streaming")
-
-        # Set initial model loading state if using Ollama
-        # Always show the model loading indicator for Ollama until we confirm otherwise
+
         is_ollama = 'ollama' in str(type(client)).lower()
         debug_log(f"Is Ollama client: {is_ollama}")
-
+
         if is_ollama and hasattr(app, 'query_one'):
             try:
-                # Show model loading indicator by default for Ollama
                 debug_log("Showing initial model loading indicator for Ollama")
                 logger.info("Showing initial model loading indicator for Ollama")
                 loading = app.query_one("#loading-indicator")
@@ -200,12 +186,10 @@ async def generate_streaming_response(
             except Exception as e:
                 debug_log(f"Error setting initial Ollama loading state: {str(e)}")
                 logger.error(f"Error setting initial Ollama loading state: {str(e)}")
-
-        # Now proceed with streaming
+
         debug_log(f"Starting stream generation with messages length: {len(messages)}")
         logger.info(f"Starting stream generation for model: {model}")
-
-        # Defensive approach - wrap the stream generation in a try-except
+
         try:
             debug_log("Calling client.generate_stream()")
             stream_generator = client.generate_stream(messages, model, style)
@@ -213,9 +197,8 @@ async def generate_streaming_response(
         except Exception as stream_init_error:
             debug_log(f"Error initializing stream generator: {str(stream_init_error)}")
             logger.error(f"Error initializing stream generator: {str(stream_init_error)}")
-            raise
-
-        # After getting the generator, check if we're NOT in model loading state
+            raise
+
         if hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
             try:
                 debug_log("Model is ready for generation, updating UI")
@@ -226,42 +209,31 @@ async def generate_streaming_response(
             except Exception as e:
                 debug_log(f"Error updating UI after stream init: {str(e)}")
                 logger.error(f"Error updating UI after stream init: {str(e)}")
-
-        # Process the stream with careful error handling
+
         debug_log("Beginning to process stream chunks")
         try:
             async for chunk in stream_generator:
-                # Check for cancellation frequently
                 if asyncio.current_task().cancelled():
                     debug_log("Task cancellation detected during chunk processing")
                     logger.info("Task cancellation detected during chunk processing")
-                    # Close the client stream if possible
                     if hasattr(client, 'cancel_stream'):
                         debug_log("Calling client.cancel_stream() due to task cancellation")
                         await client.cancel_stream()
                     raise asyncio.CancelledError()
-
-                # Check if model loading state changed, but more safely
+
                 if hasattr(client, 'is_loading_model'):
                     try:
-                        # Get the model loading state
                         model_loading = client.is_loading_model()
                         debug_log(f"Model loading state: {model_loading}")
-
-                        # Safely update the UI elements if they exist
                         if hasattr(app, 'query_one'):
                             try:
                                 loading = app.query_one("#loading-indicator")
-
-                                # Check for class existence first
                                 if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
-                                    # Model loading started
                                     debug_log("Model loading started during streaming")
                                     logger.info("Model loading started during streaming")
                                     loading.add_class("model-loading")
                                     loading.update("⚙️ Loading Ollama model...")
                                 elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
-                                    # Model loading finished
                                     debug_log("Model loading finished during streaming")
                                     logger.info("Model loading finished during streaming")
                                     loading.remove_class("model-loading")
@@ -272,56 +244,51 @@ async def generate_streaming_response(
                     except Exception as e:
                         debug_log(f"Error checking model loading state: {str(e)}")
                         logger.error(f"Error checking model loading state: {str(e)}")
-
-
-                if chunk: # Only process non-empty chunks
-                    # Ensure chunk is a string - critical fix for providers returning other types
+
+                if chunk:
                     if not isinstance(chunk, str):
                         debug_log(f"WARNING: Received non-string chunk of type: {type(chunk).__name__}")
                         try:
-                            # Try to convert to string if possible
                             chunk = str(chunk)
                             debug_log(f"Successfully converted chunk to string, length: {len(chunk)}")
                         except Exception as e:
                             debug_log(f"Error converting chunk to string: {str(e)}")
-                            # Skip this chunk since it can't be converted
                             continue
-
+
                     debug_log(f"Received chunk of length: {len(chunk)}")
                     buffer.append(chunk)
                     current_time = time.time()
-
-                    #
-
-
-                        len(
-
-
+
+                    # Always update immediately for the first few chunks
+                    if (current_time - last_update >= update_interval or
+                        len(''.join(buffer)) > 5 or # Reduced buffer size threshold
+                        len(full_response) < 50): # More aggressive updates for early content
+
                         new_content = ''.join(buffer)
                         full_response += new_content
-                        # Send content to UI
                         debug_log(f"Updating UI with content length: {len(full_response)}")
+
+                        # Print to console for debugging
+                        print(f"Streaming update: +{len(new_content)} chars, total: {len(full_response)}")
+
                         try:
+                            # Call the UI callback with the full response so far
                            await callback(full_response)
                            debug_log("UI callback completed successfully")
+
+                            # Force app refresh after each update
+                            if hasattr(app, 'refresh'):
+                                app.refresh(layout=True) # Force layout refresh for all models
                         except Exception as callback_err:
                            debug_log(f"Error in UI callback: {str(callback_err)}")
                            logger.error(f"Error in UI callback: {str(callback_err)}")
+                            print(f"Error updating UI: {str(callback_err)}")
+
                         buffer = []
                         last_update = current_time
 
-                    #
-
-                    debug_log("Forcing UI refresh for Ollama response")
-                    try:
-                        # Ensure the app refreshes the UI
-                        if hasattr(app, 'refresh'):
-                            app.refresh(layout=False)
-                    except Exception as refresh_err:
-                        debug_log(f"Error forcing UI refresh: {str(refresh_err)}")
-
-                    # Small delay to let UI catch up
-                    await asyncio.sleep(0.05)
+                    # Shorter sleep between updates for more responsive streaming
+                    await asyncio.sleep(0.02)
         except asyncio.CancelledError:
             debug_log("CancelledError in stream processing")
             raise
@@ -330,7 +297,6 @@ async def generate_streaming_response(
             logger.error(f"Error processing stream chunks: {str(chunk_error)}")
             raise
 
-        # Send any remaining content if the loop finished normally
         if buffer:
             new_content = ''.join(buffer)
             full_response += new_content
@@ -338,29 +304,48 @@ async def generate_streaming_response(
             try:
                 await callback(full_response)
                 debug_log("Final UI callback completed successfully")
-
-
-
-
-
-
-
-                app.
-
-
+
+                debug_log("Forcing final UI refresh sequence for all models")
+                try:
+                    if hasattr(app, 'refresh'):
+                        app.refresh(layout=False)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container and hasattr(messages_container, 'scroll_end'):
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+                        app.refresh(layout=True)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container and hasattr(messages_container, 'scroll_end'):
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+                except Exception as refresh_err:
+                    debug_log(f"Error forcing final UI refresh: {str(refresh_err)}")
             except Exception as callback_err:
                 debug_log(f"Error in final UI callback: {str(callback_err)}")
                 logger.error(f"Error in final UI callback: {str(callback_err)}")
 
+        try:
+            await asyncio.sleep(0.05)
+            debug_log("Sending one final callback to ensure UI refresh")
+            await callback(full_response)
+            if hasattr(app, 'refresh'):
+                app.refresh(layout=True)
+        except Exception as final_err:
+            debug_log(f"Error in final extra callback: {str(final_err)}")
+
         debug_log(f"Streaming response completed successfully. Response length: {len(full_response)}")
         logger.info(f"Streaming response completed successfully. Response length: {len(full_response)}")
         return full_response
-
+
     except asyncio.CancelledError:
-        # This is expected when the user cancels via Escape
         debug_log(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
         logger.info(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
-        # Ensure the client stream is closed
         if hasattr(client, 'cancel_stream'):
             debug_log("Calling client.cancel_stream() after cancellation")
             try:
@@ -368,13 +353,11 @@ async def generate_streaming_response(
                 debug_log("Successfully cancelled client stream")
             except Exception as cancel_err:
                 debug_log(f"Error cancelling client stream: {str(cancel_err)}")
-        # Return whatever was collected so far
         return full_response
-
+
     except Exception as e:
         debug_log(f"Error during streaming response: {str(e)}")
         logger.error(f"Error during streaming response: {str(e)}")
-        # Close the client stream if possible
         if hasattr(client, 'cancel_stream'):
             debug_log("Attempting to cancel client stream after error")
             try:
@@ -382,21 +365,13 @@ async def generate_streaming_response(
                 debug_log("Successfully cancelled client stream after error")
             except Exception as cancel_err:
                 debug_log(f"Error cancelling client stream after error: {str(cancel_err)}")
-        # Re-raise the exception for the worker runner to handle
-        # The @work decorator might catch this depending on exit_on_error
         raise
+
     finally:
-        # Basic cleanup within the worker itself (optional, main cleanup in app)
         debug_log("generate_streaming_response worker finished or errored.")
-        # Return the full response if successful, otherwise error is raised or cancellation occurred
-        # Note: If cancelled, CancelledError is raised, and @work might handle it.
-        # If successful, return the response.
-        # If error, exception is raised.
-        # Let's explicitly return the response on success.
-        # If cancelled or error, this return might not be reached.
         if 'full_response' in locals():
-
-        return None
+            return full_response
+        return None
 
 async def ensure_ollama_running() -> bool:
     """
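The streaming loop in the hunks above coalesces incoming chunks into a buffer and flushes them to the UI callback whenever the update interval elapses, the buffer grows past a few characters, or the response is still short. A self-contained sketch of that coalescing strategy, detached from Textual (thresholds mirror the diff; the function name is illustrative, not the package's):

    import asyncio
    import time
    from typing import AsyncIterator, Awaitable, Callable

    async def stream_with_coalescing(
        chunks: AsyncIterator[str],
        callback: Callable[[str], Awaitable[None]],
        update_interval: float = 0.05,
    ) -> str:
        full_response = ""
        buffer: list[str] = []
        last_update = time.time()
        async for chunk in chunks:
            buffer.append(chunk)
            now = time.time()
            if (now - last_update >= update_interval
                    or len("".join(buffer)) > 5
                    or len(full_response) < 50):
                full_response += "".join(buffer)
                await callback(full_response)  # push the accumulated text to the UI
                buffer = []
                last_update = now
            await asyncio.sleep(0.02)          # brief yield so the UI can repaint
        if buffer:                             # flush whatever is left at the end
            full_response += "".join(buffer)
            await callback(full_response)
        return full_response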
@@ -463,6 +438,8 @@ def resolve_model_id(model_id_or_name: str) -> str:
     """
     Resolves a potentially short model ID or display name to the full model ID
     stored in the configuration. Tries multiple matching strategies.
+
+    Fix: Only apply dot-to-colon conversion for Ollama models, not for OpenAI/Anthropic/custom.
     """
     if not model_id_or_name:
         logger.warning("resolve_model_id called with empty input, returning empty string.")
@@ -476,17 +453,25 @@ def resolve_model_id(model_id_or_name: str) -> str:
         logger.warning("No available_models found in CONFIG to resolve against.")
         return model_id_or_name # Return original if no models to check
 
+    # Determine provider if possible
+    provider = None
+    if input_lower in available_models:
+        provider = available_models[input_lower].get("provider")
+    else:
+        # Try to find by display name
+        for model_info in available_models.values():
+            if model_info.get("display_name", "").lower() == input_lower:
+                provider = model_info.get("provider")
+                break
+
     # Special case for Ollama models with version format (model:version)
-    if ":" in input_lower and not input_lower.startswith("claude-"):
+    if provider == "ollama" and ":" in input_lower and not input_lower.startswith("claude-"):
         logger.info(f"Input '{input_lower}' appears to be an Ollama model with version, returning as-is")
         return model_id_or_name
 
-    #
-
-    if "." in input_lower and not input_lower.startswith("claude-"):
-        # This is likely an Ollama model with dot notation
+    # Only apply dot-to-colon for Ollama models
+    if provider == "ollama" and "." in input_lower and not input_lower.startswith("claude-"):
         logger.info(f"Input '{input_lower}' appears to be an Ollama model with dot notation")
-        # Convert dots to colons for Ollama format if needed
         if ":" not in input_lower:
             parts = input_lower.split(".")
             if len(parts) == 2:
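The fix keys the dot-to-colon rewrite on the provider recorded in CONFIG, so only Ollama-style ids are rewritten and OpenAI/Anthropic/custom ids that happen to contain a dot are left alone. A condensed standalone sketch of the provider-aware check (the sample AVAILABLE_MODELS content is invented for illustration and is not the package's real config):

    from typing import Dict

    AVAILABLE_MODELS: Dict[str, Dict[str, str]] = {
        "mymodel.latest": {"provider": "ollama", "display_name": "My Model (latest)"},
        "gpt-4-turbo": {"provider": "openai", "display_name": "GPT-4 Turbo"},
    }

    def resolve(model_id: str) -> str:
        input_lower = model_id.lower()
        provider = None
        if input_lower in AVAILABLE_MODELS:
            provider = AVAILABLE_MODELS[input_lower].get("provider")
        else:
            for info in AVAILABLE_MODELS.values():
                if info.get("display_name", "").lower() == input_lower:
                    provider = info.get("provider")
                    break
        # Only Ollama ids get the dot-to-colon version rewrite
        if provider == "ollama" and "." in input_lower and ":" not in input_lower:
            parts = input_lower.split(".")
            if len(parts) == 2:
                return f"{parts[0]}:{parts[1]}"
        return model_id

    # resolve("mymodel.latest") -> "mymodel:latest"
    # resolve("gpt-4-turbo")    -> "gpt-4-turbo" (unchanged)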
{chat_console-0.3.0 → chat_console-0.3.4}/chat_console.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.3.0
+Version: 0.3.4
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway