chat-console 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- app/__init__.py +1 -1
- app/api/ollama.py +242 -4
- app/config.py +3 -1
- app/main.py +165 -8
- app/utils.py +15 -4
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/METADATA +1 -1
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/RECORD +11 -11
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/WHEEL +1 -1
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/entry_points.txt +0 -0
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/top_level.txt +0 -0
app/__init__.py
CHANGED
app/api/ollama.py
CHANGED
@@ -31,8 +31,96 @@ class OllamaClient(BaseModelClient):
         # Track model loading state
         self._model_loading = False
 
+        # Track preloaded models and their last use timestamp
+        self._preloaded_models = {}
+
+        # Default timeout values (in seconds)
+        self.DEFAULT_TIMEOUT = 30
+        self.MODEL_LOAD_TIMEOUT = 120
+        self.MODEL_PULL_TIMEOUT = 3600  # 1 hour for large models
+
         # Path to the cached models file
         self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
+    def get_timeout_for_model(self, model_id: str, operation: str = "generate") -> int:
+        """
+        Calculate an appropriate timeout based on model size
+
+        Parameters:
+        - model_id: The model identifier
+        - operation: The operation type ('generate', 'load', 'pull')
+
+        Returns:
+        - Timeout in seconds
+        """
+        # Default timeouts by operation
+        default_timeouts = {
+            "generate": self.DEFAULT_TIMEOUT,   # 30s
+            "load": self.MODEL_LOAD_TIMEOUT,    # 2min
+            "pull": self.MODEL_PULL_TIMEOUT,    # 1h
+            "list": 5,                          # 5s
+            "test": 2                           # 2s
+        }
+
+        # Parameter size multipliers
+        size_multipliers = {
+            # For models < 3B
+            "1b": 0.5,
+            "2b": 0.7,
+            "3b": 1.0,
+            # For models 3B-10B
+            "5b": 1.2,
+            "6b": 1.3,
+            "7b": 1.5,
+            "8b": 1.7,
+            "9b": 1.8,
+            # For models 10B-20B
+            "13b": 2.0,
+            "14b": 2.0,
+            # For models 20B-50B
+            "27b": 3.0,
+            "34b": 3.5,
+            "40b": 4.0,
+            # For models 50B+
+            "70b": 5.0,
+            "80b": 6.0,
+            "100b": 7.0,
+            "400b": 10.0,
+            "405b": 10.0,
+        }
+
+        # Get the base timeout for the operation
+        base_timeout = default_timeouts.get(operation, self.DEFAULT_TIMEOUT)
+
+        # Try to determine the model size from the model ID
+        model_size = "7b"  # Default assumption is 7B parameters
+        model_lower = model_id.lower()
+
+        # Check for size indicators in the model name
+        for size in size_multipliers.keys():
+            if size in model_lower:
+                model_size = size
+                break
+
+        # If it's a known large model without size in name
+        if "llama3.1" in model_lower and not any(size in model_lower for size in size_multipliers.keys()):
+            model_size = "8b"  # Default for llama3.1 without size specified
+
+        # For first generation after model selection, if preloaded, use shorter timeout
+        if operation == "generate" and model_id in self._preloaded_models:
+            # For preloaded models, use a shorter timeout
+            return max(int(base_timeout * 0.7), 20)  # Min 20 seconds
+
+        # Calculate final timeout with multiplier
+        multiplier = size_multipliers.get(model_size, 1.0)
+        timeout = int(base_timeout * multiplier)
+
+        # For pull operation, ensure we have a reasonable maximum
+        if operation == "pull":
+            return min(timeout, 7200)  # Max 2 hours
+
+        logger.info(f"Calculated timeout for {model_id} ({operation}): {timeout}s (base: {base_timeout}s, multiplier: {multiplier})")
+        return timeout
 
     @classmethod
     async def create(cls) -> 'OllamaClient':
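To make the scaling concrete, here is a small self-contained sketch of the same size-multiplier arithmetic (an illustration only, not the packaged code; model names and the trimmed multiplier table are examples): a 13b model multiplies the 30 s generate base by 2.0, while a preloaded model drops to 70% of the base with a 20 s floor.

```python
# Illustrative re-implementation of the timeout arithmetic above (not the shipped code).
BASE_GENERATE_TIMEOUT = 30  # seconds, mirrors DEFAULT_TIMEOUT
SIZE_MULTIPLIERS = {"2b": 0.7, "7b": 1.5, "13b": 2.0, "70b": 5.0}

def sketch_timeout(model_id: str, preloaded: bool = False) -> int:
    """Rough 'generate' timeout, scaled by the parameter count found in the model name."""
    model_lower = model_id.lower()
    size = next((s for s in SIZE_MULTIPLIERS if s in model_lower), "7b")
    if preloaded:
        # Preloaded models answer faster: 70% of the base, never below 20 s
        return max(int(BASE_GENERATE_TIMEOUT * 0.7), 20)
    return int(BASE_GENERATE_TIMEOUT * SIZE_MULTIPLIERS[size])

print(sketch_timeout("llama2:13b"))                  # 60 (30s base x 2.0)
print(sketch_timeout("llama2:13b", preloaded=True))  # 21 (70% of base, 20s floor)
print(sketch_timeout("gemma:2b"))                    # 21 (30s base x 0.7)
```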
@@ -61,7 +149,29 @@ class OllamaClient(BaseModelClient):
             style_instructions = self._get_style_instructions(style)
             debug_log(f"Adding style instructions: {style_instructions[:50]}...")
             formatted_messages.append(style_instructions)
+
+        # Special case for title generation - check if this is a title generation message
+        is_title_generation = False
+        for msg in messages:
+            if msg.get("role") == "system" and "generate a brief, descriptive title" in msg.get("content", "").lower():
+                is_title_generation = True
+                debug_log("Detected title generation prompt")
+                break
+
+        # For title generation, use a direct approach
+        if is_title_generation:
+            debug_log("Using specialized formatting for title generation")
+            # Find the user message containing the input for title generation
+            user_msg = next((msg for msg in messages if msg.get("role") == "user"), None)
+            if user_msg and "content" in user_msg:
+                # Create a direct prompt
+                prompt = "Generate a short descriptive title (maximum 40 characters) for this conversation. ONLY RESPOND WITH THE TITLE FOR THE FOLLOWING MESSAGE:\n\n" + user_msg["content"]
+                debug_log(f"Created title generation prompt: {prompt[:100]}...")
+                return prompt
+            else:
+                debug_log("Could not find user message for title generation, using standard formatting")
 
+        # Standard processing for normal chat messages
         # Add message content, preserving conversation flow
         for i, msg in enumerate(messages):
             try:
@@ -185,6 +295,7 @@ class OllamaClient(BaseModelClient):
         try:
             async with aiohttp.ClientSession() as session:
                 logger.debug(f"Sending request to {self.base_url}/api/generate")
+                gen_timeout = self.get_timeout_for_model(model, "generate")
                 async with session.post(
                     f"{self.base_url}/api/generate",
                     json={
@@ -193,12 +304,16 @@ class OllamaClient(BaseModelClient):
                         "temperature": temperature,
                         "stream": False
                     },
-                    timeout=
+                    timeout=gen_timeout
                 ) as response:
                     response.raise_for_status()
                     data = await response.json()
                     if "response" not in data:
                         raise Exception("Invalid response format from Ollama server")
+
+                    # Update the model usage timestamp to keep it hot
+                    self.update_model_usage(model)
+
                     return data["response"]
 
         except aiohttp.ClientConnectorError:
@@ -324,10 +439,11 @@ class OllamaClient(BaseModelClient):
                     "stream": False
                 }
 
+                test_timeout = self.get_timeout_for_model(model, "test")
                 async with session.post(
                     f"{self.base_url}/api/generate",
                     json=test_payload,
-                    timeout=
+                    timeout=test_timeout
                 ) as response:
                     if response.status != 200:
                         logger.warning(f"Model test request failed with status {response.status}")
@@ -361,10 +477,11 @@ class OllamaClient(BaseModelClient):
                         debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
                         pull_payload = {"name": "gemma:2b"}  # Safe default
 
+                    pull_timeout = self.get_timeout_for_model(model, "pull")
                     async with session.post(
                         f"{self.base_url}/api/pull",
                         json=pull_payload,
-                        timeout=
+                        timeout=pull_timeout
                     ) as pull_response:
                         if pull_response.status != 200:
                             logger.error("Failed to pull model")
@@ -415,10 +532,11 @@ class OllamaClient(BaseModelClient):
                 }
 
                 debug_log(f"Sending request to Ollama API")
+                gen_timeout = self.get_timeout_for_model(model, "generate")
                 response = await session.post(
                     f"{self.base_url}/api/generate",
                     json=request_payload,
-                    timeout=
+                    timeout=gen_timeout
                 )
                 response.raise_for_status()
                 debug_log(f"Response status: {response.status}")
@@ -426,6 +544,9 @@ class OllamaClient(BaseModelClient):
                 # Use a simpler async iteration pattern that's less error-prone
                 debug_log("Starting to process response stream")
 
+                # Update the model usage timestamp to keep it hot
+                self.update_model_usage(model)
+
                 # Set a flag to track if we've yielded any content
                 has_yielded_content = False
 
@@ -535,6 +656,123 @@ class OllamaClient(BaseModelClient):
     def is_loading_model(self) -> bool:
         """Check if Ollama is currently loading a model"""
         return self._model_loading
+
+    async def preload_model(self, model_id: str) -> bool:
+        """
+        Preload a model to keep it hot/ready for use
+        Returns True if successful, False otherwise
+        """
+        from datetime import datetime
+        import asyncio
+
+        logger.info(f"Preloading model: {model_id}")
+
+        # First, check if the model is already preloaded
+        if model_id in self._preloaded_models:
+            # Update timestamp if already preloaded
+            self._preloaded_models[model_id] = datetime.now()
+            logger.info(f"Model {model_id} already preloaded, updated timestamp")
+            return True
+
+        try:
+            # We'll use a minimal prompt to load the model
+            warm_up_prompt = "hello"
+
+            # Set model loading state
+            old_loading_state = self._model_loading
+            self._model_loading = True
+
+            async with aiohttp.ClientSession() as session:
+                # First try pulling the model if needed
+                try:
+                    logger.info(f"Ensuring model {model_id} is pulled")
+                    pull_payload = {"name": model_id}
+                    pull_timeout = self.get_timeout_for_model(model_id, "pull")
+                    async with session.post(
+                        f"{self.base_url}/api/pull",
+                        json=pull_payload,
+                        timeout=pull_timeout
+                    ) as pull_response:
+                        # We don't need to process the full pull, just initiate it
+                        if pull_response.status != 200:
+                            logger.warning(f"Pull request for model {model_id} failed with status {pull_response.status}")
+                except Exception as e:
+                    logger.warning(f"Error during model pull check: {str(e)}")
+
+                # Now send a small generation request to load the model into memory
+                logger.info(f"Sending warm-up request for model {model_id}")
+                gen_timeout = self.get_timeout_for_model(model_id, "load")
+                async with session.post(
+                    f"{self.base_url}/api/generate",
+                    json={
+                        "model": model_id,
+                        "prompt": warm_up_prompt,
+                        "temperature": 0.7,
+                        "stream": False
+                    },
+                    timeout=gen_timeout
+                ) as response:
+                    if response.status != 200:
+                        logger.error(f"Failed to preload model {model_id}, status: {response.status}")
+                        self._model_loading = old_loading_state
+                        return False
+
+                    # Read the response to ensure the model is fully loaded
+                    await response.json()
+
+                    # Update preloaded models with timestamp
+                    self._preloaded_models[model_id] = datetime.now()
+                    logger.info(f"Successfully preloaded model {model_id}")
+                    return True
+        except Exception as e:
+            logger.error(f"Error preloading model {model_id}: {str(e)}")
+            return False
+        finally:
+            # Reset model loading state
+            self._model_loading = old_loading_state
+
+    def get_preloaded_models(self) -> Dict[str, datetime]:
+        """Return the dict of preloaded models and their last use times"""
+        return self._preloaded_models
+
+    def update_model_usage(self, model_id: str) -> None:
+        """Update the timestamp for a model that is being used"""
+        if model_id and model_id in self._preloaded_models:
+            from datetime import datetime
+            self._preloaded_models[model_id] = datetime.now()
+            logger.info(f"Updated usage timestamp for model {model_id}")
+
+    async def release_inactive_models(self, max_inactive_minutes: int = 30) -> List[str]:
+        """
+        Release models that have been inactive for more than the specified time
+        Returns a list of model IDs that were released
+        """
+        from datetime import datetime, timedelta
+
+        if not self._preloaded_models:
+            return []
+
+        now = datetime.now()
+        inactive_threshold = timedelta(minutes=max_inactive_minutes)
+        models_to_release = []
+
+        # Find models that have been inactive for too long
+        for model_id, last_used in list(self._preloaded_models.items()):
+            if now - last_used > inactive_threshold:
+                models_to_release.append(model_id)
+
+        # Release the models
+        released_models = []
+        for model_id in models_to_release:
+            try:
+                logger.info(f"Releasing inactive model: {model_id} (inactive for {(now - self._preloaded_models[model_id]).total_seconds() / 60:.1f} minutes)")
+                # We don't have an explicit "unload" API in Ollama, but we can remove it from our tracking
+                del self._preloaded_models[model_id]
+                released_models.append(model_id)
+            except Exception as e:
+                logger.error(f"Error releasing model {model_id}: {str(e)}")
+
+        return released_models
 
     async def get_model_details(self, model_id: str) -> Dict[str, Any]:
         """Get detailed information about a specific Ollama model"""
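A minimal usage sketch of the keep-warm lifecycle added above (illustrative only; it assumes a local Ollama server is running and that the example model name exists on it):

```python
# Hedged sketch: exercising the preload/keep-warm API introduced in this release.
import asyncio

from app.api.ollama import OllamaClient


async def demo() -> None:
    client = await OllamaClient.create()

    # Warm the model once so the first real generation does not pay the load cost
    if await client.preload_model("llama3:8b"):
        print("preloaded:", list(client.get_preloaded_models()))

    # Generation calls refresh the last-used timestamp internally; it can also
    # be refreshed explicitly:
    client.update_model_usage("llama3:8b")

    # Drop anything idle for longer than 30 minutes from the tracking dict
    released = await client.release_inactive_models(max_inactive_minutes=30)
    print("released:", released)


asyncio.run(demo())
```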
app/config.py
CHANGED
@@ -151,7 +151,9 @@ DEFAULT_CONFIG = {
     "max_history_items": 100,
     "highlight_code": True,
     "auto_save": True,
-    "generate_dynamic_titles": True
+    "generate_dynamic_titles": True,
+    "ollama_model_preload": True,
+    "ollama_inactive_timeout_minutes": 30
 }
 
 def validate_config(config):
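The two new keys sit alongside the existing flags in DEFAULT_CONFIG and are read the same way. A minimal sketch of how the app consumes them, assuming CONFIG is the merged configuration dict exposed by app.config (as app/main.py's `CONFIG.get(...)` calls suggest):

```python
# Sketch only: reading the new Ollama keep-warm settings the way app/main.py does.
from app.config import CONFIG  # assumed export, mirroring main.py's usage

preload_enabled = CONFIG.get("ollama_model_preload", True)
inactive_minutes = CONFIG.get("ollama_inactive_timeout_minutes", 30)

if preload_enabled:
    print(f"Ollama models are kept warm and released after {inactive_minutes} min of inactivity")
else:
    print("Ollama model preloading is disabled")
```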
app/main.py
CHANGED
@@ -363,7 +363,13 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         self.selected_model = resolve_model_id(default_model_from_config)
         self.selected_style = CONFIG["default_style"]  # Keep SimpleChatApp __init__
         self.initial_text = initial_text  # Keep SimpleChatApp __init__
-
+
+        # Task for model cleanup
+        self._model_cleanup_task = None
+
+        # Inactivity threshold in minutes before releasing model resources
+        # Read from config, default to 30 minutes
+        self.MODEL_INACTIVITY_THRESHOLD = CONFIG.get("ollama_inactive_timeout_minutes", 30)
 
     def compose(self) -> ComposeResult:  # Modify SimpleChatApp compose
         """Create the simplified application layout."""
@@ -420,6 +426,11 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                 pass  # Silently ignore if widget not found yet
 
         self.update_app_info()  # Update the model info
+
+        # Start the background task for model cleanup if model preloading is enabled
+        if CONFIG.get("ollama_model_preload", True):
+            self._model_cleanup_task = asyncio.create_task(self._check_inactive_models())
+            debug_log("Started background task for model cleanup")
 
         # Check API keys and services  # Keep SimpleChatApp on_mount
         api_issues = []  # Keep SimpleChatApp on_mount
@@ -675,29 +686,87 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
 
         # Determine title client and model based on available keys
         if OPENAI_API_KEY:
+            # For highest success rate, use OpenAI for title generation when available
             from app.api.openai import OpenAIClient
             title_client = await OpenAIClient.create()
             title_model = "gpt-3.5-turbo"
             debug_log("Using OpenAI for background title generation")
         elif ANTHROPIC_API_KEY:
+            # Next best option is Anthropic
             from app.api.anthropic import AnthropicClient
             title_client = await AnthropicClient.create()
             title_model = "claude-3-haiku-20240307"
             debug_log("Using Anthropic for background title generation")
         else:
             # Fallback to the currently selected model's client if no API keys
+            # Get client type first to ensure we correctly identify Ollama models
+            from app.api.ollama import OllamaClient
             selected_model_resolved = resolve_model_id(self.selected_model)
-
-
-
+            client_type = BaseModelClient.get_client_type_for_model(selected_model_resolved)
+
+            # For Ollama models, special handling is required
+            if client_type == OllamaClient:
+                debug_log(f"Title generation with Ollama model detected: {selected_model_resolved}")
+
+                # Try common small/fast models first if they exist
+                try:
+                    # Check if we have any smaller models available for faster title generation
+                    ollama_client = await OllamaClient.create()
+                    available_models = await ollama_client.get_available_models()
+                    small_model_options = ["gemma:2b", "phi3:mini", "llama3:8b", "orca-mini:3b", "phi2"]
+
+                    small_model_found = False
+                    for model_name in small_model_options:
+                        if any(model["id"] == model_name for model in available_models):
+                            debug_log(f"Found smaller Ollama model for title generation: {model_name}")
+                            title_model = model_name
+                            small_model_found = True
+                            break
+
+                    if not small_model_found:
+                        # Use the current model if no smaller models found
+                        title_model = selected_model_resolved
+                        debug_log(f"No smaller models found, using current model: {title_model}")
+
+                    # Always create a fresh client instance to avoid interference with model preloading
+                    title_client = ollama_client
+                    debug_log(f"Created dedicated Ollama client for title generation with model: {title_model}")
+                except Exception as e:
+                    debug_log(f"Error finding optimized Ollama model for title generation: {str(e)}")
+                    # Fallback to standard approach
+                    title_client = await OllamaClient.create()
+                    title_model = selected_model_resolved
+            else:
+                # For other providers, use normal client acquisition
+                title_client = await BaseModelClient.get_client_for_model(selected_model_resolved)
+                title_model = selected_model_resolved
+                debug_log(f"Using selected model's client ({type(title_client).__name__}) for background title generation")
 
         if not title_client or not title_model:
             raise Exception("Could not determine a client/model for title generation.")
 
         # Call the utility function
         from app.utils import generate_conversation_title  # Import locally if needed
-
-
+
+        # Add timeout handling for title generation to prevent hangs
+        try:
+            # Create a task with timeout
+            import asyncio
+            title_generation_task = asyncio.create_task(
+                generate_conversation_title(content, title_model, title_client)
+            )
+
+            # Wait for completion with timeout (30 seconds)
+            new_title = await asyncio.wait_for(title_generation_task, timeout=30)
+            debug_log(f"Background generated title: {new_title}")
+        except asyncio.TimeoutError:
+            debug_log("Title generation timed out after 30 seconds")
+            # Use default title in case of timeout
+            new_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
+            # Try to cancel the task
+            if not title_generation_task.done():
+                title_generation_task.cancel()
+                debug_log("Cancelled timed out title generation task")
 
         # Check if title generation returned the default or a real title
         if new_title and not new_title.startswith("Conversation ("):
@@ -718,8 +787,8 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
                             title_widget.update(new_title)
                             self.current_conversation.title = new_title  # Update local object too
                             log(f"Background title update successful: {new_title}")
-                            #
-
+                            # Subtle notification to show title was updated
+                            self.notify(f"Conversation titled: {new_title}", severity="information", timeout=2)
                         else:
                             log("Conversation changed before background title update could apply.")
                     else:
@@ -1226,6 +1295,94 @@ class SimpleChatApp(App): # Keep SimpleChatApp class definition
         log(f"Stored selected provider: {self.selected_provider} for model: {self.selected_model}")
 
         self.update_app_info()  # Update the displayed model info
+
+        # Preload the model if it's an Ollama model and preloading is enabled
+        if self.selected_provider == "ollama" and CONFIG.get("ollama_model_preload", True):
+            # Start the background task to preload the model
+            debug_log(f"Starting background task to preload Ollama model: {self.selected_model}")
+            asyncio.create_task(self._preload_ollama_model(self.selected_model))
+
+    async def _preload_ollama_model(self, model_id: str) -> None:
+        """Preload an Ollama model in the background"""
+        from app.api.ollama import OllamaClient
+
+        debug_log(f"Preloading Ollama model: {model_id}")
+        # Show a subtle notification to the user
+        self.notify("Preparing model for use...", severity="information", timeout=3)
+
+        try:
+            # Initialize the client
+            client = await OllamaClient.create()
+
+            # Update the loading indicator to show model loading
+            loading = self.query_one("#loading-indicator")
+            loading.remove_class("hidden")
+            loading.add_class("model-loading")
+            loading.update(f"⚙️ Loading Ollama model...")
+
+            # Preload the model
+            success = await client.preload_model(model_id)
+
+            # Hide the loading indicator
+            loading.add_class("hidden")
+            loading.remove_class("model-loading")
+
+            if success:
+                debug_log(f"Successfully preloaded model: {model_id}")
+                self.notify(f"Model ready for use", severity="success", timeout=2)
+            else:
+                debug_log(f"Failed to preload model: {model_id}")
+                # No need to notify the user about failure - will happen naturally on first use
+        except Exception as e:
+            debug_log(f"Error preloading model: {str(e)}")
+            # Make sure to hide the loading indicator
+            try:
+                loading = self.query_one("#loading-indicator")
+                loading.add_class("hidden")
+                loading.remove_class("model-loading")
+            except Exception:
+                pass
+
+    async def _check_inactive_models(self) -> None:
+        """Background task to check for and release inactive models"""
+        from app.api.ollama import OllamaClient
+
+        # How often to check for inactive models (in seconds)
+        CHECK_INTERVAL = 600  # 10 minutes
+
+        debug_log(f"Starting inactive model check task with interval {CHECK_INTERVAL}s")
+
+        try:
+            while True:
+                await asyncio.sleep(CHECK_INTERVAL)
+
+                debug_log("Checking for inactive models...")
+
+                try:
+                    # Initialize the client
+                    client = await OllamaClient.create()
+
+                    # Get the threshold from instance variable
+                    threshold = getattr(self, "MODEL_INACTIVITY_THRESHOLD", 30)
+
+                    # Check and release inactive models
+                    released_models = await client.release_inactive_models(threshold)
+
+                    if released_models:
+                        debug_log(f"Released {len(released_models)} inactive models: {released_models}")
+                    else:
+                        debug_log("No inactive models to release")
+
+                except Exception as e:
+                    debug_log(f"Error checking for inactive models: {str(e)}")
+                    # Continue loop even if this check fails
+
+        except asyncio.CancelledError:
+            debug_log("Model cleanup task cancelled")
+            # Normal task cancellation, clean exit
+        except Exception as e:
+            debug_log(f"Unexpected error in model cleanup task: {str(e)}")
+            # Log but don't crash
 
     def on_style_selector_style_selected(self, event: StyleSelector.StyleSelected) -> None:  # Keep SimpleChatApp on_style_selector_style_selected
         """Handle style selection"""  # Keep SimpleChatApp on_style_selector_style_selected docstring
app/utils.py
CHANGED
@@ -32,6 +32,11 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
 
     # Try-except the entire function to ensure we always return a title
     try:
+        # Check if we're using an Ollama client
+        from app.api.ollama import OllamaClient
+        is_ollama_client = isinstance(client, OllamaClient)
+        debug_log(f"Client is Ollama: {is_ollama_client}")
+
         # Pick a reliable title generation model - prefer OpenAI if available
         from app.config import OPENAI_API_KEY, ANTHROPIC_API_KEY
 
@@ -46,10 +51,16 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
             title_model = "claude-3-haiku-20240307"
             debug_log("Using Anthropic for title generation")
         else:
-            #
-
-
-
+            # For Ollama clients, ensure we have a clean instance to avoid conflicts with preloaded models
+            if is_ollama_client:
+                debug_log("Creating fresh Ollama client instance for title generation")
+                title_client = await OllamaClient.create()
+                title_model = model
+            else:
+                # Use the passed client for other providers
+                title_client = client
+                title_model = model
+            debug_log(f"Using {type(title_client).__name__} for title generation with model {title_model}")
 
         # Create a special prompt for title generation
         title_prompt = [
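A hedged usage sketch of the updated helper, matching the signature shown in the hunk header (`generate_conversation_title(message, model, client)`); when the client is an OllamaClient the function now builds a fresh client instance internally so it does not disturb a preloaded model. The message and model name here are illustrative and a local Ollama server is assumed:

```python
# Illustrative call into the title helper; not part of the package itself.
import asyncio

from app.api.ollama import OllamaClient
from app.utils import generate_conversation_title


async def demo_title() -> None:
    client = await OllamaClient.create()
    title = await generate_conversation_title(
        "How do I keep an Ollama model loaded between requests?",  # first user message
        "llama3:8b",  # model id (example)
        client,
    )
    print(title)


asyncio.run(demo_title())
```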
{chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.4.
+Version: 0.4.2
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway
{chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/RECORD
CHANGED

@@ -1,13 +1,13 @@
-app/__init__.py,sha256=
-app/config.py,sha256=
+app/__init__.py,sha256=aSLqazfzD7omcg24nSmExHXGmkwbhjI84zBzS-AL-To,130
+app/config.py,sha256=F-0hO3NT5kRJxZelGLxaeUmnwx8i0LPHzYtNftL6CwM,8468
 app/database.py,sha256=nt8CVuDpy6zw8mOYqDcfUmNw611t7Ln7pz22M0b6-MI,9967
-app/main.py,sha256=
+app/main.py,sha256=lOJiPpBE2F8PQlraVeRHGKS_C0Zka8n__lcE5LW3Gvo,85554
 app/models.py,sha256=4-y9Lytay2exWPFi0FDlVeRL3K2-I7E-jBqNzTfokqY,2644
-app/utils.py,sha256=
+app/utils.py,sha256=RtyW_kK3M8WWSDfkuH6G2OuLcInIXfPG4Nv_xijf-gs,40877
 app/api/__init__.py,sha256=A8UL84ldYlv8l7O-yKzraVFcfww86SgWfpl4p7R03-w,62
 app/api/anthropic.py,sha256=uInwNvGLJ_iPUs4BjdwaqXTU6NfmK1SzX7498Pt44fI,10667
 app/api/base.py,sha256=valBWV5So76r8tjrgU5-sLfY73WaViTrszdCy8Rimjo,10314
-app/api/ollama.py,sha256=
+app/api/ollama.py,sha256=bVG2UPkpA2arHKyRVLwHt2Q9Lth_qdZLfijiNXvPuYA,76771
 app/api/openai.py,sha256=XuHJHpD7tN_ZHLkRpNUcL1VxTtsXOVk1hDPXX8JnBxQ,15322
 app/ui/__init__.py,sha256=RndfbQ1Tv47qdSiuQzvWP96lPS547SDaGE-BgOtiP_w,55
 app/ui/chat_interface.py,sha256=oSDZi0Jgj_L8WnBh1RuJpIeIcN-RQ38CNejwsXiWTVg,18267
@@ -16,9 +16,9 @@ app/ui/model_browser.py,sha256=pdblLVkdyVF0_Bo02bqbErGAtieyH-y6IfhMOPEqIso,71124
 app/ui/model_selector.py,sha256=2G0TOXfcNodrXZOhLeaJJ2iG3Nck4c_NN1AvUAmaF3M,19172
 app/ui/search.py,sha256=b-m14kG3ovqW1-i0qDQ8KnAqFJbi5b1FLM9dOnbTyIs,9763
 app/ui/styles.py,sha256=04AhPuLrOd2yenfRySFRestPeuTPeMLzhmMB67NdGvw,5615
-chat_console-0.4.
-chat_console-0.4.
-chat_console-0.4.
-chat_console-0.4.
-chat_console-0.4.
-chat_console-0.4.
+chat_console-0.4.2.dist-info/licenses/LICENSE,sha256=srHZ3fvcAuZY1LHxE7P6XWju2njRCHyK6h_ftEbzxSE,1057
+chat_console-0.4.2.dist-info/METADATA,sha256=_-h1KJkkcpmExhmDOmF-4P2Qj8iEAm-RiziHBX-_11Q,3810
+chat_console-0.4.2.dist-info/WHEEL,sha256=7ciDxtlje1X8OhobNuGgi1t-ACdFSelPnSmDPrtlobY,91
+chat_console-0.4.2.dist-info/entry_points.txt,sha256=kkVdEc22U9PAi2AeruoKklfkng_a_aHAP6VRVwrAD7c,67
+chat_console-0.4.2.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
+chat_console-0.4.2.dist-info/RECORD,,
{chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/entry_points.txt
File without changes

{chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/licenses/LICENSE
File without changes

{chat_console-0.4.0.dist-info → chat_console-0.4.2.dist-info}/top_level.txt
File without changes