chat-console 0.3.9__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -1
- app/api/base.py +10 -6
- app/api/ollama.py +51 -23
- app/config.py +29 -26
- app/main.py +81 -11
- app/ui/chat_interface.py +31 -58
- app/ui/model_selector.py +5 -0
- app/utils.py +580 -374
- {chat_console-0.3.9.dist-info → chat_console-0.3.94.dist-info}/METADATA +1 -1
- chat_console-0.3.94.dist-info/RECORD +24 -0
- chat_console-0.3.9.dist-info/RECORD +0 -24
- {chat_console-0.3.9.dist-info → chat_console-0.3.94.dist-info}/WHEEL +0 -0
- {chat_console-0.3.9.dist-info → chat_console-0.3.94.dist-info}/entry_points.txt +0 -0
- {chat_console-0.3.9.dist-info → chat_console-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {chat_console-0.3.9.dist-info → chat_console-0.3.94.dist-info}/top_level.txt +0 -0
app/utils.py
CHANGED
@@ -27,458 +27,648 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
 
     debug_log(f"Starting title generation with model: {model}, client type: {type(client).__name__}")
 
-    #
-
-    if client is None:
-        debug_log("Client is None, will use default title")
-        return f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
-
-    # Determine the best model to use for title generation
-    title_model_id = None
+    # For safety, always use a default title first
+    default_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
 
-    #
-
-
-
-
-
-
-
+    # Try-except the entire function to ensure we always return a title
+    try:
+        # Pick a reliable title generation model - prefer OpenAI if available
+        from ..config import OPENAI_API_KEY, ANTHROPIC_API_KEY
+
+        if OPENAI_API_KEY:
+            from ..api.openai import OpenAIClient
+            title_client = await OpenAIClient.create()
+            title_model = "gpt-3.5-turbo"
+            debug_log("Using OpenAI for title generation")
+        elif ANTHROPIC_API_KEY:
+            from ..api.anthropic import AnthropicClient
+            title_client = await AnthropicClient.create()
+            title_model = "claude-3-haiku-20240307"
+            debug_log("Using Anthropic for title generation")
+        else:
+            # Use the passed client if no API keys available
+            title_client = client
+            title_model = model
+            debug_log(f"Using provided {type(client).__name__} for title generation")
+
+        # Create a special prompt for title generation
+        title_prompt = [
+            {
+                "role": "system",
+                "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. Return only the title text with no additional explanation or formatting."
+            },
+            {
+                "role": "user",
+                "content": message
+            }
+        ]
+
+        # Generate title
+        debug_log(f"Sending title generation request to {title_model}")
+        title = await title_client.generate_completion(
+            messages=title_prompt,
+            model=title_model,
+            temperature=0.7,
+            max_tokens=60
+        )
+
+        # Sanitize the title
+        title = title.strip().strip('"\'').strip()
+        if len(title) > 40:
+            title = title[:37] + "..."
 
-
-
-
-
-
-
-
-
-        if any(m.get("id") == sonnet_id for m in available_anthropic_models):
-            title_model_id = sonnet_id
-            debug_log(f"Using Anthropic Sonnet for title generation: {title_model_id}")
-        else:
-            debug_log("Neither Haiku nor Sonnet found in Anthropic models list")
-    except Exception as e:
-        debug_log(f"Error getting Anthropic models: {str(e)}")
+        debug_log(f"Generated title: {title}")
+        return title
+
+    except Exception as e:
+        # Log the error and return default title
+        debug_log(f"Title generation failed: {str(e)}")
+        logger.error(f"Title generation failed: {str(e)}")
+        return default_title
 
-
-
-
-
-
-
-
-
-
+# Helper function for OpenAI streaming
+async def _generate_openai_stream(
+    app: 'SimpleChatApp',
+    messages: List[Dict],
+    model: str,
+    style: str,
+    client: Any,
+    callback: Callable[[str], Awaitable[None]],
+    update_lock: asyncio.Lock
+) -> Optional[str]:
+    """Generate streaming response using OpenAI provider."""
+    try:
+        from app.main import debug_log
+    except ImportError:
+        debug_log = lambda msg: None
 
-
-    is_ollama = 'ollama' in str(type(client)).lower()
-    if is_ollama and not title_model_id:
-        debug_log("Using Ollama client for title generation")
-        # For Ollama, check if the model exists before using it
-        try:
-            # Try a quick test request to check if model exists
-            debug_log(f"Testing if Ollama model exists: {model}")
-            import aiohttp
-            async with aiohttp.ClientSession() as session:
-                try:
-                    base_url = "http://localhost:11434"
-                    async with session.post(
-                        f"{base_url}/api/generate",
-                        json={"model": model, "prompt": "test", "stream": False},
-                        timeout=2
-                    ) as response:
-                        if response.status == 200:
-                            # Model exists, use it
-                            title_model_id = model
-                            debug_log(f"Ollama model {model} exists, using it for title generation")
-                        else:
-                            debug_log(f"Ollama model {model} returned status {response.status}, falling back to default")
-                            # Fall back to a common model
-                            title_model_id = "llama3"
-                except Exception as e:
-                    debug_log(f"Error testing Ollama model: {str(e)}, falling back to default")
-                    # Fall back to a common model
-                    title_model_id = "llama3"
-        except Exception as e:
-            debug_log(f"Error checking Ollama model: {str(e)}")
-            # Fall back to a common model
-            title_model_id = "llama3"
+    debug_log(f"Using OpenAI-specific streaming for model: {model}")
 
-    #
-
-
-
-
-    elif is_anthropic:
-        title_model_id = "claude-3-haiku-20240307"
-    elif is_ollama:
-        title_model_id = "llama3" # Common default
-    else:
-        # Last resort - use the originally passed model
-        title_model_id = model
-
-    debug_log(f"No specific model found, using fallback model for title generation: {title_model_id}")
+    # Initialize variables for response tracking
+    full_response = ""
+    buffer = []
+    last_update = time.time()
+    update_interval = 0.03 # Responsive updates for OpenAI
 
-
-
+    try:
+        # Initialize stream generator
+        debug_log("Initializing OpenAI stream generator")
+        stream_generator = client.generate_stream(messages, model, style)
+
+        # Process stream chunks
+        debug_log("Beginning to process OpenAI stream chunks")
+        async for chunk in stream_generator:
+            # Check for task cancellation
+            if asyncio.current_task().cancelled():
+                debug_log("Task cancellation detected during OpenAI chunk processing")
+                if hasattr(client, 'cancel_stream'):
+                    await client.cancel_stream()
+                raise asyncio.CancelledError()
+
+            # Process chunk content
+            if chunk:
+                if not isinstance(chunk, str):
+                    try:
+                        chunk = str(chunk)
+                    except Exception:
+                        continue
+
+                buffer.append(chunk)
+                current_time = time.time()
+
+                # Update UI with new content
+                if (current_time - last_update >= update_interval or
+                    len(''.join(buffer)) > 5 or
+                    len(full_response) < 50):
+
+                    new_content = ''.join(buffer)
+                    full_response += new_content
+
+                    try:
+                        async with update_lock:
+                            await callback(full_response)
+                            if hasattr(app, 'refresh'):
+                                app.refresh(layout=True)
+                    except Exception as callback_err:
+                        logger.error(f"Error in OpenAI UI callback: {str(callback_err)}")
+
+                    buffer = []
+                    last_update = current_time
+                    await asyncio.sleep(0.02)
+
+        # Process any remaining buffer content
+        if buffer:
+            new_content = ''.join(buffer)
+            full_response += new_content
+
+            try:
+                async with update_lock:
+                    await callback(full_response)
+                    if hasattr(app, 'refresh'):
+                        app.refresh(layout=True)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container:
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+            except Exception as callback_err:
+                logger.error(f"Error in final OpenAI UI callback: {str(callback_err)}")
+
+        # Final refresh to ensure everything is displayed correctly
+        try:
+            await asyncio.sleep(0.05)
+            async with update_lock:
+                await callback(full_response)
+                if hasattr(app, 'refresh'):
+                    app.refresh(layout=True)
+        except Exception:
+            pass
+
+        return full_response
+
+    except asyncio.CancelledError:
+        logger.info(f"OpenAI streaming cancelled. Partial response length: {len(full_response)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        return full_response
+
+    except Exception as e:
+        logger.error(f"Error during OpenAI streaming: {str(e)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        raise
 
-
-
-
-
-
-
-
-
-
-
-
+# Helper function for Anthropic streaming
+async def _generate_anthropic_stream(
+    app: 'SimpleChatApp',
+    messages: List[Dict],
+    model: str,
+    style: str,
+    client: Any,
+    callback: Callable[[str], Awaitable[None]],
+    update_lock: asyncio.Lock
+) -> Optional[str]:
+    """Generate streaming response using Anthropic provider."""
+    try:
+        from app.main import debug_log
+    except ImportError:
+        debug_log = lambda msg: None
 
-
-    last_error = None
+    debug_log(f"Using Anthropic-specific streaming for model: {model}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Initialize variables for response tracking
+    full_response = ""
+    buffer = []
+    last_update = time.time()
+    update_interval = 0.03 # Responsive updates for Anthropic
+
+    try:
+        # Initialize stream generator
+        debug_log("Initializing Anthropic stream generator")
+        stream_generator = client.generate_stream(messages, model, style)
+
+        # Process stream chunks
+        debug_log("Beginning to process Anthropic stream chunks")
+        async for chunk in stream_generator:
+            # Check for task cancellation
+            if asyncio.current_task().cancelled():
+                debug_log("Task cancellation detected during Anthropic chunk processing")
+                if hasattr(client, 'cancel_stream'):
+                    await client.cancel_stream()
+                raise asyncio.CancelledError()
+
+            # Process chunk content
+            if chunk:
+                if not isinstance(chunk, str):
+                    try:
+                        chunk = str(chunk)
+                    except Exception:
+                        continue
+
+                buffer.append(chunk)
+                current_time = time.time()
+
+                # Update UI with new content
+                if (current_time - last_update >= update_interval or
+                    len(''.join(buffer)) > 5 or
+                    len(full_response) < 50):
 
-
-
+                    new_content = ''.join(buffer)
+                    full_response += new_content
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    try:
+                        async with update_lock:
+                            await callback(full_response)
+                            if hasattr(app, 'refresh'):
+                                app.refresh(layout=True)
+                    except Exception as callback_err:
+                        logger.error(f"Error in Anthropic UI callback: {str(callback_err)}")
+
+                    buffer = []
+                    last_update = current_time
+                    await asyncio.sleep(0.02)
+
+        # Process any remaining buffer content
+        if buffer:
+            new_content = ''.join(buffer)
+            full_response += new_content
+
+            try:
+                async with update_lock:
+                    await callback(full_response)
+                    if hasattr(app, 'refresh'):
+                        app.refresh(layout=True)
+                        await asyncio.sleep(0.02)
+                        try:
+                            messages_container = app.query_one("#messages-container")
+                            if messages_container:
+                                messages_container.scroll_end(animate=False)
+                        except Exception:
+                            pass
+            except Exception as callback_err:
+                logger.error(f"Error in final Anthropic UI callback: {str(callback_err)}")
 
-
-
-
+        # Final refresh to ensure everything is displayed correctly
+        try:
+            await asyncio.sleep(0.05)
+            async with update_lock:
+                await callback(full_response)
+                if hasattr(app, 'refresh'):
+                    app.refresh(layout=True)
+        except Exception:
+            pass
 
-
-
-
-
-
-
-
-
-
-
-
-
+        return full_response
+
+    except asyncio.CancelledError:
+        logger.info(f"Anthropic streaming cancelled. Partial response length: {len(full_response)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        return full_response
+
+    except Exception as e:
+        logger.error(f"Error during Anthropic streaming: {str(e)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        raise
 
-#
-async def
+# Helper function for Ollama streaming
+async def _generate_ollama_stream(
     app: 'SimpleChatApp',
     messages: List[Dict],
     model: str,
     style: str,
     client: Any,
-    callback: Callable[[str], Awaitable[None]]
+    callback: Callable[[str], Awaitable[None]],
+    update_lock: asyncio.Lock
 ) -> Optional[str]:
-    """
-    Generate a streaming response from the model (as a Textual worker).
-    Refactored to be a coroutine, not an async generator.
-    """
+    """Generate streaming response using Ollama provider."""
     try:
         from app.main import debug_log
     except ImportError:
         debug_log = lambda msg: None
-
-
-
-
-    # Validate messages
-    if not messages:
-        debug_log("Error: messages list is empty")
-        raise ValueError("Messages list cannot be empty")
-
-    # Ensure all messages have required fields
-    for i, msg in enumerate(messages):
-        try:
-            debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
-            if 'role' not in msg:
-                debug_log(f"Adding missing 'role' to message {i}")
-                msg['role'] = 'user'
-            if 'content' not in msg:
-                debug_log(f"Adding missing 'content' to message {i}")
-                msg['content'] = ''
-        except Exception as e:
-            debug_log(f"Error checking message {i}: {str(e)}")
-            messages[i] = {
-                'role': 'user',
-                'content': str(msg) if msg else ''
-            }
-            debug_log(f"Repaired message {i}")
-
+
+    debug_log(f"Using Ollama-specific streaming for model: {model}")
+
     # Initialize variables for response tracking
     full_response = ""
     buffer = []
     last_update = time.time()
-    update_interval = 0.
-
+    update_interval = 0.03 # Responsive updates for Ollama
+
     try:
-        #
-        if
-            debug_log("Error: client is None, cannot proceed with streaming")
-            raise ValueError("Model client is None, cannot proceed with streaming")
-
-        if not hasattr(client, 'generate_stream'):
-            debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
-            raise ValueError(f"Client {type(client).__name__} does not support streaming")
-
-        # Determine client type
-        is_ollama = 'ollama' in str(type(client)).lower()
-        is_openai = 'openai' in str(type(client)).lower()
-        is_anthropic = 'anthropic' in str(type(client)).lower()
-
-        debug_log(f"Client types - Ollama: {is_ollama}, OpenAI: {is_openai}, Anthropic: {is_anthropic}")
-
-        # Only show loading indicator for Ollama (which may need to load models)
-        # This prevents Ollama-specific UI elements from showing when using other providers
-        if is_ollama and hasattr(app, 'query_one'):
+        # Show loading indicator for Ollama (which may need to load models)
+        if hasattr(app, 'query_one'):
             try:
                 debug_log("Showing initial model loading indicator for Ollama")
-                logger.info("Showing initial model loading indicator for Ollama")
                 loading = app.query_one("#loading-indicator")
                 loading.add_class("model-loading")
                 loading.update("⚙️ Loading Ollama model...")
            except Exception as e:
                 debug_log(f"Error setting initial Ollama loading state: {str(e)}")
-
-
-        debug_log(f"Starting stream generation with messages length: {len(messages)}")
-        logger.info(f"Starting stream generation for model: {model}")
-
+
         # Initialize stream generator
-
-
-
-
-
-            debug_log(f"Error initializing stream generator: {str(stream_init_error)}")
-            logger.error(f"Error initializing stream generator: {str(stream_init_error)}")
-            raise
-
-        # Update UI if model is ready (Ollama specific)
-        # Only check is_loading_model for Ollama clients to prevent errors with other providers
-        if is_ollama and hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
+        debug_log("Initializing Ollama stream generator")
+        stream_generator = client.generate_stream(messages, model, style)
+
+        # Update UI if model is ready
+        if hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
             try:
                 debug_log("Ollama model is ready for generation, updating UI")
-                logger.info("Ollama model is ready for generation, updating UI")
                 loading = app.query_one("#loading-indicator")
                 loading.remove_class("model-loading")
                 loading.update("▪▪▪ Generating response...")
             except Exception as e:
-                debug_log(f"Error updating UI after stream init: {str(e)}")
-
-
+                debug_log(f"Error updating UI after Ollama stream init: {str(e)}")
+
         # Process stream chunks
-        debug_log("Beginning to process stream chunks")
-
-
-
-
-
-
-
-
-
-
-
-
-
-                try:
-                    model_loading = client.is_loading_model()
-                    debug_log(f"Ollama model loading state: {model_loading}")
-                    if hasattr(app, 'query_one'):
-                        try:
-                            loading = app.query_one("#loading-indicator")
-                            if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
-                                debug_log("Ollama model loading started during streaming")
-                                logger.info("Ollama model loading started during streaming")
-                                loading.add_class("model-loading")
-                                loading.update("⚙️ Loading Ollama model...")
-                            elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
-                                debug_log("Ollama model loading finished during streaming")
-                                logger.info("Ollama model loading finished during streaming")
-                                loading.remove_class("model-loading")
-                                loading.update("▪▪▪ Generating response...")
-                        except Exception as ui_e:
-                            debug_log(f"Error updating UI elements: {str(ui_e)}")
-                            logger.error(f"Error updating UI elements: {str(ui_e)}")
-                except Exception as e:
-                    debug_log(f"Error checking Ollama model loading state: {str(e)}")
-                    logger.error(f"Error checking Ollama model loading state: {str(e)}")
-
-            # Process chunk content
-            if chunk:
-                if not isinstance(chunk, str):
-                    debug_log(f"WARNING: Received non-string chunk of type: {type(chunk).__name__}")
+        debug_log("Beginning to process Ollama stream chunks")
+        async for chunk in stream_generator:
+            # Check for task cancellation
+            if asyncio.current_task().cancelled():
+                debug_log("Task cancellation detected during Ollama chunk processing")
+                if hasattr(client, 'cancel_stream'):
+                    await client.cancel_stream()
+                raise asyncio.CancelledError()
+
+            # Handle Ollama model loading state changes
+            if hasattr(client, 'is_loading_model'):
+                try:
+                    model_loading = client.is_loading_model()
+                    if hasattr(app, 'query_one'):
                        try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            print(f"STREAM DEBUG: +{len(new_content)} chars, total: {len(full_response)}")
-                            # Print first few characters of content for debugging
-                            if len(full_response) < 100:
-                                print(f"STREAM CONTENT: '{full_response}'")
+                            loading = app.query_one("#loading-indicator")
+                            if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
+                                debug_log("Ollama model loading started during streaming")
+                                loading.add_class("model-loading")
+                                loading.update("⚙️ Loading Ollama model...")
+                            elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
+                                debug_log("Ollama model loading finished during streaming")
+                                loading.remove_class("model-loading")
+                                loading.update("▪▪▪ Generating response...")
+                        except Exception:
+                            pass
+                except Exception:
+                    pass
+
+            # Process chunk content
+            if chunk:
+                if not isinstance(chunk, str):
+                    try:
+                        chunk = str(chunk)
+                    except Exception:
+                        continue
 
-
-
-
+                buffer.append(chunk)
+                current_time = time.time()
+
+                # Update UI with new content
+                if (current_time - last_update >= update_interval or
+                    len(''.join(buffer)) > 5 or
+                    len(full_response) < 50):
+
+                    new_content = ''.join(buffer)
+                    full_response += new_content
+
+                    try:
+                        async with update_lock:
                             await callback(full_response)
-                            debug_log("UI callback completed successfully")
-
-                            # Force app refresh after each update
                             if hasattr(app, 'refresh'):
-
-
-
-                        debug_log(f"Error in UI callback: {str(callback_err)}")
-                        logger.error(f"Error in UI callback: {str(callback_err)}")
-                        print(f"STREAM ERROR: Error updating UI: {str(callback_err)}")
-
-                    buffer = []
-                    last_update = current_time
+                                app.refresh(layout=True)
+                    except Exception as callback_err:
+                        logger.error(f"Error in Ollama UI callback: {str(callback_err)}")
 
-
-
-
-
-
-        except Exception as chunk_error:
-            debug_log(f"Error processing stream chunks: {str(chunk_error)}")
-            logger.error(f"Error processing stream chunks: {str(chunk_error)}")
-            raise
-
+                    buffer = []
+                    last_update = current_time
+                    await asyncio.sleep(0.02)
+
+        # Process any remaining buffer content
        if buffer:
             new_content = ''.join(buffer)
             full_response += new_content
-
+
            try:
-
-
-
-                debug_log("Forcing final UI refresh sequence for all models")
-                try:
+                async with update_lock:
+                    await callback(full_response)
                    if hasattr(app, 'refresh'):
-                        app.refresh(layout=
+                        app.refresh(layout=True)
                        await asyncio.sleep(0.02)
                        try:
                            messages_container = app.query_one("#messages-container")
-                            if messages_container
+                            if messages_container:
                                messages_container.scroll_end(animate=False)
                        except Exception:
                            pass
+            except Exception as callback_err:
+                logger.error(f"Error in final Ollama UI callback: {str(callback_err)}")
+
+        # Final refresh to ensure everything is displayed correctly
+        try:
+            await asyncio.sleep(0.05)
+            async with update_lock:
+                await callback(full_response)
+                if hasattr(app, 'refresh'):
+                    app.refresh(layout=True)
+        except Exception:
+            pass
+
+        return full_response
+
+    except asyncio.CancelledError:
+        logger.info(f"Ollama streaming cancelled. Partial response length: {len(full_response)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        return full_response
+
+    except Exception as e:
+        logger.error(f"Error during Ollama streaming: {str(e)}")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        raise
+
+# Generic fallback streaming implementation
+async def _generate_generic_stream(
+    app: 'SimpleChatApp',
+    messages: List[Dict],
+    model: str,
+    style: str,
+    client: Any,
+    callback: Callable[[str], Awaitable[None]],
+    update_lock: asyncio.Lock
+) -> Optional[str]:
+    """Generic fallback implementation for streaming responses."""
+    try:
+        from app.main import debug_log
+    except ImportError:
+        debug_log = lambda msg: None
+
+    debug_log(f"Using generic streaming for model: {model}, client type: {type(client).__name__}")
+
+    # Initialize variables for response tracking
+    full_response = ""
+    buffer = []
+    last_update = time.time()
+    update_interval = 0.03 # Responsive updates
+
+    try:
+        # Initialize stream generator
+        debug_log("Initializing generic stream generator")
+        stream_generator = client.generate_stream(messages, model, style)
+
+        # Process stream chunks
+        debug_log("Beginning to process generic stream chunks")
+        async for chunk in stream_generator:
+            # Check for task cancellation
+            if asyncio.current_task().cancelled():
+                debug_log("Task cancellation detected during generic chunk processing")
+                if hasattr(client, 'cancel_stream'):
+                    await client.cancel_stream()
+                raise asyncio.CancelledError()
+
+            # Process chunk content
+            if chunk:
+                if not isinstance(chunk, str):
+                    try:
+                        chunk = str(chunk)
+                    except Exception:
+                        continue
+
+                buffer.append(chunk)
+                current_time = time.time()
+
+                # Update UI with new content
+                if (current_time - last_update >= update_interval or
+                    len(''.join(buffer)) > 5 or
+                    len(full_response) < 50):
+
+                    new_content = ''.join(buffer)
+                    full_response += new_content
+
+                    try:
+                        async with update_lock:
+                            await callback(full_response)
+                            if hasattr(app, 'refresh'):
+                                app.refresh(layout=True)
+                    except Exception as callback_err:
+                        logger.error(f"Error in generic UI callback: {str(callback_err)}")
+
+                    buffer = []
+                    last_update = current_time
+                    await asyncio.sleep(0.02)
+
+        # Process any remaining buffer content
+        if buffer:
+            new_content = ''.join(buffer)
+            full_response += new_content
+
+            try:
+                async with update_lock:
+                    await callback(full_response)
+                    if hasattr(app, 'refresh'):
                        app.refresh(layout=True)
                        await asyncio.sleep(0.02)
                        try:
                            messages_container = app.query_one("#messages-container")
-                            if messages_container
+                            if messages_container:
                                messages_container.scroll_end(animate=False)
                        except Exception:
                            pass
-                except Exception as refresh_err:
-                    debug_log(f"Error forcing final UI refresh: {str(refresh_err)}")
            except Exception as callback_err:
-
-
-
+                logger.error(f"Error in final generic UI callback: {str(callback_err)}")
+
+        # Final refresh to ensure everything is displayed correctly
        try:
            await asyncio.sleep(0.05)
-
-
-
-
-            except Exception
-
-
-        debug_log(f"Streaming response completed successfully. Response length: {len(full_response)}")
-        logger.info(f"Streaming response completed successfully. Response length: {len(full_response)}")
+            async with update_lock:
+                await callback(full_response)
+                if hasattr(app, 'refresh'):
+                    app.refresh(layout=True)
+        except Exception:
+            pass
+
        return full_response
-
+
    except asyncio.CancelledError:
-
-        logger.info(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
+        logger.info(f"Generic streaming cancelled. Partial response length: {len(full_response)}")
        if hasattr(client, 'cancel_stream'):
-
-            try:
-                await client.cancel_stream()
-                debug_log("Successfully cancelled client stream")
-            except Exception as cancel_err:
-                debug_log(f"Error cancelling client stream: {str(cancel_err)}")
+            await client.cancel_stream()
        return full_response
-
+
    except Exception as e:
-
-        logger.error(f"Error during streaming response: {str(e)}")
+        logger.error(f"Error during generic streaming: {str(e)}")
        if hasattr(client, 'cancel_stream'):
-
-            try:
-                await client.cancel_stream()
-                debug_log("Successfully cancelled client stream after error")
-            except Exception as cancel_err:
-                debug_log(f"Error cancelling client stream after error: {str(cancel_err)}")
+            await client.cancel_stream()
        raise
 
-
-
-
-
-
+# Worker function for streaming response generation
+async def generate_streaming_response(
+    app: 'SimpleChatApp',
+    messages: List[Dict],
+    model: str,
+    style: str,
+    client: Any,
+    callback: Callable[[str], Awaitable[None]]
+) -> Optional[str]:
+    """
+    Generate a streaming response from the model (as a Textual worker).
+    Refactored to be a coroutine, not an async generator.
+    """
+    try:
+        from app.main import debug_log
+    except ImportError:
+        debug_log = lambda msg: None
+
+    logger.info(f"Starting streaming response with model: {model}")
+    debug_log(f"Starting streaming response with model: '{model}', client type: {type(client).__name__}")
+
+    # Validate messages
+    if not messages:
+        debug_log("Error: messages list is empty")
+        raise ValueError("Messages list cannot be empty")
+
+    # Ensure all messages have required fields
+    for i, msg in enumerate(messages):
+        try:
+            debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
+            if 'role' not in msg:
+                debug_log(f"Adding missing 'role' to message {i}")
+                msg['role'] = 'user'
+            if 'content' not in msg:
+                debug_log(f"Adding missing 'content' to message {i}")
+                msg['content'] = ''
+        except Exception as e:
+            debug_log(f"Error checking message {i}: {str(e)}")
+            messages[i] = {
+                'role': 'user',
+                'content': str(msg) if msg else ''
+            }
+            debug_log(f"Repaired message {i}")
+
+    # Create a lock for synchronizing UI updates
+    update_lock = asyncio.Lock()
+
+    # Validate client
+    if client is None:
+        debug_log("Error: client is None, cannot proceed with streaming")
+        raise ValueError("Model client is None, cannot proceed with streaming")
+
+    if not hasattr(client, 'generate_stream'):
+        debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
+        raise ValueError(f"Client {type(client).__name__} does not support streaming")
+
+    # Explicitly check provider type first
+    is_ollama = 'ollama' in str(type(client)).lower()
+    is_openai = 'openai' in str(type(client)).lower()
+    is_anthropic = 'anthropic' in str(type(client)).lower()
+
+    debug_log(f"Client types - Ollama: {is_ollama}, OpenAI: {is_openai}, Anthropic: {is_anthropic}")
+
+    # Use separate implementations for each provider
+    try:
+        if is_openai:
+            debug_log("Using OpenAI-specific streaming implementation")
+            return await _generate_openai_stream(app, messages, model, style, client, callback, update_lock)
+        elif is_anthropic:
+            debug_log("Using Anthropic-specific streaming implementation")
+            return await _generate_anthropic_stream(app, messages, model, style, client, callback, update_lock)
+        elif is_ollama:
+            debug_log("Using Ollama-specific streaming implementation")
+            return await _generate_ollama_stream(app, messages, model, style, client, callback, update_lock)
+        else:
+            # Generic fallback
+            debug_log("Using generic streaming implementation")
+            return await _generate_generic_stream(app, messages, model, style, client, callback, update_lock)
+    except asyncio.CancelledError:
+        debug_log("Task cancellation detected in main streaming function")
+        if hasattr(client, 'cancel_stream'):
+            await client.cancel_stream()
+        raise
+    except Exception as e:
+        debug_log(f"Error in streaming implementation: {str(e)}")
+        logger.error(f"Error in streaming implementation: {str(e)}")
+        raise
 
 async def ensure_ollama_running() -> bool:
     """
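
For orientation, the four helpers added above share one dispatch-and-buffered-callback pattern: generate_streaming_response selects a provider-specific helper, and each helper accumulates stream chunks in a buffer and pushes periodic UI updates through an async callback guarded by update_lock. Below is a minimal, self-contained Python sketch of that pattern only; FakeClient, stream_with_buffered_updates, and on_update are hypothetical stand-ins for illustration, not chat-console APIs.

import asyncio
import time
from typing import Any, AsyncGenerator, Awaitable, Callable, Dict, List, Optional

class FakeClient:
    """Hypothetical stand-in for an OpenAI/Anthropic/Ollama client."""
    async def generate_stream(self, messages: List[Dict], model: str, style: str) -> AsyncGenerator[str, None]:
        # Emit a few chunks with a small delay, as a real streaming client would.
        for word in ["Hello", " ", "world", "!"]:
            await asyncio.sleep(0.01)
            yield word

async def stream_with_buffered_updates(
    client: Any,
    messages: List[Dict],
    model: str,
    style: str,
    callback: Callable[[str], Awaitable[None]],
    update_lock: asyncio.Lock,
    update_interval: float = 0.03,
) -> Optional[str]:
    """Accumulate chunks and push periodic UI updates, mirroring the helpers above."""
    full_response, buffer, last_update = "", [], time.time()
    async for chunk in client.generate_stream(messages, model, style):
        buffer.append(str(chunk))
        now = time.time()
        # Flush when the interval has elapsed or the buffer has grown enough.
        if now - last_update >= update_interval or len("".join(buffer)) > 5:
            full_response += "".join(buffer)
            async with update_lock:
                await callback(full_response)
            buffer, last_update = [], now
    # Flush whatever remains after the stream ends.
    if buffer:
        full_response += "".join(buffer)
        async with update_lock:
            await callback(full_response)
    return full_response

async def main() -> None:
    async def on_update(text: str) -> None:
        print(f"UI update: {text!r}")
    result = await stream_with_buffered_updates(
        FakeClient(), [{"role": "user", "content": "hi"}], "demo-model", "default",
        on_update, asyncio.Lock(),
    )
    print("final:", result)

asyncio.run(main())
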
@@ -555,6 +745,22 @@ def resolve_model_id(model_id_or_name: str) -> str:
     input_lower = model_id_or_name.lower().strip()
     logger.info(f"Attempting to resolve model identifier: '{input_lower}'")
 
+    # Add special case handling for common OpenAI models
+    openai_model_aliases = {
+        "04-mini": "gpt-4-mini", # Fix "04-mini" typo to "gpt-4-mini"
+        "04": "gpt-4",
+        "04-vision": "gpt-4-vision",
+        "04-turbo": "gpt-4-turbo",
+        "035": "gpt-3.5-turbo",
+        "35-turbo": "gpt-3.5-turbo",
+        "35": "gpt-3.5-turbo"
+    }
+
+    if input_lower in openai_model_aliases:
+        resolved = openai_model_aliases[input_lower]
+        logger.info(f"Resolved '{input_lower}' to '{resolved}' via OpenAI model alias")
+        return resolved
+
     # Special case handling for common typos and model name variations
     typo_corrections = {
         "o4-mini": "04-mini",