chat-console 0.3.9 → 0.3.94 (py3-none-any.whl)

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
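
Taken together, the new code in this diff rewrites `generate_conversation_title` to always fall back to a timestamped default title (preferring an OpenAI or Anthropic client when an API key is configured) and splits the old monolithic streaming worker into per-provider helpers (`_generate_openai_stream`, `_generate_anthropic_stream`, `_generate_ollama_stream`, plus a generic fallback) that `generate_streaming_response` dispatches to, with a shared `asyncio.Lock` serializing UI callbacks. All four helpers repeat the same chunk-buffering loop; the sketch below distills that pattern into a standalone form. The function name, the `fake_stream` generator, and the demo harness are illustrative only, not code from the package.

```python
import asyncio
import time
from typing import AsyncIterator, Awaitable, Callable

async def stream_with_buffered_updates(
    chunks: AsyncIterator[str],
    callback: Callable[[str], Awaitable[None]],
    update_lock: asyncio.Lock,
    update_interval: float = 0.03,
) -> str:
    """Accumulate stream chunks and flush them to a UI callback at a throttled rate."""
    full_response = ""
    buffer: list[str] = []
    last_update = time.time()

    async for chunk in chunks:
        if not chunk:
            continue
        buffer.append(str(chunk))
        now = time.time()
        # Mirror the diff's flush rule: time-based, size-based, or eager for the first ~50 chars.
        if (now - last_update >= update_interval
                or len("".join(buffer)) > 5
                or len(full_response) < 50):
            full_response += "".join(buffer)
            async with update_lock:  # one lock serializes UI updates across tasks
                await callback(full_response)
            buffer, last_update = [], now
            await asyncio.sleep(0.02)

    if buffer:  # flush any trailing content
        full_response += "".join(buffer)
        async with update_lock:
            await callback(full_response)
    return full_response

async def _demo() -> None:
    async def fake_stream() -> AsyncIterator[str]:
        for word in ("Streaming ", "works ", "here."):
            yield word
            await asyncio.sleep(0.01)

    async def show(text: str) -> None:
        print(text)

    await stream_with_buffered_updates(fake_stream(), show, asyncio.Lock())

if __name__ == "__main__":
    asyncio.run(_demo())
```
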
app/utils.py CHANGED
@@ -27,458 +27,648 @@ async def generate_conversation_title(message: str, model: str, client: Any) ->
27
27
 
28
28
  debug_log(f"Starting title generation with model: {model}, client type: {type(client).__name__}")
29
29
 
30
- # --- Choose a specific, reliable model for title generation ---
31
- # First, determine if we have a valid client
32
- if client is None:
33
- debug_log("Client is None, will use default title")
34
- return f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
35
-
36
- # Determine the best model to use for title generation
37
- title_model_id = None
30
+ # For safety, always use a default title first
31
+ default_title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
38
32
 
39
- # Check if client is Anthropic
40
- is_anthropic = 'anthropic' in str(type(client)).lower()
41
- if is_anthropic:
42
- debug_log("Using Anthropic client for title generation")
43
- # Try to get available models safely
44
- try:
45
- available_anthropic_models = client.get_available_models()
46
- debug_log(f"Found {len(available_anthropic_models)} Anthropic models")
33
+ # Try-except the entire function to ensure we always return a title
34
+ try:
35
+ # Pick a reliable title generation model - prefer OpenAI if available
36
+ from ..config import OPENAI_API_KEY, ANTHROPIC_API_KEY
37
+
38
+ if OPENAI_API_KEY:
39
+ from ..api.openai import OpenAIClient
40
+ title_client = await OpenAIClient.create()
41
+ title_model = "gpt-3.5-turbo"
42
+ debug_log("Using OpenAI for title generation")
43
+ elif ANTHROPIC_API_KEY:
44
+ from ..api.anthropic import AnthropicClient
45
+ title_client = await AnthropicClient.create()
46
+ title_model = "claude-3-haiku-20240307"
47
+ debug_log("Using Anthropic for title generation")
48
+ else:
49
+ # Use the passed client if no API keys available
50
+ title_client = client
51
+ title_model = model
52
+ debug_log(f"Using provided {type(client).__name__} for title generation")
53
+
54
+ # Create a special prompt for title generation
55
+ title_prompt = [
56
+ {
57
+ "role": "system",
58
+ "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. Return only the title text with no additional explanation or formatting."
59
+ },
60
+ {
61
+ "role": "user",
62
+ "content": message
63
+ }
64
+ ]
65
+
66
+ # Generate title
67
+ debug_log(f"Sending title generation request to {title_model}")
68
+ title = await title_client.generate_completion(
69
+ messages=title_prompt,
70
+ model=title_model,
71
+ temperature=0.7,
72
+ max_tokens=60
73
+ )
74
+
75
+ # Sanitize the title
76
+ title = title.strip().strip('"\'').strip()
77
+ if len(title) > 40:
78
+ title = title[:37] + "..."
47
79
 
48
- # Try Claude 3 Haiku first (fastest)
49
- haiku_id = "claude-3-haiku-20240307"
50
- if any(m.get("id") == haiku_id for m in available_anthropic_models):
51
- title_model_id = haiku_id
52
- debug_log(f"Using Anthropic Haiku for title generation: {title_model_id}")
53
- else:
54
- # If Haiku not found, try Sonnet
55
- sonnet_id = "claude-3-sonnet-20240229"
56
- if any(m.get("id") == sonnet_id for m in available_anthropic_models):
57
- title_model_id = sonnet_id
58
- debug_log(f"Using Anthropic Sonnet for title generation: {title_model_id}")
59
- else:
60
- debug_log("Neither Haiku nor Sonnet found in Anthropic models list")
61
- except Exception as e:
62
- debug_log(f"Error getting Anthropic models: {str(e)}")
80
+ debug_log(f"Generated title: {title}")
81
+ return title
82
+
83
+ except Exception as e:
84
+ # Log the error and return default title
85
+ debug_log(f"Title generation failed: {str(e)}")
86
+ logger.error(f"Title generation failed: {str(e)}")
87
+ return default_title
63
88
 
64
- # Check if client is OpenAI
65
- is_openai = 'openai' in str(type(client)).lower()
66
- if is_openai:
67
- debug_log("Using OpenAI client for title generation")
68
- # Use GPT-3.5 for title generation (fast and cost-effective)
69
- title_model_id = "gpt-3.5-turbo"
70
- debug_log(f"Using OpenAI model for title generation: {title_model_id}")
71
- # For OpenAI, we'll always use their model, not fall back to the passed model
72
- # This prevents trying to use Ollama models with OpenAI client
89
+ # Helper function for OpenAI streaming
90
+ async def _generate_openai_stream(
91
+ app: 'SimpleChatApp',
92
+ messages: List[Dict],
93
+ model: str,
94
+ style: str,
95
+ client: Any,
96
+ callback: Callable[[str], Awaitable[None]],
97
+ update_lock: asyncio.Lock
98
+ ) -> Optional[str]:
99
+ """Generate streaming response using OpenAI provider."""
100
+ try:
101
+ from app.main import debug_log
102
+ except ImportError:
103
+ debug_log = lambda msg: None
73
104
 
74
- # Check if client is Ollama
75
- is_ollama = 'ollama' in str(type(client)).lower()
76
- if is_ollama and not title_model_id:
77
- debug_log("Using Ollama client for title generation")
78
- # For Ollama, check if the model exists before using it
79
- try:
80
- # Try a quick test request to check if model exists
81
- debug_log(f"Testing if Ollama model exists: {model}")
82
- import aiohttp
83
- async with aiohttp.ClientSession() as session:
84
- try:
85
- base_url = "http://localhost:11434"
86
- async with session.post(
87
- f"{base_url}/api/generate",
88
- json={"model": model, "prompt": "test", "stream": False},
89
- timeout=2
90
- ) as response:
91
- if response.status == 200:
92
- # Model exists, use it
93
- title_model_id = model
94
- debug_log(f"Ollama model {model} exists, using it for title generation")
95
- else:
96
- debug_log(f"Ollama model {model} returned status {response.status}, falling back to default")
97
- # Fall back to a common model
98
- title_model_id = "llama3"
99
- except Exception as e:
100
- debug_log(f"Error testing Ollama model: {str(e)}, falling back to default")
101
- # Fall back to a common model
102
- title_model_id = "llama3"
103
- except Exception as e:
104
- debug_log(f"Error checking Ollama model: {str(e)}")
105
- # Fall back to a common model
106
- title_model_id = "llama3"
105
+ debug_log(f"Using OpenAI-specific streaming for model: {model}")
107
106
 
108
- # Fallback logic if no specific model was found
109
- if not title_model_id:
110
- # Use a safe default based on client type
111
- if is_openai:
112
- title_model_id = "gpt-3.5-turbo"
113
- elif is_anthropic:
114
- title_model_id = "claude-3-haiku-20240307"
115
- elif is_ollama:
116
- title_model_id = "llama3" # Common default
117
- else:
118
- # Last resort - use the originally passed model
119
- title_model_id = model
120
-
121
- debug_log(f"No specific model found, using fallback model for title generation: {title_model_id}")
107
+ # Initialize variables for response tracking
108
+ full_response = ""
109
+ buffer = []
110
+ last_update = time.time()
111
+ update_interval = 0.03 # Responsive updates for OpenAI
122
112
 
123
- logger.info(f"Generating title for conversation using model: {title_model_id}")
124
- debug_log(f"Final model selected for title generation: {title_model_id}")
113
+ try:
114
+ # Initialize stream generator
115
+ debug_log("Initializing OpenAI stream generator")
116
+ stream_generator = client.generate_stream(messages, model, style)
117
+
118
+ # Process stream chunks
119
+ debug_log("Beginning to process OpenAI stream chunks")
120
+ async for chunk in stream_generator:
121
+ # Check for task cancellation
122
+ if asyncio.current_task().cancelled():
123
+ debug_log("Task cancellation detected during OpenAI chunk processing")
124
+ if hasattr(client, 'cancel_stream'):
125
+ await client.cancel_stream()
126
+ raise asyncio.CancelledError()
127
+
128
+ # Process chunk content
129
+ if chunk:
130
+ if not isinstance(chunk, str):
131
+ try:
132
+ chunk = str(chunk)
133
+ except Exception:
134
+ continue
135
+
136
+ buffer.append(chunk)
137
+ current_time = time.time()
138
+
139
+ # Update UI with new content
140
+ if (current_time - last_update >= update_interval or
141
+ len(''.join(buffer)) > 5 or
142
+ len(full_response) < 50):
143
+
144
+ new_content = ''.join(buffer)
145
+ full_response += new_content
146
+
147
+ try:
148
+ async with update_lock:
149
+ await callback(full_response)
150
+ if hasattr(app, 'refresh'):
151
+ app.refresh(layout=True)
152
+ except Exception as callback_err:
153
+ logger.error(f"Error in OpenAI UI callback: {str(callback_err)}")
154
+
155
+ buffer = []
156
+ last_update = current_time
157
+ await asyncio.sleep(0.02)
158
+
159
+ # Process any remaining buffer content
160
+ if buffer:
161
+ new_content = ''.join(buffer)
162
+ full_response += new_content
163
+
164
+ try:
165
+ async with update_lock:
166
+ await callback(full_response)
167
+ if hasattr(app, 'refresh'):
168
+ app.refresh(layout=True)
169
+ await asyncio.sleep(0.02)
170
+ try:
171
+ messages_container = app.query_one("#messages-container")
172
+ if messages_container:
173
+ messages_container.scroll_end(animate=False)
174
+ except Exception:
175
+ pass
176
+ except Exception as callback_err:
177
+ logger.error(f"Error in final OpenAI UI callback: {str(callback_err)}")
178
+
179
+ # Final refresh to ensure everything is displayed correctly
180
+ try:
181
+ await asyncio.sleep(0.05)
182
+ async with update_lock:
183
+ await callback(full_response)
184
+ if hasattr(app, 'refresh'):
185
+ app.refresh(layout=True)
186
+ except Exception:
187
+ pass
188
+
189
+ return full_response
190
+
191
+ except asyncio.CancelledError:
192
+ logger.info(f"OpenAI streaming cancelled. Partial response length: {len(full_response)}")
193
+ if hasattr(client, 'cancel_stream'):
194
+ await client.cancel_stream()
195
+ return full_response
196
+
197
+ except Exception as e:
198
+ logger.error(f"Error during OpenAI streaming: {str(e)}")
199
+ if hasattr(client, 'cancel_stream'):
200
+ await client.cancel_stream()
201
+ raise
125
202
 
126
- # Create a special prompt for title generation
127
- title_prompt = [
128
- {
129
- "role": "system",
130
- "content": "Generate a brief, descriptive title (maximum 40 characters) for a conversation that starts with the following message. The title should be concise and reflect the main topic or query. Return only the title text with no additional explanation or formatting."
131
- },
132
- {
133
- "role": "user",
134
- "content": message
135
- }
136
- ]
203
+ # Helper function for Anthropic streaming
204
+ async def _generate_anthropic_stream(
205
+ app: 'SimpleChatApp',
206
+ messages: List[Dict],
207
+ model: str,
208
+ style: str,
209
+ client: Any,
210
+ callback: Callable[[str], Awaitable[None]],
211
+ update_lock: asyncio.Lock
212
+ ) -> Optional[str]:
213
+ """Generate streaming response using Anthropic provider."""
214
+ try:
215
+ from app.main import debug_log
216
+ except ImportError:
217
+ debug_log = lambda msg: None
137
218
 
138
- tries = 2 # Number of retries
139
- last_error = None
219
+ debug_log(f"Using Anthropic-specific streaming for model: {model}")
140
220
 
141
- while tries > 0:
142
- try:
143
- debug_log(f"Attempt {3-tries} to generate title")
144
- # First try generate_completion if available
145
- if hasattr(client, 'generate_completion'):
146
- debug_log("Using generate_completion method")
147
- try:
148
- title = await client.generate_completion(
149
- messages=title_prompt,
150
- model=title_model_id,
151
- temperature=0.7,
152
- max_tokens=60 # Titles should be short
153
- )
154
- debug_log(f"Title generated successfully: {title}")
155
- except Exception as completion_error:
156
- debug_log(f"Error in generate_completion: {str(completion_error)}")
157
- raise # Re-raise to be caught by outer try/except
158
- # Fall back to generate_stream if completion not available
159
- elif hasattr(client, 'generate_stream'):
160
- debug_log("Using generate_stream method")
161
- title_chunks = []
162
- try:
163
- async for chunk in client.generate_stream(title_prompt, title_model_id, style=""):
164
- if chunk is not None:
165
- title_chunks.append(chunk)
166
- debug_log(f"Received chunk of length: {len(chunk)}")
221
+ # Initialize variables for response tracking
222
+ full_response = ""
223
+ buffer = []
224
+ last_update = time.time()
225
+ update_interval = 0.03 # Responsive updates for Anthropic
226
+
227
+ try:
228
+ # Initialize stream generator
229
+ debug_log("Initializing Anthropic stream generator")
230
+ stream_generator = client.generate_stream(messages, model, style)
231
+
232
+ # Process stream chunks
233
+ debug_log("Beginning to process Anthropic stream chunks")
234
+ async for chunk in stream_generator:
235
+ # Check for task cancellation
236
+ if asyncio.current_task().cancelled():
237
+ debug_log("Task cancellation detected during Anthropic chunk processing")
238
+ if hasattr(client, 'cancel_stream'):
239
+ await client.cancel_stream()
240
+ raise asyncio.CancelledError()
241
+
242
+ # Process chunk content
243
+ if chunk:
244
+ if not isinstance(chunk, str):
245
+ try:
246
+ chunk = str(chunk)
247
+ except Exception:
248
+ continue
249
+
250
+ buffer.append(chunk)
251
+ current_time = time.time()
252
+
253
+ # Update UI with new content
254
+ if (current_time - last_update >= update_interval or
255
+ len(''.join(buffer)) > 5 or
256
+ len(full_response) < 50):
167
257
 
168
- title = "".join(title_chunks)
169
- debug_log(f"Combined title from chunks: {title}")
258
+ new_content = ''.join(buffer)
259
+ full_response += new_content
170
260
 
171
- # If we didn't get any content, use a default
172
- if not title.strip():
173
- debug_log("Empty title received, using default")
174
- title = f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
175
- except Exception as stream_error:
176
- debug_log(f"Error during title stream processing: {str(stream_error)}")
177
- raise # Re-raise to be caught by outer try/except
178
- else:
179
- debug_log("Client does not support any title generation method")
180
- raise NotImplementedError("Client does not support a suitable method for title generation.")
181
-
182
- # Sanitize and limit the title
183
- title = title.strip().strip('"\'').strip()
184
- if len(title) > 40: # Set a maximum title length
185
- title = title[:37] + "..."
261
+ try:
262
+ async with update_lock:
263
+ await callback(full_response)
264
+ if hasattr(app, 'refresh'):
265
+ app.refresh(layout=True)
266
+ except Exception as callback_err:
267
+ logger.error(f"Error in Anthropic UI callback: {str(callback_err)}")
268
+
269
+ buffer = []
270
+ last_update = current_time
271
+ await asyncio.sleep(0.02)
272
+
273
+ # Process any remaining buffer content
274
+ if buffer:
275
+ new_content = ''.join(buffer)
276
+ full_response += new_content
277
+
278
+ try:
279
+ async with update_lock:
280
+ await callback(full_response)
281
+ if hasattr(app, 'refresh'):
282
+ app.refresh(layout=True)
283
+ await asyncio.sleep(0.02)
284
+ try:
285
+ messages_container = app.query_one("#messages-container")
286
+ if messages_container:
287
+ messages_container.scroll_end(animate=False)
288
+ except Exception:
289
+ pass
290
+ except Exception as callback_err:
291
+ logger.error(f"Error in final Anthropic UI callback: {str(callback_err)}")
186
292
 
187
- logger.info(f"Generated title: {title}")
188
- debug_log(f"Final sanitized title: {title}")
189
- return title # Return successful title
293
+ # Final refresh to ensure everything is displayed correctly
294
+ try:
295
+ await asyncio.sleep(0.05)
296
+ async with update_lock:
297
+ await callback(full_response)
298
+ if hasattr(app, 'refresh'):
299
+ app.refresh(layout=True)
300
+ except Exception:
301
+ pass
190
302
 
191
- except Exception as e:
192
- last_error = str(e)
193
- debug_log(f"Error generating title (tries left: {tries-1}): {last_error}")
194
- logger.error(f"Error generating title (tries left: {tries-1}): {last_error}")
195
- tries -= 1
196
- if tries > 0: # Only sleep if there are more retries
197
- await asyncio.sleep(1) # Small delay before retry
198
-
199
- # If all retries fail, log the error and return a default title
200
- debug_log(f"Failed to generate title after multiple retries. Using default title.")
201
- logger.error(f"Failed to generate title after multiple retries. Last error: {last_error}")
202
- return f"Conversation ({datetime.now().strftime('%Y-%m-%d %H:%M')})"
303
+ return full_response
304
+
305
+ except asyncio.CancelledError:
306
+ logger.info(f"Anthropic streaming cancelled. Partial response length: {len(full_response)}")
307
+ if hasattr(client, 'cancel_stream'):
308
+ await client.cancel_stream()
309
+ return full_response
310
+
311
+ except Exception as e:
312
+ logger.error(f"Error during Anthropic streaming: {str(e)}")
313
+ if hasattr(client, 'cancel_stream'):
314
+ await client.cancel_stream()
315
+ raise
203
316
 
204
- # Worker function for streaming response generation
205
- async def generate_streaming_response(
317
+ # Helper function for Ollama streaming
318
+ async def _generate_ollama_stream(
206
319
  app: 'SimpleChatApp',
207
320
  messages: List[Dict],
208
321
  model: str,
209
322
  style: str,
210
323
  client: Any,
211
- callback: Callable[[str], Awaitable[None]]
324
+ callback: Callable[[str], Awaitable[None]],
325
+ update_lock: asyncio.Lock
212
326
  ) -> Optional[str]:
213
- """
214
- Generate a streaming response from the model (as a Textual worker).
215
- Refactored to be a coroutine, not an async generator.
216
- """
327
+ """Generate streaming response using Ollama provider."""
217
328
  try:
218
329
  from app.main import debug_log
219
330
  except ImportError:
220
331
  debug_log = lambda msg: None
221
-
222
- logger.info(f"Starting streaming response with model: {model}")
223
- debug_log(f"Starting streaming response with model: '{model}', client type: {type(client).__name__}")
224
-
225
- # Validate messages
226
- if not messages:
227
- debug_log("Error: messages list is empty")
228
- raise ValueError("Messages list cannot be empty")
229
-
230
- # Ensure all messages have required fields
231
- for i, msg in enumerate(messages):
232
- try:
233
- debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
234
- if 'role' not in msg:
235
- debug_log(f"Adding missing 'role' to message {i}")
236
- msg['role'] = 'user'
237
- if 'content' not in msg:
238
- debug_log(f"Adding missing 'content' to message {i}")
239
- msg['content'] = ''
240
- except Exception as e:
241
- debug_log(f"Error checking message {i}: {str(e)}")
242
- messages[i] = {
243
- 'role': 'user',
244
- 'content': str(msg) if msg else ''
245
- }
246
- debug_log(f"Repaired message {i}")
247
-
332
+
333
+ debug_log(f"Using Ollama-specific streaming for model: {model}")
334
+
248
335
  # Initialize variables for response tracking
249
336
  full_response = ""
250
337
  buffer = []
251
338
  last_update = time.time()
252
- update_interval = 0.05 # Reduced interval for more frequent updates
253
-
339
+ update_interval = 0.03 # Responsive updates for Ollama
340
+
254
341
  try:
255
- # Validate client
256
- if client is None:
257
- debug_log("Error: client is None, cannot proceed with streaming")
258
- raise ValueError("Model client is None, cannot proceed with streaming")
259
-
260
- if not hasattr(client, 'generate_stream'):
261
- debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
262
- raise ValueError(f"Client {type(client).__name__} does not support streaming")
263
-
264
- # Determine client type
265
- is_ollama = 'ollama' in str(type(client)).lower()
266
- is_openai = 'openai' in str(type(client)).lower()
267
- is_anthropic = 'anthropic' in str(type(client)).lower()
268
-
269
- debug_log(f"Client types - Ollama: {is_ollama}, OpenAI: {is_openai}, Anthropic: {is_anthropic}")
270
-
271
- # Only show loading indicator for Ollama (which may need to load models)
272
- # This prevents Ollama-specific UI elements from showing when using other providers
273
- if is_ollama and hasattr(app, 'query_one'):
342
+ # Show loading indicator for Ollama (which may need to load models)
343
+ if hasattr(app, 'query_one'):
274
344
  try:
275
345
  debug_log("Showing initial model loading indicator for Ollama")
276
- logger.info("Showing initial model loading indicator for Ollama")
277
346
  loading = app.query_one("#loading-indicator")
278
347
  loading.add_class("model-loading")
279
348
  loading.update("⚙️ Loading Ollama model...")
280
349
  except Exception as e:
281
350
  debug_log(f"Error setting initial Ollama loading state: {str(e)}")
282
- logger.error(f"Error setting initial Ollama loading state: {str(e)}")
283
-
284
- debug_log(f"Starting stream generation with messages length: {len(messages)}")
285
- logger.info(f"Starting stream generation for model: {model}")
286
-
351
+
287
352
  # Initialize stream generator
288
- try:
289
- debug_log("Calling client.generate_stream()")
290
- stream_generator = client.generate_stream(messages, model, style)
291
- debug_log("Successfully obtained stream generator")
292
- except Exception as stream_init_error:
293
- debug_log(f"Error initializing stream generator: {str(stream_init_error)}")
294
- logger.error(f"Error initializing stream generator: {str(stream_init_error)}")
295
- raise
296
-
297
- # Update UI if model is ready (Ollama specific)
298
- # Only check is_loading_model for Ollama clients to prevent errors with other providers
299
- if is_ollama and hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
353
+ debug_log("Initializing Ollama stream generator")
354
+ stream_generator = client.generate_stream(messages, model, style)
355
+
356
+ # Update UI if model is ready
357
+ if hasattr(client, 'is_loading_model') and not client.is_loading_model() and hasattr(app, 'query_one'):
300
358
  try:
301
359
  debug_log("Ollama model is ready for generation, updating UI")
302
- logger.info("Ollama model is ready for generation, updating UI")
303
360
  loading = app.query_one("#loading-indicator")
304
361
  loading.remove_class("model-loading")
305
362
  loading.update("▪▪▪ Generating response...")
306
363
  except Exception as e:
307
- debug_log(f"Error updating UI after stream init: {str(e)}")
308
- logger.error(f"Error updating UI after stream init: {str(e)}")
309
-
364
+ debug_log(f"Error updating UI after Ollama stream init: {str(e)}")
365
+
310
366
  # Process stream chunks
311
- debug_log("Beginning to process stream chunks")
312
- try:
313
- async for chunk in stream_generator:
314
- # Check for task cancellation
315
- if asyncio.current_task().cancelled():
316
- debug_log("Task cancellation detected during chunk processing")
317
- logger.info("Task cancellation detected during chunk processing")
318
- if hasattr(client, 'cancel_stream'):
319
- debug_log("Calling client.cancel_stream() due to task cancellation")
320
- await client.cancel_stream()
321
- raise asyncio.CancelledError()
322
-
323
- # Handle Ollama model loading state changes - only for Ollama clients
324
- if is_ollama and hasattr(client, 'is_loading_model'):
325
- try:
326
- model_loading = client.is_loading_model()
327
- debug_log(f"Ollama model loading state: {model_loading}")
328
- if hasattr(app, 'query_one'):
329
- try:
330
- loading = app.query_one("#loading-indicator")
331
- if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
332
- debug_log("Ollama model loading started during streaming")
333
- logger.info("Ollama model loading started during streaming")
334
- loading.add_class("model-loading")
335
- loading.update("⚙️ Loading Ollama model...")
336
- elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
337
- debug_log("Ollama model loading finished during streaming")
338
- logger.info("Ollama model loading finished during streaming")
339
- loading.remove_class("model-loading")
340
- loading.update("▪▪▪ Generating response...")
341
- except Exception as ui_e:
342
- debug_log(f"Error updating UI elements: {str(ui_e)}")
343
- logger.error(f"Error updating UI elements: {str(ui_e)}")
344
- except Exception as e:
345
- debug_log(f"Error checking Ollama model loading state: {str(e)}")
346
- logger.error(f"Error checking Ollama model loading state: {str(e)}")
347
-
348
- # Process chunk content
349
- if chunk:
350
- if not isinstance(chunk, str):
351
- debug_log(f"WARNING: Received non-string chunk of type: {type(chunk).__name__}")
367
+ debug_log("Beginning to process Ollama stream chunks")
368
+ async for chunk in stream_generator:
369
+ # Check for task cancellation
370
+ if asyncio.current_task().cancelled():
371
+ debug_log("Task cancellation detected during Ollama chunk processing")
372
+ if hasattr(client, 'cancel_stream'):
373
+ await client.cancel_stream()
374
+ raise asyncio.CancelledError()
375
+
376
+ # Handle Ollama model loading state changes
377
+ if hasattr(client, 'is_loading_model'):
378
+ try:
379
+ model_loading = client.is_loading_model()
380
+ if hasattr(app, 'query_one'):
352
381
  try:
353
- chunk = str(chunk)
354
- debug_log(f"Successfully converted chunk to string, length: {len(chunk)}")
355
- except Exception as e:
356
- debug_log(f"Error converting chunk to string: {str(e)}")
357
- continue
358
-
359
- debug_log(f"Received chunk of length: {len(chunk)}")
360
- buffer.append(chunk)
361
- current_time = time.time()
362
-
363
- # Update UI with new content
364
- # Always update immediately for the first few chunks for better responsiveness
365
- if (current_time - last_update >= update_interval or
366
- len(''.join(buffer)) > 5 or # Reduced buffer size threshold
367
- len(full_response) < 50): # More aggressive updates for early content
368
-
369
- new_content = ''.join(buffer)
370
- full_response += new_content
371
- debug_log(f"Updating UI with content length: {len(full_response)}")
372
-
373
- # Enhanced debug logging
374
- print(f"STREAM DEBUG: +{len(new_content)} chars, total: {len(full_response)}")
375
- # Print first few characters of content for debugging
376
- if len(full_response) < 100:
377
- print(f"STREAM CONTENT: '{full_response}'")
382
+ loading = app.query_one("#loading-indicator")
383
+ if model_loading and hasattr(loading, 'has_class') and not loading.has_class("model-loading"):
384
+ debug_log("Ollama model loading started during streaming")
385
+ loading.add_class("model-loading")
386
+ loading.update("⚙️ Loading Ollama model...")
387
+ elif not model_loading and hasattr(loading, 'has_class') and loading.has_class("model-loading"):
388
+ debug_log("Ollama model loading finished during streaming")
389
+ loading.remove_class("model-loading")
390
+ loading.update("▪▪▪ Generating response...")
391
+ except Exception:
392
+ pass
393
+ except Exception:
394
+ pass
395
+
396
+ # Process chunk content
397
+ if chunk:
398
+ if not isinstance(chunk, str):
399
+ try:
400
+ chunk = str(chunk)
401
+ except Exception:
402
+ continue
378
403
 
379
- try:
380
- # Call the UI callback with the full response so far
381
- debug_log("Calling UI callback with content")
404
+ buffer.append(chunk)
405
+ current_time = time.time()
406
+
407
+ # Update UI with new content
408
+ if (current_time - last_update >= update_interval or
409
+ len(''.join(buffer)) > 5 or
410
+ len(full_response) < 50):
411
+
412
+ new_content = ''.join(buffer)
413
+ full_response += new_content
414
+
415
+ try:
416
+ async with update_lock:
382
417
  await callback(full_response)
383
- debug_log("UI callback completed successfully")
384
-
385
- # Force app refresh after each update
386
418
  if hasattr(app, 'refresh'):
387
- debug_log("Forcing app refresh")
388
- app.refresh(layout=True) # Force layout refresh
389
- except Exception as callback_err:
390
- debug_log(f"Error in UI callback: {str(callback_err)}")
391
- logger.error(f"Error in UI callback: {str(callback_err)}")
392
- print(f"STREAM ERROR: Error updating UI: {str(callback_err)}")
393
-
394
- buffer = []
395
- last_update = current_time
419
+ app.refresh(layout=True)
420
+ except Exception as callback_err:
421
+ logger.error(f"Error in Ollama UI callback: {str(callback_err)}")
396
422
 
397
- # Shorter sleep between updates for more responsive streaming
398
- await asyncio.sleep(0.02)
399
- except asyncio.CancelledError:
400
- debug_log("CancelledError in stream processing")
401
- raise
402
- except Exception as chunk_error:
403
- debug_log(f"Error processing stream chunks: {str(chunk_error)}")
404
- logger.error(f"Error processing stream chunks: {str(chunk_error)}")
405
- raise
406
-
423
+ buffer = []
424
+ last_update = current_time
425
+ await asyncio.sleep(0.02)
426
+
427
+ # Process any remaining buffer content
407
428
  if buffer:
408
429
  new_content = ''.join(buffer)
409
430
  full_response += new_content
410
- debug_log(f"Sending final content, total length: {len(full_response)}")
431
+
411
432
  try:
412
- await callback(full_response)
413
- debug_log("Final UI callback completed successfully")
414
-
415
- debug_log("Forcing final UI refresh sequence for all models")
416
- try:
433
+ async with update_lock:
434
+ await callback(full_response)
417
435
  if hasattr(app, 'refresh'):
418
- app.refresh(layout=False)
436
+ app.refresh(layout=True)
419
437
  await asyncio.sleep(0.02)
420
438
  try:
421
439
  messages_container = app.query_one("#messages-container")
422
- if messages_container and hasattr(messages_container, 'scroll_end'):
440
+ if messages_container:
423
441
  messages_container.scroll_end(animate=False)
424
442
  except Exception:
425
443
  pass
444
+ except Exception as callback_err:
445
+ logger.error(f"Error in final Ollama UI callback: {str(callback_err)}")
446
+
447
+ # Final refresh to ensure everything is displayed correctly
448
+ try:
449
+ await asyncio.sleep(0.05)
450
+ async with update_lock:
451
+ await callback(full_response)
452
+ if hasattr(app, 'refresh'):
453
+ app.refresh(layout=True)
454
+ except Exception:
455
+ pass
456
+
457
+ return full_response
458
+
459
+ except asyncio.CancelledError:
460
+ logger.info(f"Ollama streaming cancelled. Partial response length: {len(full_response)}")
461
+ if hasattr(client, 'cancel_stream'):
462
+ await client.cancel_stream()
463
+ return full_response
464
+
465
+ except Exception as e:
466
+ logger.error(f"Error during Ollama streaming: {str(e)}")
467
+ if hasattr(client, 'cancel_stream'):
468
+ await client.cancel_stream()
469
+ raise
470
+
471
+ # Generic fallback streaming implementation
472
+ async def _generate_generic_stream(
473
+ app: 'SimpleChatApp',
474
+ messages: List[Dict],
475
+ model: str,
476
+ style: str,
477
+ client: Any,
478
+ callback: Callable[[str], Awaitable[None]],
479
+ update_lock: asyncio.Lock
480
+ ) -> Optional[str]:
481
+ """Generic fallback implementation for streaming responses."""
482
+ try:
483
+ from app.main import debug_log
484
+ except ImportError:
485
+ debug_log = lambda msg: None
486
+
487
+ debug_log(f"Using generic streaming for model: {model}, client type: {type(client).__name__}")
488
+
489
+ # Initialize variables for response tracking
490
+ full_response = ""
491
+ buffer = []
492
+ last_update = time.time()
493
+ update_interval = 0.03 # Responsive updates
494
+
495
+ try:
496
+ # Initialize stream generator
497
+ debug_log("Initializing generic stream generator")
498
+ stream_generator = client.generate_stream(messages, model, style)
499
+
500
+ # Process stream chunks
501
+ debug_log("Beginning to process generic stream chunks")
502
+ async for chunk in stream_generator:
503
+ # Check for task cancellation
504
+ if asyncio.current_task().cancelled():
505
+ debug_log("Task cancellation detected during generic chunk processing")
506
+ if hasattr(client, 'cancel_stream'):
507
+ await client.cancel_stream()
508
+ raise asyncio.CancelledError()
509
+
510
+ # Process chunk content
511
+ if chunk:
512
+ if not isinstance(chunk, str):
513
+ try:
514
+ chunk = str(chunk)
515
+ except Exception:
516
+ continue
517
+
518
+ buffer.append(chunk)
519
+ current_time = time.time()
520
+
521
+ # Update UI with new content
522
+ if (current_time - last_update >= update_interval or
523
+ len(''.join(buffer)) > 5 or
524
+ len(full_response) < 50):
525
+
526
+ new_content = ''.join(buffer)
527
+ full_response += new_content
528
+
529
+ try:
530
+ async with update_lock:
531
+ await callback(full_response)
532
+ if hasattr(app, 'refresh'):
533
+ app.refresh(layout=True)
534
+ except Exception as callback_err:
535
+ logger.error(f"Error in generic UI callback: {str(callback_err)}")
536
+
537
+ buffer = []
538
+ last_update = current_time
539
+ await asyncio.sleep(0.02)
540
+
541
+ # Process any remaining buffer content
542
+ if buffer:
543
+ new_content = ''.join(buffer)
544
+ full_response += new_content
545
+
546
+ try:
547
+ async with update_lock:
548
+ await callback(full_response)
549
+ if hasattr(app, 'refresh'):
426
550
  app.refresh(layout=True)
427
551
  await asyncio.sleep(0.02)
428
552
  try:
429
553
  messages_container = app.query_one("#messages-container")
430
- if messages_container and hasattr(messages_container, 'scroll_end'):
554
+ if messages_container:
431
555
  messages_container.scroll_end(animate=False)
432
556
  except Exception:
433
557
  pass
434
- except Exception as refresh_err:
435
- debug_log(f"Error forcing final UI refresh: {str(refresh_err)}")
436
558
  except Exception as callback_err:
437
- debug_log(f"Error in final UI callback: {str(callback_err)}")
438
- logger.error(f"Error in final UI callback: {str(callback_err)}")
439
-
559
+ logger.error(f"Error in final generic UI callback: {str(callback_err)}")
560
+
561
+ # Final refresh to ensure everything is displayed correctly
440
562
  try:
441
563
  await asyncio.sleep(0.05)
442
- debug_log("Sending one final callback to ensure UI refresh")
443
- await callback(full_response)
444
- if hasattr(app, 'refresh'):
445
- app.refresh(layout=True)
446
- except Exception as final_err:
447
- debug_log(f"Error in final extra callback: {str(final_err)}")
448
-
449
- debug_log(f"Streaming response completed successfully. Response length: {len(full_response)}")
450
- logger.info(f"Streaming response completed successfully. Response length: {len(full_response)}")
564
+ async with update_lock:
565
+ await callback(full_response)
566
+ if hasattr(app, 'refresh'):
567
+ app.refresh(layout=True)
568
+ except Exception:
569
+ pass
570
+
451
571
  return full_response
452
-
572
+
453
573
  except asyncio.CancelledError:
454
- debug_log(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
455
- logger.info(f"Streaming response task cancelled. Partial response length: {len(full_response)}")
574
+ logger.info(f"Generic streaming cancelled. Partial response length: {len(full_response)}")
456
575
  if hasattr(client, 'cancel_stream'):
457
- debug_log("Calling client.cancel_stream() after cancellation")
458
- try:
459
- await client.cancel_stream()
460
- debug_log("Successfully cancelled client stream")
461
- except Exception as cancel_err:
462
- debug_log(f"Error cancelling client stream: {str(cancel_err)}")
576
+ await client.cancel_stream()
463
577
  return full_response
464
-
578
+
465
579
  except Exception as e:
466
- debug_log(f"Error during streaming response: {str(e)}")
467
- logger.error(f"Error during streaming response: {str(e)}")
580
+ logger.error(f"Error during generic streaming: {str(e)}")
468
581
  if hasattr(client, 'cancel_stream'):
469
- debug_log("Attempting to cancel client stream after error")
470
- try:
471
- await client.cancel_stream()
472
- debug_log("Successfully cancelled client stream after error")
473
- except Exception as cancel_err:
474
- debug_log(f"Error cancelling client stream after error: {str(cancel_err)}")
582
+ await client.cancel_stream()
475
583
  raise
476
584
 
477
- finally:
478
- debug_log("generate_streaming_response worker finished or errored.")
479
- if 'full_response' in locals():
480
- return full_response
481
- return None
585
+ # Worker function for streaming response generation
586
+ async def generate_streaming_response(
587
+ app: 'SimpleChatApp',
588
+ messages: List[Dict],
589
+ model: str,
590
+ style: str,
591
+ client: Any,
592
+ callback: Callable[[str], Awaitable[None]]
593
+ ) -> Optional[str]:
594
+ """
595
+ Generate a streaming response from the model (as a Textual worker).
596
+ Refactored to be a coroutine, not an async generator.
597
+ """
598
+ try:
599
+ from app.main import debug_log
600
+ except ImportError:
601
+ debug_log = lambda msg: None
602
+
603
+ logger.info(f"Starting streaming response with model: {model}")
604
+ debug_log(f"Starting streaming response with model: '{model}', client type: {type(client).__name__}")
605
+
606
+ # Validate messages
607
+ if not messages:
608
+ debug_log("Error: messages list is empty")
609
+ raise ValueError("Messages list cannot be empty")
610
+
611
+ # Ensure all messages have required fields
612
+ for i, msg in enumerate(messages):
613
+ try:
614
+ debug_log(f"Message {i}: role={msg.get('role', 'missing')}, content_len={len(msg.get('content', ''))}")
615
+ if 'role' not in msg:
616
+ debug_log(f"Adding missing 'role' to message {i}")
617
+ msg['role'] = 'user'
618
+ if 'content' not in msg:
619
+ debug_log(f"Adding missing 'content' to message {i}")
620
+ msg['content'] = ''
621
+ except Exception as e:
622
+ debug_log(f"Error checking message {i}: {str(e)}")
623
+ messages[i] = {
624
+ 'role': 'user',
625
+ 'content': str(msg) if msg else ''
626
+ }
627
+ debug_log(f"Repaired message {i}")
628
+
629
+ # Create a lock for synchronizing UI updates
630
+ update_lock = asyncio.Lock()
631
+
632
+ # Validate client
633
+ if client is None:
634
+ debug_log("Error: client is None, cannot proceed with streaming")
635
+ raise ValueError("Model client is None, cannot proceed with streaming")
636
+
637
+ if not hasattr(client, 'generate_stream'):
638
+ debug_log(f"Error: client {type(client).__name__} does not have generate_stream method")
639
+ raise ValueError(f"Client {type(client).__name__} does not support streaming")
640
+
641
+ # Explicitly check provider type first
642
+ is_ollama = 'ollama' in str(type(client)).lower()
643
+ is_openai = 'openai' in str(type(client)).lower()
644
+ is_anthropic = 'anthropic' in str(type(client)).lower()
645
+
646
+ debug_log(f"Client types - Ollama: {is_ollama}, OpenAI: {is_openai}, Anthropic: {is_anthropic}")
647
+
648
+ # Use separate implementations for each provider
649
+ try:
650
+ if is_openai:
651
+ debug_log("Using OpenAI-specific streaming implementation")
652
+ return await _generate_openai_stream(app, messages, model, style, client, callback, update_lock)
653
+ elif is_anthropic:
654
+ debug_log("Using Anthropic-specific streaming implementation")
655
+ return await _generate_anthropic_stream(app, messages, model, style, client, callback, update_lock)
656
+ elif is_ollama:
657
+ debug_log("Using Ollama-specific streaming implementation")
658
+ return await _generate_ollama_stream(app, messages, model, style, client, callback, update_lock)
659
+ else:
660
+ # Generic fallback
661
+ debug_log("Using generic streaming implementation")
662
+ return await _generate_generic_stream(app, messages, model, style, client, callback, update_lock)
663
+ except asyncio.CancelledError:
664
+ debug_log("Task cancellation detected in main streaming function")
665
+ if hasattr(client, 'cancel_stream'):
666
+ await client.cancel_stream()
667
+ raise
668
+ except Exception as e:
669
+ debug_log(f"Error in streaming implementation: {str(e)}")
670
+ logger.error(f"Error in streaming implementation: {str(e)}")
671
+ raise
482
672
 
483
673
  async def ensure_ollama_running() -> bool:
484
674
  """
@@ -555,6 +745,22 @@ def resolve_model_id(model_id_or_name: str) -> str:
555
745
  input_lower = model_id_or_name.lower().strip()
556
746
  logger.info(f"Attempting to resolve model identifier: '{input_lower}'")
557
747
 
748
+ # Add special case handling for common OpenAI models
749
+ openai_model_aliases = {
750
+ "04-mini": "gpt-4-mini", # Fix "04-mini" typo to "gpt-4-mini"
751
+ "04": "gpt-4",
752
+ "04-vision": "gpt-4-vision",
753
+ "04-turbo": "gpt-4-turbo",
754
+ "035": "gpt-3.5-turbo",
755
+ "35-turbo": "gpt-3.5-turbo",
756
+ "35": "gpt-3.5-turbo"
757
+ }
758
+
759
+ if input_lower in openai_model_aliases:
760
+ resolved = openai_model_aliases[input_lower]
761
+ logger.info(f"Resolved '{input_lower}' to '{resolved}' via OpenAI model alias")
762
+ return resolved
763
+
558
764
  # Special case handling for common typos and model name variations
559
765
  typo_corrections = {
560
766
  "o4-mini": "04-mini",