chat-console 0.2.8__py3-none-any.whl → 0.2.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
app/__init__.py CHANGED
@@ -3,4 +3,4 @@ Chat CLI
  A command-line interface for chatting with various LLM providers like ChatGPT and Claude.
  """
 
- __version__ = "0.2.8"
+ __version__ = "0.2.98"
app/api/anthropic.py CHANGED
@@ -1,7 +1,9 @@
  import anthropic
+ import asyncio # Add missing import
  from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
  from .base import BaseModelClient
  from ..config import ANTHROPIC_API_KEY
+ from ..utils import resolve_model_id # Import the resolve_model_id function
 
  class AnthropicClient(BaseModelClient):
  def __init__(self):
@@ -47,16 +49,26 @@ class AnthropicClient(BaseModelClient):
 
  return styles.get(style, "")
 
- async def generate_completion(self, messages: List[Dict[str, str]],
- model: str,
- style: Optional[str] = None,
- temperature: float = 0.7,
+ async def generate_completion(self, messages: List[Dict[str, str]],
+ model: str,
+ style: Optional[str] = None,
+ temperature: float = 0.7,
  max_tokens: Optional[int] = None) -> str:
  """Generate a text completion using Claude"""
+ try:
+ from app.main import debug_log
+ except ImportError:
+ debug_log = lambda msg: None
+
+ # Resolve the model ID right before making the API call
+ original_model = model
+ resolved_model = resolve_model_id(model)
+ debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}' in generate_completion")
+
  processed_messages = self._prepare_messages(messages, style)
 
  response = await self.client.messages.create(
- model=model,
+ model=resolved_model, # Use the resolved model ID
  messages=processed_messages,
  temperature=temperature,
  max_tokens=max_tokens or 1024,
@@ -64,29 +76,147 @@ class AnthropicClient(BaseModelClient):
 
  return response.content[0].text
 
- async def generate_stream(self, messages: List[Dict[str, str]],
- model: str,
+ async def generate_stream(self, messages: List[Dict[str, str]],
+ model: str,
  style: Optional[str] = None,
- temperature: float = 0.7,
+ temperature: float = 0.7,
  max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
  """Generate a streaming text completion using Claude"""
+ try:
+ from app.main import debug_log # Import debug logging if available
+ except ImportError:
+ # If debug_log not available, create a no-op function
+ debug_log = lambda msg: None
+
+ # Resolve the model ID right before making the API call
+ original_model = model
+ resolved_model = resolve_model_id(model)
+ debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}'")
+ debug_log(f"Anthropic: starting streaming generation with model: {resolved_model}")
+
  processed_messages = self._prepare_messages(messages, style)
 
- stream = await self.client.messages.stream(
- model=model,
- messages=processed_messages,
- temperature=temperature,
- max_tokens=max_tokens or 1024,
- )
- async for chunk in stream:
- if chunk.type == "content_block":
- yield chunk.text
-
+ try:
+ debug_log(f"Anthropic: requesting stream with {len(processed_messages)} messages")
+ # Remove await from this line - it returns the context manager, not an awaitable
+ stream = self.client.messages.stream(
+ model=resolved_model, # Use the resolved model ID
+ messages=processed_messages,
+ temperature=temperature,
+ max_tokens=max_tokens or 1024,
+ )
+
+ debug_log("Anthropic: stream created successfully, processing chunks using async with")
+ async with stream as stream_context: # Use async with
+ async for chunk in stream_context: # Iterate over the context
+ try:
+ if chunk.type == "content_block_delta": # Check for delta type
+ # Ensure we always return a string
+ if chunk.delta.text is None:
+ debug_log("Anthropic: skipping empty text delta chunk")
+ continue
+
+ text = str(chunk.delta.text) # Get text from delta
+ debug_log(f"Anthropic: yielding chunk of length: {len(text)}")
+ yield text
+ else:
+ debug_log(f"Anthropic: skipping non-content_delta chunk of type: {chunk.type}")
+ except Exception as chunk_error: # Restore the except block for chunk processing
+ debug_log(f"Anthropic: error processing chunk: {str(chunk_error)}")
+ # Skip problematic chunks but continue processing
+ continue # This continue is now correctly inside the loop and except block
+
+ except Exception as e:
+ debug_log(f"Anthropic: error in generate_stream: {str(e)}")
+ raise Exception(f"Anthropic streaming error: {str(e)}")
+
+ async def _fetch_models_from_api(self) -> List[Dict[str, Any]]:
+ """Fetch available models directly from the Anthropic API."""
+ try:
+ from app.main import debug_log
+ except ImportError:
+ debug_log = lambda msg: None
+
+ try:
+ debug_log("Anthropic: Fetching models from API...")
+ # The Anthropic Python SDK might not have a direct high-level method for listing models yet.
+ # We might need to use the underlying HTTP client or make a direct request.
+ # Let's assume for now the SDK client *does* have a way, like self.client.models.list()
+ # If this fails, we'd need to implement a direct HTTP GET request.
+ # response = await self.client.models.list() # Hypothetical SDK method
+
+ # --- Alternative: Direct HTTP Request using httpx (if client exposes it) ---
+ # Check if the client has an internal http_client we can use
+ if hasattr(self.client, '_client') and hasattr(self.client._client, 'get'):
+ response = await self.client._client.get(
+ "/v1/models",
+ headers={"anthropic-version": "2023-06-01"} # Add required version header
+ )
+ response.raise_for_status() # Raise HTTP errors
+ models_data = response.json()
+ debug_log(f"Anthropic: API response received: {models_data}")
+ if 'data' in models_data and isinstance(models_data['data'], list):
+ # Format the response as expected: list of {"id": ..., "name": ...}
+ formatted_models = [
+ {"id": model.get("id"), "name": model.get("display_name", model.get("id"))}
+ for model in models_data['data']
+ if model.get("id") # Ensure model has an ID
+ ]
+ # Log each model ID clearly for debugging
+ debug_log(f"Anthropic: Available models from API:")
+ for model in formatted_models:
+ debug_log(f" - ID: {model.get('id')}, Name: {model.get('name')}")
+ return formatted_models
+ else:
+ debug_log("Anthropic: Unexpected API response format for models.")
+ return []
+ else:
+ debug_log("Anthropic: Client does not expose HTTP client for model listing. Returning empty list.")
+ return [] # Cannot fetch dynamically
+
+ except Exception as e:
+ debug_log(f"Anthropic: Failed to fetch models from API: {str(e)}")
+ # Fallback to a minimal hardcoded list in case of API error
+ # Include Claude 3.7 Sonnet with the correct full ID
+ fallback_models = [
+ {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+ {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+ {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+ {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+ {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"}, # Add Claude 3.7 Sonnet
+ ]
+ debug_log("Anthropic: Using fallback model list:")
+ for model in fallback_models:
+ debug_log(f" - ID: {model['id']}, Name: {model['name']}")
+ return fallback_models
+
+ # Keep this synchronous for now, but make it call the async fetcher
+ # Note: This is slightly awkward. Ideally, config loading would be async.
+ # For now, we'll run the async fetcher within the sync method using asyncio.run()
+ # This is NOT ideal for performance but avoids larger refactoring of config loading.
  def get_available_models(self) -> List[Dict[str, Any]]:
- """Get list of available Claude models"""
- return [
- {"id": "claude-3-opus", "name": "Claude 3 Opus"},
- {"id": "claude-3-sonnet", "name": "Claude 3 Sonnet"},
- {"id": "claude-3-haiku", "name": "Claude 3 Haiku"},
- {"id": "claude-3.7-sonnet", "name": "Claude 3.7 Sonnet"},
- ]
+ """Get list of available Claude models by fetching from API."""
+ try:
+ # Run the async fetcher method synchronously
+ models = asyncio.run(self._fetch_models_from_api())
+ return models
+ except RuntimeError as e:
+ # Handle cases where asyncio.run can't be called (e.g., already in an event loop)
+ # This might happen during app runtime if called again. Fallback needed.
+ try:
+ from app.main import debug_log
+ except ImportError:
+ debug_log = lambda msg: None
+ debug_log(f"Anthropic: Cannot run async model fetch synchronously ({e}). Falling back to hardcoded list.")
+ # Use the same fallback list as in _fetch_models_from_api
+ fallback_models = [
+ {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+ {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+ {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+ {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+ {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"}, # Add Claude 3.7 Sonnet
+ ]
+ debug_log("Anthropic: Using fallback model list in get_available_models:")
+ for model in fallback_models:
+ debug_log(f" - ID: {model['id']}, Name: {model['name']}")
+ return fallback_models
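
Note on the fallback path in get_available_models: asyncio.run() raises RuntimeError when it is invoked while an event loop is already running, which is exactly what the except branch guards against. A standalone illustration of that behaviour (not part of the package, trivial fetch stub for clarity):

    # Illustration only: why get_available_models needs a RuntimeError fallback.
    import asyncio

    async def fetch_models():
        return [{"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"}]

    def get_models_sync():
        try:
            return asyncio.run(fetch_models())      # works from plain sync code
        except RuntimeError:                        # "cannot be called from a running event loop"
            return [{"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"}]

    async def main():
        print(get_models_sync())                    # called inside a loop -> fallback list

    if __name__ == "__main__":
        print(get_models_sync())                    # direct call -> real fetch
        asyncio.run(main())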
app/api/base.py CHANGED
@@ -27,6 +27,49 @@ class BaseModelClient(ABC):
  """Get list of available models from this provider"""
  pass
 
+ @staticmethod
+ def get_client_type_for_model(model_name: str) -> type:
+ """Get the client class for a model without instantiating it"""
+ from ..config import CONFIG, AVAILABLE_PROVIDERS
+ from .anthropic import AnthropicClient
+ from .openai import OpenAIClient
+ from .ollama import OllamaClient
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ # Get model info and provider
+ model_info = CONFIG["available_models"].get(model_name)
+ model_name_lower = model_name.lower()
+
+ # If model is in config, use its provider
+ if model_info:
+ provider = model_info["provider"]
+ # For custom models, try to infer provider
+ else:
+ # First try Ollama for known model names or if selected from Ollama UI
+ if (any(name in model_name_lower for name in ["llama", "mistral", "codellama", "gemma"]) or
+ model_name in [m["id"] for m in CONFIG.get("ollama_models", [])]):
+ provider = "ollama"
+ # Then try other providers
+ elif any(name in model_name_lower for name in ["gpt", "text-", "davinci"]):
+ provider = "openai"
+ elif any(name in model_name_lower for name in ["claude", "anthropic"]):
+ provider = "anthropic"
+ else:
+ # Default to Ollama for unknown models
+ provider = "ollama"
+
+ # Return appropriate client class
+ if provider == "ollama":
+ return OllamaClient
+ elif provider == "openai":
+ return OpenAIClient
+ elif provider == "anthropic":
+ return AnthropicClient
+ else:
+ return None
+
  @staticmethod
  def get_client_for_model(model_name: str) -> 'BaseModelClient':
  """Factory method to get appropriate client for model"""
app/api/ollama.py CHANGED
@@ -22,6 +22,9 @@ class OllamaClient(BaseModelClient):
  # Track active stream session
  self._active_stream_session = None
 
+ # Track model loading state
+ self._model_loading = False
+
  # Path to the cached models file
  self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
 
@@ -31,17 +34,62 @@
 
  def _prepare_messages(self, messages: List[Dict[str, str]], style: Optional[str] = None) -> str:
  """Convert chat messages to Ollama format"""
+ try:
+ from app.main import debug_log # Import debug logging
+ debug_log(f"_prepare_messages called with {len(messages)} messages and style: {style}")
+ except ImportError:
+ # If debug_log not available, create a no-op function
+ debug_log = lambda msg: None
+
  # Start with any style instructions
  formatted_messages = []
  if style and style != "default":
- formatted_messages.append(self._get_style_instructions(style))
-
+ style_instructions = self._get_style_instructions(style)
+ debug_log(f"Adding style instructions: {style_instructions[:50]}...")
+ formatted_messages.append(style_instructions)
+
  # Add message content, preserving conversation flow
- for msg in messages:
- formatted_messages.append(msg["content"])
-
+ for i, msg in enumerate(messages):
+ try:
+ debug_log(f"Processing message {i}: role={msg.get('role', 'unknown')}, content length={len(msg.get('content', ''))}")
+
+ # Safely extract content with fallback
+ if "content" in msg and msg["content"] is not None:
+ content = msg["content"]
+ formatted_messages.append(content)
+ else:
+ debug_log(f"Message {i} has no valid content key, using fallback")
+ # Try to get content from alternative sources
+ if isinstance(msg, dict):
+ # Try to convert the whole message to string as last resort
+ content = str(msg)
+ debug_log(f"Using fallback content: {content[:50]}...")
+ formatted_messages.append(content)
+ else:
+ debug_log(f"Message {i} is not a dict, skipping")
+
+ except KeyError as e:
+ debug_log(f"KeyError processing message {i}: {e}, message: {msg}")
+ # Handle missing key more gracefully
+ content = msg.get('content', '')
+ if content:
+ formatted_messages.append(content)
+ else:
+ debug_log(f"Warning: Message {i} has no content, skipping")
+ except Exception as e:
+ debug_log(f"Error processing message {i}: {e}")
+ # Continue processing other messages
+ continue
+
+ # Defensive check to ensure we have something to return
+ if not formatted_messages:
+ debug_log("Warning: No formatted messages were created, using fallback")
+ formatted_messages = ["Please provide some input for the model to respond to."]
+
  # Join with double newlines for better readability
- return "\n\n".join(formatted_messages)
+ result = "\n\n".join(formatted_messages)
+ debug_log(f"Final formatted prompt length: {len(result)}")
+ return result
 
  def _get_style_instructions(self, style: str) -> str:
  """Get formatting instructions for different styles"""
@@ -165,7 +213,49 @@ class OllamaClient(BaseModelClient):
  max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
  """Generate a streaming text completion using Ollama"""
  logger.info(f"Starting streaming generation with model: {model}")
- prompt = self._prepare_messages(messages, style)
+ try:
+ from app.main import debug_log # Import debug logging if available
+ debug_log(f"Starting streaming generation with model: {model}")
+ except ImportError:
+ # If debug_log not available, create a no-op function
+ debug_log = lambda msg: None
+
+ debug_log(f"generate_stream called with model: {model}, {len(messages)} messages")
+
+ # At the beginning of the method, check messages format
+ if not messages:
+ debug_log("Error: messages is empty")
+ raise ValueError("Messages list is empty")
+
+ for i, msg in enumerate(messages):
+ try:
+ if not isinstance(msg, dict):
+ debug_log(f"Error: message {i} is not a dict: {type(msg)}")
+ raise ValueError(f"Message {i} is not a dictionary")
+ if 'role' not in msg:
+ debug_log(f"Error: message {i} missing 'role' key, using default")
+ msg['role'] = 'user'
+ if 'content' not in msg:
+ debug_log(f"Error: message {i} missing 'content' key, using default")
+ msg['content'] = ''
+ except Exception as e:
+ debug_log(f"Error validating message {i}: {str(e)}")
+
+ # Now prepare the messages with our robust _prepare_messages method
+ try:
+ debug_log("Calling _prepare_messages to format prompt")
+ prompt = self._prepare_messages(messages, style)
+ debug_log(f"Prompt prepared, length: {len(prompt)}")
+ except Exception as prep_error:
+ debug_log(f"Error preparing messages: {str(prep_error)}")
+ # Create a simple fallback prompt
+ if len(messages) > 0 and isinstance(messages[-1], dict) and 'content' in messages[-1]:
+ prompt = messages[-1]['content']
+ debug_log(f"Using last message content as fallback prompt: {prompt[:100]}...")
+ else:
+ prompt = "Please respond to the user's query."
+ debug_log("Using generic fallback prompt")
+
  retries = 2
  last_error = None
  self._active_stream_session = None # Track the active session
@@ -176,31 +266,64 @@ class OllamaClient(BaseModelClient):
  async with aiohttp.ClientSession() as session:
  try:
  logger.info("Testing model availability...")
- async with session.post(
- f"{self.base_url}/api/generate",
- json={
- "model": model,
+ debug_log("Testing model availability...")
+ # Build test payload with careful error handling
+ try:
+ test_payload = {
+ "model": str(model) if model is not None else "gemma:2b",
+ "prompt": "test",
+ "temperature": float(temperature) if temperature is not None else 0.7,
+ "stream": False
+ }
+ debug_log(f"Prepared test payload: {test_payload}")
+ except Exception as payload_error:
+ debug_log(f"Error preparing test payload: {str(payload_error)}, using defaults")
+ test_payload = {
+ "model": "gemma:2b", # Safe default
  "prompt": "test",
- "temperature": temperature,
+ "temperature": 0.7,
  "stream": False
- },
+ }
+
+ async with session.post(
+ f"{self.base_url}/api/generate",
+ json=test_payload,
  timeout=2
  ) as response:
  if response.status != 200:
  logger.warning(f"Model test request failed with status {response.status}")
+ debug_log(f"Model test request failed with status {response.status}")
  raise aiohttp.ClientError("Model not ready")
  except (aiohttp.ClientError, asyncio.TimeoutError) as e:
  logger.info(f"Model cold start detected: {str(e)}")
+ debug_log(f"Model cold start detected: {str(e)}")
+ # Set model loading flag
+ self._model_loading = True
+ logger.info("Setting model_loading state to True")
+ debug_log("Setting model_loading state to True")
+
  # Model might need loading, try pulling it
+ # Prepare pull payload safely
+ try:
+ pull_payload = {"name": str(model) if model is not None else "gemma:2b"}
+ debug_log(f"Prepared pull payload: {pull_payload}")
+ except Exception as pull_err:
+ debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
+ pull_payload = {"name": "gemma:2b"} # Safe default
+
  async with session.post(
  f"{self.base_url}/api/pull",
- json={"name": model},
+ json=pull_payload,
  timeout=60
  ) as pull_response:
  if pull_response.status != 200:
  logger.error("Failed to pull model")
+ debug_log("Failed to pull model")
+ self._model_loading = False # Reset flag on failure
  raise Exception("Failed to pull model")
  logger.info("Model pulled successfully")
+ debug_log("Model pulled successfully")
+ self._model_loading = False # Reset flag after successful pull
 
  # Now proceed with actual generation
  session = aiohttp.ClientSession()
@@ -208,50 +331,106 @@ class OllamaClient(BaseModelClient):
 
  try:
  logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
- async with session.post(
- f"{self.base_url}/api/generate",
- json={
- "model": model,
- "prompt": prompt,
- "temperature": temperature,
+ debug_log(f"Sending streaming request to {self.base_url}/api/generate with model: {model}")
+ debug_log(f"Request payload: model={model}, prompt_length={len(prompt) if prompt else 0}, temperature={temperature}")
+
+ # Build request payload with careful error handling
+ try:
+ request_payload = {
+ "model": str(model) if model is not None else "gemma:2b", # Default if model is None
+ "prompt": str(prompt) if prompt is not None else "Please respond to the user's query.",
+ "temperature": float(temperature) if temperature is not None else 0.7,
  "stream": True
- },
+ }
+ debug_log(f"Prepared request payload successfully")
+ except Exception as payload_error:
+ debug_log(f"Error preparing payload: {str(payload_error)}, using defaults")
+ request_payload = {
+ "model": "gemma:2b", # Safe default
+ "prompt": "Please respond to the user's query.",
+ "temperature": 0.7,
+ "stream": True
+ }
+
+ debug_log(f"Sending request to Ollama API")
+ response = await session.post(
+ f"{self.base_url}/api/generate",
+ json=request_payload,
  timeout=60 # Longer timeout for actual generation
- ) as response:
- response.raise_for_status()
- async for line in response.content:
+ )
+ response.raise_for_status()
+ debug_log(f"Response status: {response.status}")
+
+ # Use a simpler async iteration pattern that's less error-prone
+ debug_log("Starting to process response stream")
+ async for line in response.content:
+ # Check cancellation periodically
+ if self._active_stream_session is None:
+ debug_log("Stream session closed, stopping stream processing")
+ break
+
+ try:
+ # Process the chunk
  if line:
  chunk = line.decode().strip()
- try:
- data = json.loads(chunk)
- if "response" in data:
- yield data["response"]
- except json.JSONDecodeError:
- continue
- logger.info("Streaming completed successfully")
- return
+ chunk_str = line.decode().strip()
+ # Check if it looks like JSON before trying to parse
+ if chunk_str.startswith('{') and chunk_str.endswith('}'):
+ try:
+ data = json.loads(chunk_str)
+ if isinstance(data, dict) and "response" in data:
+ chunk_length = len(data["response"]) if data["response"] else 0
+ debug_log(f"Yielding chunk of length: {chunk_length}")
+ yield data["response"]
+ else:
+ debug_log(f"JSON chunk missing 'response' key: {chunk_str}")
+ except json.JSONDecodeError:
+ debug_log(f"JSON decode error for chunk: {chunk_str}")
+ else:
+ # Log unexpected non-JSON lines but don't process them
+ if chunk_str: # Avoid logging empty lines
+ debug_log(f"Received unexpected non-JSON line: {chunk_str}")
+ # Continue processing next line regardless of parsing success/failure of current line
+ continue
+ except Exception as chunk_err:
+ debug_log(f"Error processing chunk: {str(chunk_err)}")
+ # Continue instead of breaking to try processing more chunks
+ continue
+
+ logger.info("Streaming completed successfully")
+ debug_log("Streaming completed successfully")
+ return
  finally:
  self._active_stream_session = None # Clear reference when done
  await session.close() # Ensure session is closed
+ debug_log("Stream session closed")
 
  except aiohttp.ClientConnectorError:
  last_error = "Could not connect to Ollama server. Make sure Ollama is running and accessible at " + self.base_url
+ debug_log(f"ClientConnectorError: {last_error}")
  except aiohttp.ClientResponseError as e:
  last_error = f"Ollama API error: {e.status} - {e.message}"
+ debug_log(f"ClientResponseError: {last_error}")
  except aiohttp.ClientTimeout:
  last_error = "Request to Ollama server timed out"
+ debug_log(f"ClientTimeout: {last_error}")
  except asyncio.CancelledError:
  logger.info("Streaming cancelled by client")
+ debug_log("CancelledError: Streaming cancelled by client")
  raise # Propagate cancellation
  except Exception as e:
  last_error = f"Error streaming completion: {str(e)}"
+ debug_log(f"General exception: {last_error}")
 
  logger.error(f"Streaming attempt failed: {last_error}")
+ debug_log(f"Streaming attempt failed: {last_error}")
  retries -= 1
  if retries >= 0:
  logger.info(f"Retrying stream... {retries} attempts remaining")
+ debug_log(f"Retrying stream... {retries} attempts remaining")
  await asyncio.sleep(1)
 
+ debug_log(f"All retries failed. Last error: {last_error}")
  raise Exception(last_error)
 
  async def cancel_stream(self) -> None:
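
For reference, the stream body consumed by the loop above is newline-delimited JSON, one object per line carrying a partial "response" field. A small offline sketch of the same parsing rules against canned bytes (illustration only, no live Ollama server):

    import json

    raw_lines = [
        b'{"model":"gemma:2b","response":"Hel","done":false}',
        b'not json',                                # skipped: does not look like a JSON object
        b'{"model":"gemma:2b","response":"lo!","done":true}',
    ]

    pieces = []
    for line in raw_lines:
        chunk_str = line.decode().strip()
        if chunk_str.startswith('{') and chunk_str.endswith('}'):
            data = json.loads(chunk_str)
            if isinstance(data, dict) and "response" in data:
                pieces.append(data["response"])

    print("".join(pieces))  # -> Hello!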
@@ -260,6 +439,12 @@ class OllamaClient(BaseModelClient):
  logger.info("Cancelling active stream session")
  await self._active_stream_session.close()
  self._active_stream_session = None
+ self._model_loading = False
+ logger.info("Stream session closed successfully")
+
+ def is_loading_model(self) -> bool:
+ """Check if Ollama is currently loading a model"""
+ return self._model_loading
 
  async def get_model_details(self, model_id: str) -> Dict[str, Any]:
  """Get detailed information about a specific Ollama model"""