chat-console 0.2.9__tar.gz → 0.2.99__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {chat_console-0.2.9 → chat_console-0.2.99}/PKG-INFO +1 -1
  2. {chat_console-0.2.9 → chat_console-0.2.99}/app/__init__.py +1 -1
  3. chat_console-0.2.99/app/api/anthropic.py +229 -0
  4. {chat_console-0.2.9 → chat_console-0.2.99}/app/api/base.py +45 -2
  5. {chat_console-0.2.9 → chat_console-0.2.99}/app/api/ollama.py +202 -43
  6. {chat_console-0.2.9 → chat_console-0.2.99}/app/api/openai.py +53 -4
  7. {chat_console-0.2.9 → chat_console-0.2.99}/app/config.py +53 -7
  8. {chat_console-0.2.9 → chat_console-0.2.99}/app/main.py +512 -103
  9. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/chat_interface.py +40 -20
  10. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/model_browser.py +405 -45
  11. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/model_selector.py +77 -19
  12. chat_console-0.2.99/app/utils.py +530 -0
  13. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/PKG-INFO +1 -1
  14. chat_console-0.2.9/app/api/anthropic.py +0 -92
  15. chat_console-0.2.9/app/utils.py +0 -256
  16. {chat_console-0.2.9 → chat_console-0.2.99}/LICENSE +0 -0
  17. {chat_console-0.2.9 → chat_console-0.2.99}/README.md +0 -0
  18. {chat_console-0.2.9 → chat_console-0.2.99}/app/api/__init__.py +0 -0
  19. {chat_console-0.2.9 → chat_console-0.2.99}/app/database.py +0 -0
  20. {chat_console-0.2.9 → chat_console-0.2.99}/app/models.py +0 -0
  21. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/__init__.py +0 -0
  22. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/chat_list.py +0 -0
  23. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/search.py +0 -0
  24. {chat_console-0.2.9 → chat_console-0.2.99}/app/ui/styles.py +0 -0
  25. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/SOURCES.txt +0 -0
  26. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/dependency_links.txt +0 -0
  27. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/entry_points.txt +0 -0
  28. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/requires.txt +0 -0
  29. {chat_console-0.2.9 → chat_console-0.2.99}/chat_console.egg-info/top_level.txt +0 -0
  30. {chat_console-0.2.9 → chat_console-0.2.99}/setup.cfg +0 -0
  31. {chat_console-0.2.9 → chat_console-0.2.99}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: chat-console
- Version: 0.2.9
+ Version: 0.2.99
  Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
  Home-page: https://github.com/wazacraftrfid/chat-console
  Author: Johnathan Greenaway
@@ -3,4 +3,4 @@ Chat CLI
  A command-line interface for chatting with various LLM providers like ChatGPT and Claude.
  """

- __version__ = "0.2.9"
+ __version__ = "0.2.99"
@@ -0,0 +1,229 @@
+ import anthropic
+ import asyncio # Add missing import
+ from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
+ from .base import BaseModelClient
+ from ..config import ANTHROPIC_API_KEY
+ from ..utils import resolve_model_id # Import the resolve_model_id function
+
+ class AnthropicClient(BaseModelClient):
+     def __init__(self):
+         self.client = None # Initialize in create()
+
+     @classmethod
+     async def create(cls) -> 'AnthropicClient':
+         """Create a new instance with async initialization."""
+         instance = cls()
+         instance.client = anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY)
+         return instance
+
+     def _prepare_messages(self, messages: List[Dict[str, str]], style: Optional[str] = None) -> List[Dict[str, str]]:
+         """Prepare messages for Claude API"""
+         # Anthropic expects role to be 'user' or 'assistant'
+         processed_messages = []
+
+         for msg in messages:
+             role = msg["role"]
+             if role == "system":
+                 # For Claude, we'll convert system messages to user messages with a special prefix
+                 processed_messages.append({
+                     "role": "user",
+                     "content": f"<system>\n{msg['content']}\n</system>"
+                 })
+             else:
+                 processed_messages.append(msg)
+
+         # Add style instructions if provided
+         if style and style != "default":
+             # Find first non-system message to attach style to
+             for i, msg in enumerate(processed_messages):
+                 if msg["role"] == "user":
+                     content = msg["content"]
+                     if "<userStyle>" not in content:
+                         style_instructions = self._get_style_instructions(style)
+                         msg["content"] = f"<userStyle>{style_instructions}</userStyle>\n\n{content}"
+                     break
+
+         return processed_messages
+
+     def _get_style_instructions(self, style: str) -> str:
+         """Get formatting instructions for different styles"""
+         styles = {
+             "concise": "Be extremely concise and to the point. Use short sentences and paragraphs. Avoid unnecessary details.",
+             "detailed": "Be comprehensive and thorough in your responses. Provide detailed explanations, examples, and cover all relevant aspects of the topic.",
+             "technical": "Use precise technical language and terminology. Be formal and focus on accuracy and technical details.",
+             "friendly": "Be warm, approachable and conversational. Use casual language, personal examples, and a friendly tone.",
+         }
+
+         return styles.get(style, "")
+
+     async def generate_completion(self, messages: List[Dict[str, str]],
+                                   model: str,
+                                   style: Optional[str] = None,
+                                   temperature: float = 0.7,
+                                   max_tokens: Optional[int] = None) -> str:
+         """Generate a text completion using Claude"""
+         try:
+             from app.main import debug_log
+         except ImportError:
+             debug_log = lambda msg: None
+
+         # Resolve the model ID right before making the API call
+         original_model = model
+         resolved_model = resolve_model_id(model)
+         debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}' in generate_completion")
+
+         processed_messages = self._prepare_messages(messages, style)
+
+         response = await self.client.messages.create(
+             model=resolved_model, # Use the resolved model ID
+             messages=processed_messages,
+             temperature=temperature,
+             max_tokens=max_tokens or 1024,
+         )
+
+         return response.content[0].text
+
+     async def generate_stream(self, messages: List[Dict[str, str]],
+                               model: str,
+                               style: Optional[str] = None,
+                               temperature: float = 0.7,
+                               max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
+         """Generate a streaming text completion using Claude"""
+         try:
+             from app.main import debug_log # Import debug logging if available
+         except ImportError:
+             # If debug_log not available, create a no-op function
+             debug_log = lambda msg: None
+
+         # Resolve the model ID right before making the API call
+         original_model = model
+         resolved_model = resolve_model_id(model)
+         debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}'")
+         debug_log(f"Anthropic: starting streaming generation with model: {resolved_model}")
+
+         processed_messages = self._prepare_messages(messages, style)
+
+         try:
+             debug_log(f"Anthropic: requesting stream with {len(processed_messages)} messages")
+             # Remove await from this line - it returns the context manager, not an awaitable
+             stream = self.client.messages.stream(
+                 model=resolved_model, # Use the resolved model ID
+                 messages=processed_messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens or 1024,
+             )
+
+             debug_log("Anthropic: stream created successfully, processing chunks using async with")
+             async with stream as stream_context: # Use async with
+                 async for chunk in stream_context: # Iterate over the context
+                     try:
+                         if chunk.type == "content_block_delta": # Check for delta type
+                             # Ensure we always return a string
+                             if chunk.delta.text is None:
+                                 debug_log("Anthropic: skipping empty text delta chunk")
+                                 continue
+
+                             text = str(chunk.delta.text) # Get text from delta
+                             debug_log(f"Anthropic: yielding chunk of length: {len(text)}")
+                             yield text
+                         else:
+                             debug_log(f"Anthropic: skipping non-content_delta chunk of type: {chunk.type}")
+                     except Exception as chunk_error: # Restore the except block for chunk processing
+                         debug_log(f"Anthropic: error processing chunk: {str(chunk_error)}")
+                         # Skip problematic chunks but continue processing
+                         continue # This continue is now correctly inside the loop and except block
+
+         except Exception as e:
+             debug_log(f"Anthropic: error in generate_stream: {str(e)}")
+             raise Exception(f"Anthropic streaming error: {str(e)}")
+
+     async def _fetch_models_from_api(self) -> List[Dict[str, Any]]:
+         """Fetch available models directly from the Anthropic API."""
+         try:
+             from app.main import debug_log
+         except ImportError:
+             debug_log = lambda msg: None
+
+         try:
+             debug_log("Anthropic: Fetching models from API...")
+             # The Anthropic Python SDK might not have a direct high-level method for listing models yet.
+             # We might need to use the underlying HTTP client or make a direct request.
+             # Let's assume for now the SDK client *does* have a way, like self.client.models.list()
+             # If this fails, we'd need to implement a direct HTTP GET request.
+             # response = await self.client.models.list() # Hypothetical SDK method
+
+             # --- Alternative: Direct HTTP Request using httpx (if client exposes it) ---
+             # Check if the client has an internal http_client we can use
+             if hasattr(self.client, '_client') and hasattr(self.client._client, 'get'):
+                 response = await self.client._client.get(
+                     "/v1/models",
+                     headers={"anthropic-version": "2023-06-01"} # Add required version header
+                 )
+                 response.raise_for_status() # Raise HTTP errors
+                 models_data = response.json()
+                 debug_log(f"Anthropic: API response received: {models_data}")
+                 if 'data' in models_data and isinstance(models_data['data'], list):
+                     # Format the response as expected: list of {"id": ..., "name": ...}
+                     formatted_models = [
+                         {"id": model.get("id"), "name": model.get("display_name", model.get("id"))}
+                         for model in models_data['data']
+                         if model.get("id") # Ensure model has an ID
+                     ]
+                     # Log each model ID clearly for debugging
+                     debug_log(f"Anthropic: Available models from API:")
+                     for model in formatted_models:
+                         debug_log(f" - ID: {model.get('id')}, Name: {model.get('name')}")
+                     return formatted_models
+                 else:
+                     debug_log("Anthropic: Unexpected API response format for models.")
+                     return []
+             else:
+                 debug_log("Anthropic: Client does not expose HTTP client for model listing. Returning empty list.")
+                 return [] # Cannot fetch dynamically
+
+         except Exception as e:
+             debug_log(f"Anthropic: Failed to fetch models from API: {str(e)}")
+             # Fallback to a minimal hardcoded list in case of API error
+             # Include Claude 3.7 Sonnet with the correct full ID
+             fallback_models = [
+                 {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+                 {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+                 {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+                 {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+                 {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"}, # Add Claude 3.7 Sonnet
+             ]
+             debug_log("Anthropic: Using fallback model list:")
+             for model in fallback_models:
+                 debug_log(f" - ID: {model['id']}, Name: {model['name']}")
+             return fallback_models
+
+     # Keep this synchronous for now, but make it call the async fetcher
+     # Note: This is slightly awkward. Ideally, config loading would be async.
+     # For now, we'll run the async fetcher within the sync method using asyncio.run()
+     # This is NOT ideal for performance but avoids larger refactoring of config loading.
+     def get_available_models(self) -> List[Dict[str, Any]]:
+         """Get list of available Claude models by fetching from API."""
+         try:
+             # Run the async fetcher method synchronously
+             models = asyncio.run(self._fetch_models_from_api())
+             return models
+         except RuntimeError as e:
+             # Handle cases where asyncio.run can't be called (e.g., already in an event loop)
+             # This might happen during app runtime if called again. Fallback needed.
+             try:
+                 from app.main import debug_log
+             except ImportError:
+                 debug_log = lambda msg: None
+             debug_log(f"Anthropic: Cannot run async model fetch synchronously ({e}). Falling back to hardcoded list.")
+             # Use the same fallback list as in _fetch_models_from_api
+             fallback_models = [
+                 {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+                 {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+                 {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+                 {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+                 {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"}, # Add Claude 3.7 Sonnet
+             ]
+             debug_log("Anthropic: Using fallback model list in get_available_models:")
+             for model in fallback_models:
+                 debug_log(f" - ID: {model['id']}, Name: {model['name']}")
+             return fallback_models
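
For orientation, a minimal usage sketch of the new AnthropicClient added above, assuming ANTHROPIC_API_KEY is configured and the call happens inside an asyncio event loop; the message list and model ID below are illustrative only and not taken from the package.

    import asyncio
    from app.api.anthropic import AnthropicClient

    async def demo() -> None:
        # The async factory wires up anthropic.AsyncAnthropic before first use
        client = await AnthropicClient.create()
        messages = [{"role": "user", "content": "Hello, Claude"}]  # illustrative input
        # generate_stream yields text deltas as they arrive from the API
        async for chunk in client.generate_stream(messages, model="claude-3-5-sonnet-20240620"):
            print(chunk, end="", flush=True)

    asyncio.run(demo())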
@@ -28,7 +28,50 @@ class BaseModelClient(ABC):
          pass

      @staticmethod
-     def get_client_for_model(model_name: str) -> 'BaseModelClient':
+     def get_client_type_for_model(model_name: str) -> type:
+         """Get the client class for a model without instantiating it"""
+         from ..config import CONFIG, AVAILABLE_PROVIDERS
+         from .anthropic import AnthropicClient
+         from .openai import OpenAIClient
+         from .ollama import OllamaClient
+         import logging
+
+         logger = logging.getLogger(__name__)
+
+         # Get model info and provider
+         model_info = CONFIG["available_models"].get(model_name)
+         model_name_lower = model_name.lower()
+
+         # If model is in config, use its provider
+         if model_info:
+             provider = model_info["provider"]
+         # For custom models, try to infer provider
+         else:
+             # First try Ollama for known model names or if selected from Ollama UI
+             if (any(name in model_name_lower for name in ["llama", "mistral", "codellama", "gemma"]) or
+                 model_name in [m["id"] for m in CONFIG.get("ollama_models", [])]):
+                 provider = "ollama"
+             # Then try other providers
+             elif any(name in model_name_lower for name in ["gpt", "text-", "davinci"]):
+                 provider = "openai"
+             elif any(name in model_name_lower for name in ["claude", "anthropic"]):
+                 provider = "anthropic"
+             else:
+                 # Default to Ollama for unknown models
+                 provider = "ollama"
+
+         # Return appropriate client class
+         if provider == "ollama":
+             return OllamaClient
+         elif provider == "openai":
+             return OpenAIClient
+         elif provider == "anthropic":
+             return AnthropicClient
+         else:
+             return None
+
+     @staticmethod
+     async def get_client_for_model(model_name: str) -> 'BaseModelClient':
          """Factory method to get appropriate client for model"""
          from ..config import CONFIG, AVAILABLE_PROVIDERS
          from .anthropic import AnthropicClient
@@ -75,7 +118,7 @@ class BaseModelClient(ABC):

          # Return appropriate client
          if provider == "ollama":
-             return OllamaClient()
+             return await OllamaClient.create()
          elif provider == "openai":
              return OpenAIClient()
          elif provider == "anthropic":
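
For orientation, a hedged sketch of the call-site change implied by the base.py hunks above: get_client_for_model is now a coroutine (so OllamaClient.create() can start the Ollama server), while the new get_client_type_for_model resolves the provider class without instantiating anything. The model name below is illustrative only.

    from app.api.base import BaseModelClient

    async def resolve_and_build(model_name: str) -> BaseModelClient:
        # Synchronous lookup: maps the name to OllamaClient/OpenAIClient/AnthropicClient
        client_cls = BaseModelClient.get_client_type_for_model(model_name)
        print(f"{model_name} -> {client_cls.__name__ if client_cls else 'unknown'}")
        # Asynchronous factory: callers must now await it
        return await BaseModelClient.get_client_for_model(model_name)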
@@ -3,7 +3,6 @@ import asyncio
  import json
  import logging
  import os
- import time
  from datetime import datetime, timedelta
  from pathlib import Path
  from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
@@ -15,7 +14,6 @@ logger = logging.getLogger(__name__)
  class OllamaClient(BaseModelClient):
      def __init__(self):
          from ..config import OLLAMA_BASE_URL
-         from ..utils import ensure_ollama_running
          self.base_url = OLLAMA_BASE_URL.rstrip('/')
          logger.info(f"Initializing Ollama client with base URL: {self.base_url}")

@@ -27,24 +25,77 @@ class OllamaClient(BaseModelClient):

          # Path to the cached models file
          self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
+
+     @classmethod
+     async def create(cls) -> 'OllamaClient':
+         """Factory method to create and initialize an OllamaClient instance"""
+         from ..utils import ensure_ollama_running
+         client = cls()

          # Try to start Ollama if not running
-         if not ensure_ollama_running():
+         if not await ensure_ollama_running():
              raise Exception(f"Failed to start Ollama server. Please ensure Ollama is installed and try again.")
+
+         return client

      def _prepare_messages(self, messages: List[Dict[str, str]], style: Optional[str] = None) -> str:
          """Convert chat messages to Ollama format"""
+         try:
+             from app.main import debug_log # Import debug logging
+             debug_log(f"_prepare_messages called with {len(messages)} messages and style: {style}")
+         except ImportError:
+             # If debug_log not available, create a no-op function
+             debug_log = lambda msg: None
+
          # Start with any style instructions
          formatted_messages = []
          if style and style != "default":
-             formatted_messages.append(self._get_style_instructions(style))
-
+             style_instructions = self._get_style_instructions(style)
+             debug_log(f"Adding style instructions: {style_instructions[:50]}...")
+             formatted_messages.append(style_instructions)
+
          # Add message content, preserving conversation flow
-         for msg in messages:
-             formatted_messages.append(msg["content"])
-
+         for i, msg in enumerate(messages):
+             try:
+                 debug_log(f"Processing message {i}: role={msg.get('role', 'unknown')}, content length={len(msg.get('content', ''))}")
+
+                 # Safely extract content with fallback
+                 if "content" in msg and msg["content"] is not None:
+                     content = msg["content"]
+                     formatted_messages.append(content)
+                 else:
+                     debug_log(f"Message {i} has no valid content key, using fallback")
+                     # Try to get content from alternative sources
+                     if isinstance(msg, dict):
+                         # Try to convert the whole message to string as last resort
+                         content = str(msg)
+                         debug_log(f"Using fallback content: {content[:50]}...")
+                         formatted_messages.append(content)
+                     else:
+                         debug_log(f"Message {i} is not a dict, skipping")
+
+             except KeyError as e:
+                 debug_log(f"KeyError processing message {i}: {e}, message: {msg}")
+                 # Handle missing key more gracefully
+                 content = msg.get('content', '')
+                 if content:
+                     formatted_messages.append(content)
+                 else:
+                     debug_log(f"Warning: Message {i} has no content, skipping")
+             except Exception as e:
+                 debug_log(f"Error processing message {i}: {e}")
+                 # Continue processing other messages
+                 continue
+
+         # Defensive check to ensure we have something to return
+         if not formatted_messages:
+             debug_log("Warning: No formatted messages were created, using fallback")
+             formatted_messages = ["Please provide some input for the model to respond to."]
+
          # Join with double newlines for better readability
-         return "\n\n".join(formatted_messages)
+         result = "\n\n".join(formatted_messages)
+         debug_log(f"Final formatted prompt length: {len(result)}")
+         return result

      def _get_style_instructions(self, style: str) -> str:
          """Get formatting instructions for different styles"""
@@ -168,7 +219,49 @@ class OllamaClient(BaseModelClient):
                                max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
          """Generate a streaming text completion using Ollama"""
          logger.info(f"Starting streaming generation with model: {model}")
-         prompt = self._prepare_messages(messages, style)
+         try:
+             from app.main import debug_log # Import debug logging if available
+             debug_log(f"Starting streaming generation with model: {model}")
+         except ImportError:
+             # If debug_log not available, create a no-op function
+             debug_log = lambda msg: None
+
+         debug_log(f"generate_stream called with model: {model}, {len(messages)} messages")
+
+         # At the beginning of the method, check messages format
+         if not messages:
+             debug_log("Error: messages is empty")
+             raise ValueError("Messages list is empty")
+
+         for i, msg in enumerate(messages):
+             try:
+                 if not isinstance(msg, dict):
+                     debug_log(f"Error: message {i} is not a dict: {type(msg)}")
+                     raise ValueError(f"Message {i} is not a dictionary")
+                 if 'role' not in msg:
+                     debug_log(f"Error: message {i} missing 'role' key, using default")
+                     msg['role'] = 'user'
+                 if 'content' not in msg:
+                     debug_log(f"Error: message {i} missing 'content' key, using default")
+                     msg['content'] = ''
+             except Exception as e:
+                 debug_log(f"Error validating message {i}: {str(e)}")
+
+         # Now prepare the messages with our robust _prepare_messages method
+         try:
+             debug_log("Calling _prepare_messages to format prompt")
+             prompt = self._prepare_messages(messages, style)
+             debug_log(f"Prompt prepared, length: {len(prompt)}")
+         except Exception as prep_error:
+             debug_log(f"Error preparing messages: {str(prep_error)}")
+             # Create a simple fallback prompt
+             if len(messages) > 0 and isinstance(messages[-1], dict) and 'content' in messages[-1]:
+                 prompt = messages[-1]['content']
+                 debug_log(f"Using last message content as fallback prompt: {prompt[:100]}...")
+             else:
+                 prompt = "Please respond to the user's query."
+                 debug_log("Using generic fallback prompt")
+
          retries = 2
          last_error = None
          self._active_stream_session = None # Track the active session
@@ -179,36 +272,63 @@ class OllamaClient(BaseModelClient):
                  async with aiohttp.ClientSession() as session:
                      try:
                          logger.info("Testing model availability...")
-                         async with session.post(
-                             f"{self.base_url}/api/generate",
-                             json={
-                                 "model": model,
+                         debug_log("Testing model availability...")
+                         # Build test payload with careful error handling
+                         try:
+                             test_payload = {
+                                 "model": str(model) if model is not None else "gemma:2b",
+                                 "prompt": "test",
+                                 "temperature": float(temperature) if temperature is not None else 0.7,
+                                 "stream": False
+                             }
+                             debug_log(f"Prepared test payload: {test_payload}")
+                         except Exception as payload_error:
+                             debug_log(f"Error preparing test payload: {str(payload_error)}, using defaults")
+                             test_payload = {
+                                 "model": "gemma:2b", # Safe default
                                  "prompt": "test",
-                                 "temperature": temperature,
+                                 "temperature": 0.7,
                                  "stream": False
-                             },
+                             }
+
+                         async with session.post(
+                             f"{self.base_url}/api/generate",
+                             json=test_payload,
                              timeout=2
                          ) as response:
                              if response.status != 200:
                                  logger.warning(f"Model test request failed with status {response.status}")
+                                 debug_log(f"Model test request failed with status {response.status}")
                                  raise aiohttp.ClientError("Model not ready")
                      except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                          logger.info(f"Model cold start detected: {str(e)}")
+                         debug_log(f"Model cold start detected: {str(e)}")
                          # Set model loading flag
                          self._model_loading = True
                          logger.info("Setting model_loading state to True")
+                         debug_log("Setting model_loading state to True")

                          # Model might need loading, try pulling it
+                         # Prepare pull payload safely
+                         try:
+                             pull_payload = {"name": str(model) if model is not None else "gemma:2b"}
+                             debug_log(f"Prepared pull payload: {pull_payload}")
+                         except Exception as pull_err:
+                             debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
+                             pull_payload = {"name": "gemma:2b"} # Safe default
+
                          async with session.post(
                              f"{self.base_url}/api/pull",
-                             json={"name": model},
+                             json=pull_payload,
                              timeout=60
                          ) as pull_response:
                              if pull_response.status != 200:
                                  logger.error("Failed to pull model")
+                                 debug_log("Failed to pull model")
                                  self._model_loading = False # Reset flag on failure
                                  raise Exception("Failed to pull model")
                              logger.info("Model pulled successfully")
+                             debug_log("Model pulled successfully")
                              self._model_loading = False # Reset flag after successful pull

                  # Now proceed with actual generation
@@ -217,67 +337,106 @@ class OllamaClient(BaseModelClient):

                  try:
                      logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
+                     debug_log(f"Sending streaming request to {self.base_url}/api/generate with model: {model}")
+                     debug_log(f"Request payload: model={model}, prompt_length={len(prompt) if prompt else 0}, temperature={temperature}")
+
+                     # Build request payload with careful error handling
+                     try:
+                         request_payload = {
+                             "model": str(model) if model is not None else "gemma:2b", # Default if model is None
+                             "prompt": str(prompt) if prompt is not None else "Please respond to the user's query.",
+                             "temperature": float(temperature) if temperature is not None else 0.7,
+                             "stream": True
+                         }
+                         debug_log(f"Prepared request payload successfully")
+                     except Exception as payload_error:
+                         debug_log(f"Error preparing payload: {str(payload_error)}, using defaults")
+                         request_payload = {
+                             "model": "gemma:2b", # Safe default
+                             "prompt": "Please respond to the user's query.",
+                             "temperature": 0.7,
+                             "stream": True
+                         }
+
+                     debug_log(f"Sending request to Ollama API")
                      response = await session.post(
                          f"{self.base_url}/api/generate",
-                         json={
-                             "model": model,
-                             "prompt": prompt,
-                             "temperature": temperature,
-                             "stream": True
-                         },
+                         json=request_payload,
                          timeout=60 # Longer timeout for actual generation
                      )
                      response.raise_for_status()
+                     debug_log(f"Response status: {response.status}")

-                     # Process the response stream
-                     while True:
-                         if not self._active_stream_session:
-                             logger.info("Stream session was closed externally")
+                     # Use a simpler async iteration pattern that's less error-prone
+                     debug_log("Starting to process response stream")
+                     async for line in response.content:
+                         # Check cancellation periodically
+                         if self._active_stream_session is None:
+                             debug_log("Stream session closed, stopping stream processing")
                              break

                          try:
-                             line = await asyncio.wait_for(response.content.readline(), timeout=0.5)
-                             if not line: # End of stream
-                                 break
-
-                             chunk = line.decode().strip()
-                             try:
-                                 data = json.loads(chunk)
-                                 if "response" in data:
-                                     yield data["response"]
-                             except json.JSONDecodeError:
+                             # Process the chunk
+                             if line:
+                                 chunk = line.decode().strip()
+                                 chunk_str = line.decode().strip()
+                                 # Check if it looks like JSON before trying to parse
+                                 if chunk_str.startswith('{') and chunk_str.endswith('}'):
+                                     try:
+                                         data = json.loads(chunk_str)
+                                         if isinstance(data, dict) and "response" in data:
+                                             chunk_length = len(data["response"]) if data["response"] else 0
+                                             debug_log(f"Yielding chunk of length: {chunk_length}")
+                                             yield data["response"]
+                                         else:
+                                             debug_log(f"JSON chunk missing 'response' key: {chunk_str}")
+                                     except json.JSONDecodeError:
+                                         debug_log(f"JSON decode error for chunk: {chunk_str}")
+                                 else:
+                                     # Log unexpected non-JSON lines but don't process them
+                                     if chunk_str: # Avoid logging empty lines
+                                         debug_log(f"Received unexpected non-JSON line: {chunk_str}")
+                             # Continue processing next line regardless of parsing success/failure of current line
                              continue
-                         except asyncio.TimeoutError:
-                             # This allows checking for cancellation regularly
+                         except Exception as chunk_err:
+                             debug_log(f"Error processing chunk: {str(chunk_err)}")
+                             # Continue instead of breaking to try processing more chunks
                              continue
-                         except asyncio.CancelledError:
-                             logger.info("Stream processing was cancelled")
-                             raise
-
+
                      logger.info("Streaming completed successfully")
+                     debug_log("Streaming completed successfully")
                      return
                  finally:
                      self._active_stream_session = None # Clear reference when done
                      await session.close() # Ensure session is closed
+                     debug_log("Stream session closed")

              except aiohttp.ClientConnectorError:
                  last_error = "Could not connect to Ollama server. Make sure Ollama is running and accessible at " + self.base_url
+                 debug_log(f"ClientConnectorError: {last_error}")
              except aiohttp.ClientResponseError as e:
                  last_error = f"Ollama API error: {e.status} - {e.message}"
+                 debug_log(f"ClientResponseError: {last_error}")
              except aiohttp.ClientTimeout:
                  last_error = "Request to Ollama server timed out"
+                 debug_log(f"ClientTimeout: {last_error}")
              except asyncio.CancelledError:
                  logger.info("Streaming cancelled by client")
+                 debug_log("CancelledError: Streaming cancelled by client")
                  raise # Propagate cancellation
              except Exception as e:
                  last_error = f"Error streaming completion: {str(e)}"
+                 debug_log(f"General exception: {last_error}")

              logger.error(f"Streaming attempt failed: {last_error}")
+             debug_log(f"Streaming attempt failed: {last_error}")
              retries -= 1
              if retries >= 0:
                  logger.info(f"Retrying stream... {retries} attempts remaining")
+                 debug_log(f"Retrying stream... {retries} attempts remaining")
                  await asyncio.sleep(1)

+         debug_log(f"All retries failed. Last error: {last_error}")
          raise Exception(last_error)

      async def cancel_stream(self) -> None: