chat-console 0.2.8__tar.gz → 0.2.98__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chat_console-0.2.8 → chat_console-0.2.98}/PKG-INFO +1 -1
- {chat_console-0.2.8 → chat_console-0.2.98}/app/__init__.py +1 -1
- chat_console-0.2.98/app/api/anthropic.py +222 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/api/base.py +43 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/api/ollama.py +217 -32
- {chat_console-0.2.8 → chat_console-0.2.98}/app/api/openai.py +45 -3
- {chat_console-0.2.8 → chat_console-0.2.98}/app/config.py +53 -7
- {chat_console-0.2.8 → chat_console-0.2.98}/app/main.py +507 -108
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/chat_interface.py +56 -21
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/model_browser.py +405 -45
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/model_selector.py +77 -19
- chat_console-0.2.98/app/utils.py +528 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/PKG-INFO +1 -1
- chat_console-0.2.8/app/api/anthropic.py +0 -92
- chat_console-0.2.8/app/utils.py +0 -195
- {chat_console-0.2.8 → chat_console-0.2.98}/LICENSE +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/README.md +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/api/__init__.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/database.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/models.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/__init__.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/chat_list.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/search.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/app/ui/styles.py +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/SOURCES.txt +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/dependency_links.txt +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/entry_points.txt +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/requires.txt +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/chat_console.egg-info/top_level.txt +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/setup.cfg +0 -0
- {chat_console-0.2.8 → chat_console-0.2.98}/setup.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chat-console
-Version: 0.2.8
+Version: 0.2.98
 Summary: A command-line interface for chatting with LLMs, storing chats and (future) rag interactions
 Home-page: https://github.com/wazacraftrfid/chat-console
 Author: Johnathan Greenaway
@@ -0,0 +1,222 @@
+import anthropic
+import asyncio  # Add missing import
+from typing import List, Dict, Any, Optional, Generator, AsyncGenerator
+from .base import BaseModelClient
+from ..config import ANTHROPIC_API_KEY
+from ..utils import resolve_model_id  # Import the resolve_model_id function
+
+class AnthropicClient(BaseModelClient):
+    def __init__(self):
+        self.client = anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY)
+
+    def _prepare_messages(self, messages: List[Dict[str, str]], style: Optional[str] = None) -> List[Dict[str, str]]:
+        """Prepare messages for Claude API"""
+        # Anthropic expects role to be 'user' or 'assistant'
+        processed_messages = []
+
+        for msg in messages:
+            role = msg["role"]
+            if role == "system":
+                # For Claude, we'll convert system messages to user messages with a special prefix
+                processed_messages.append({
+                    "role": "user",
+                    "content": f"<system>\n{msg['content']}\n</system>"
+                })
+            else:
+                processed_messages.append(msg)
+
+        # Add style instructions if provided
+        if style and style != "default":
+            # Find first non-system message to attach style to
+            for i, msg in enumerate(processed_messages):
+                if msg["role"] == "user":
+                    content = msg["content"]
+                    if "<userStyle>" not in content:
+                        style_instructions = self._get_style_instructions(style)
+                        msg["content"] = f"<userStyle>{style_instructions}</userStyle>\n\n{content}"
+                    break
+
+        return processed_messages
+
+    def _get_style_instructions(self, style: str) -> str:
+        """Get formatting instructions for different styles"""
+        styles = {
+            "concise": "Be extremely concise and to the point. Use short sentences and paragraphs. Avoid unnecessary details.",
+            "detailed": "Be comprehensive and thorough in your responses. Provide detailed explanations, examples, and cover all relevant aspects of the topic.",
+            "technical": "Use precise technical language and terminology. Be formal and focus on accuracy and technical details.",
+            "friendly": "Be warm, approachable and conversational. Use casual language, personal examples, and a friendly tone.",
+        }
+
+        return styles.get(style, "")
+
+    async def generate_completion(self, messages: List[Dict[str, str]],
+                                  model: str,
+                                  style: Optional[str] = None,
+                                  temperature: float = 0.7,
+                                  max_tokens: Optional[int] = None) -> str:
+        """Generate a text completion using Claude"""
+        try:
+            from app.main import debug_log
+        except ImportError:
+            debug_log = lambda msg: None
+
+        # Resolve the model ID right before making the API call
+        original_model = model
+        resolved_model = resolve_model_id(model)
+        debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}' in generate_completion")
+
+        processed_messages = self._prepare_messages(messages, style)
+
+        response = await self.client.messages.create(
+            model=resolved_model,  # Use the resolved model ID
+            messages=processed_messages,
+            temperature=temperature,
+            max_tokens=max_tokens or 1024,
+        )
+
+        return response.content[0].text
+
+    async def generate_stream(self, messages: List[Dict[str, str]],
+                              model: str,
+                              style: Optional[str] = None,
+                              temperature: float = 0.7,
+                              max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
+        """Generate a streaming text completion using Claude"""
+        try:
+            from app.main import debug_log  # Import debug logging if available
+        except ImportError:
+            # If debug_log not available, create a no-op function
+            debug_log = lambda msg: None
+
+        # Resolve the model ID right before making the API call
+        original_model = model
+        resolved_model = resolve_model_id(model)
+        debug_log(f"Anthropic: Original model ID '{original_model}' resolved to '{resolved_model}'")
+        debug_log(f"Anthropic: starting streaming generation with model: {resolved_model}")
+
+        processed_messages = self._prepare_messages(messages, style)
+
+        try:
+            debug_log(f"Anthropic: requesting stream with {len(processed_messages)} messages")
+            # Remove await from this line - it returns the context manager, not an awaitable
+            stream = self.client.messages.stream(
+                model=resolved_model,  # Use the resolved model ID
+                messages=processed_messages,
+                temperature=temperature,
+                max_tokens=max_tokens or 1024,
+            )
+
+            debug_log("Anthropic: stream created successfully, processing chunks using async with")
+            async with stream as stream_context:  # Use async with
+                async for chunk in stream_context:  # Iterate over the context
+                    try:
+                        if chunk.type == "content_block_delta":  # Check for delta type
+                            # Ensure we always return a string
+                            if chunk.delta.text is None:
+                                debug_log("Anthropic: skipping empty text delta chunk")
+                                continue
+
+                            text = str(chunk.delta.text)  # Get text from delta
+                            debug_log(f"Anthropic: yielding chunk of length: {len(text)}")
+                            yield text
+                        else:
+                            debug_log(f"Anthropic: skipping non-content_delta chunk of type: {chunk.type}")
+                    except Exception as chunk_error:  # Restore the except block for chunk processing
+                        debug_log(f"Anthropic: error processing chunk: {str(chunk_error)}")
+                        # Skip problematic chunks but continue processing
+                        continue  # This continue is now correctly inside the loop and except block
+
+        except Exception as e:
+            debug_log(f"Anthropic: error in generate_stream: {str(e)}")
+            raise Exception(f"Anthropic streaming error: {str(e)}")
+
+    async def _fetch_models_from_api(self) -> List[Dict[str, Any]]:
+        """Fetch available models directly from the Anthropic API."""
+        try:
+            from app.main import debug_log
+        except ImportError:
+            debug_log = lambda msg: None
+
+        try:
+            debug_log("Anthropic: Fetching models from API...")
+            # The Anthropic Python SDK might not have a direct high-level method for listing models yet.
+            # We might need to use the underlying HTTP client or make a direct request.
+            # Let's assume for now the SDK client *does* have a way, like self.client.models.list()
+            # If this fails, we'd need to implement a direct HTTP GET request.
+            # response = await self.client.models.list()  # Hypothetical SDK method
+
+            # --- Alternative: Direct HTTP Request using httpx (if client exposes it) ---
+            # Check if the client has an internal http_client we can use
+            if hasattr(self.client, '_client') and hasattr(self.client._client, 'get'):
+                response = await self.client._client.get(
+                    "/v1/models",
+                    headers={"anthropic-version": "2023-06-01"}  # Add required version header
+                )
+                response.raise_for_status()  # Raise HTTP errors
+                models_data = response.json()
+                debug_log(f"Anthropic: API response received: {models_data}")
+                if 'data' in models_data and isinstance(models_data['data'], list):
+                    # Format the response as expected: list of {"id": ..., "name": ...}
+                    formatted_models = [
+                        {"id": model.get("id"), "name": model.get("display_name", model.get("id"))}
+                        for model in models_data['data']
+                        if model.get("id")  # Ensure model has an ID
+                    ]
+                    # Log each model ID clearly for debugging
+                    debug_log(f"Anthropic: Available models from API:")
+                    for model in formatted_models:
+                        debug_log(f"  - ID: {model.get('id')}, Name: {model.get('name')}")
+                    return formatted_models
+                else:
+                    debug_log("Anthropic: Unexpected API response format for models.")
+                    return []
+            else:
+                debug_log("Anthropic: Client does not expose HTTP client for model listing. Returning empty list.")
+                return []  # Cannot fetch dynamically
+
+        except Exception as e:
+            debug_log(f"Anthropic: Failed to fetch models from API: {str(e)}")
+            # Fallback to a minimal hardcoded list in case of API error
+            # Include Claude 3.7 Sonnet with the correct full ID
+            fallback_models = [
+                {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+                {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+                {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+                {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+                {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"},  # Add Claude 3.7 Sonnet
+            ]
+            debug_log("Anthropic: Using fallback model list:")
+            for model in fallback_models:
+                debug_log(f"  - ID: {model['id']}, Name: {model['name']}")
+            return fallback_models
+
+    # Keep this synchronous for now, but make it call the async fetcher
+    # Note: This is slightly awkward. Ideally, config loading would be async.
+    # For now, we'll run the async fetcher within the sync method using asyncio.run()
+    # This is NOT ideal for performance but avoids larger refactoring of config loading.
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """Get list of available Claude models by fetching from API."""
+        try:
+            # Run the async fetcher method synchronously
+            models = asyncio.run(self._fetch_models_from_api())
+            return models
+        except RuntimeError as e:
+            # Handle cases where asyncio.run can't be called (e.g., already in an event loop)
+            # This might happen during app runtime if called again. Fallback needed.
+            try:
+                from app.main import debug_log
+            except ImportError:
+                debug_log = lambda msg: None
+            debug_log(f"Anthropic: Cannot run async model fetch synchronously ({e}). Falling back to hardcoded list.")
+            # Use the same fallback list as in _fetch_models_from_api
+            fallback_models = [
+                {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus"},
+                {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet"},
+                {"id": "claude-3-haiku-20240307", "name": "Claude 3 Haiku"},
+                {"id": "claude-3-5-sonnet-20240620", "name": "Claude 3.5 Sonnet"},
+                {"id": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet"},  # Add Claude 3.7 Sonnet
+            ]
+            debug_log("Anthropic: Using fallback model list in get_available_models:")
+            for model in fallback_models:
+                debug_log(f"  - ID: {model['id']}, Name: {model['name']}")
+            return fallback_models
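The hunk above ships a fully async Anthropic client. Below is a minimal usage sketch, not part of the package, assuming the layout from the file list (`app/api/anthropic.py`) and a configured ANTHROPIC_API_KEY; the prompt text and model ID are illustrative only:

```python
import asyncio

from app.api.anthropic import AnthropicClient  # import path assumed from the package layout above


async def main() -> None:
    client = AnthropicClient()  # reads ANTHROPIC_API_KEY from the app config
    messages = [{"role": "user", "content": "Summarize the Unix philosophy in two sentences."}]
    # generate_stream yields text deltas as they arrive from Claude
    async for chunk in client.generate_stream(messages, model="claude-3-5-sonnet-20240620"):
        print(chunk, end="", flush=True)
    print()


asyncio.run(main())
```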
@@ -27,6 +27,49 @@ class BaseModelClient(ABC):
         """Get list of available models from this provider"""
         pass
 
+    @staticmethod
+    def get_client_type_for_model(model_name: str) -> type:
+        """Get the client class for a model without instantiating it"""
+        from ..config import CONFIG, AVAILABLE_PROVIDERS
+        from .anthropic import AnthropicClient
+        from .openai import OpenAIClient
+        from .ollama import OllamaClient
+        import logging
+
+        logger = logging.getLogger(__name__)
+
+        # Get model info and provider
+        model_info = CONFIG["available_models"].get(model_name)
+        model_name_lower = model_name.lower()
+
+        # If model is in config, use its provider
+        if model_info:
+            provider = model_info["provider"]
+        # For custom models, try to infer provider
+        else:
+            # First try Ollama for known model names or if selected from Ollama UI
+            if (any(name in model_name_lower for name in ["llama", "mistral", "codellama", "gemma"]) or
+                    model_name in [m["id"] for m in CONFIG.get("ollama_models", [])]):
+                provider = "ollama"
+            # Then try other providers
+            elif any(name in model_name_lower for name in ["gpt", "text-", "davinci"]):
+                provider = "openai"
+            elif any(name in model_name_lower for name in ["claude", "anthropic"]):
+                provider = "anthropic"
+            else:
+                # Default to Ollama for unknown models
+                provider = "ollama"
+
+        # Return appropriate client class
+        if provider == "ollama":
+            return OllamaClient
+        elif provider == "openai":
+            return OpenAIClient
+        elif provider == "anthropic":
+            return AnthropicClient
+        else:
+            return None
+
     @staticmethod
     def get_client_for_model(model_name: str) -> 'BaseModelClient':
         """Factory method to get appropriate client for model"""
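The new `get_client_type_for_model` helper lets callers route a model name to a provider class without instantiating a client, so no API key or network access is needed yet. A hedged sketch of that usage, assuming the import paths from the file list above; `llama3:8b` is only an illustrative name that the substring heuristic routes to Ollama:

```python
from app.api.base import BaseModelClient  # paths assumed from the package layout above
from app.api.ollama import OllamaClient

# Resolve the client class up front: no instantiation, no API key, no network call.
client_cls = BaseModelClient.get_client_type_for_model("llama3:8b")

if client_cls is OllamaClient:
    print("Local Ollama backend selected; no API key required.")

# Only instantiate once a request is actually about to be made.
client = client_cls() if client_cls is not None else None
```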
@@ -22,6 +22,9 @@ class OllamaClient(BaseModelClient):
         # Track active stream session
         self._active_stream_session = None
 
+        # Track model loading state
+        self._model_loading = False
+
         # Path to the cached models file
         self.models_cache_path = Path(__file__).parent.parent / "data" / "ollama-models.json"
 
@@ -31,17 +34,62 @@ class OllamaClient(BaseModelClient):
 
     def _prepare_messages(self, messages: List[Dict[str, str]], style: Optional[str] = None) -> str:
         """Convert chat messages to Ollama format"""
+        try:
+            from app.main import debug_log  # Import debug logging
+            debug_log(f"_prepare_messages called with {len(messages)} messages and style: {style}")
+        except ImportError:
+            # If debug_log not available, create a no-op function
+            debug_log = lambda msg: None
+
         # Start with any style instructions
         formatted_messages = []
         if style and style != "default":
-
-
+            style_instructions = self._get_style_instructions(style)
+            debug_log(f"Adding style instructions: {style_instructions[:50]}...")
+            formatted_messages.append(style_instructions)
+
         # Add message content, preserving conversation flow
-        for msg in messages:
-
-
+        for i, msg in enumerate(messages):
+            try:
+                debug_log(f"Processing message {i}: role={msg.get('role', 'unknown')}, content length={len(msg.get('content', ''))}")
+
+                # Safely extract content with fallback
+                if "content" in msg and msg["content"] is not None:
+                    content = msg["content"]
+                    formatted_messages.append(content)
+                else:
+                    debug_log(f"Message {i} has no valid content key, using fallback")
+                    # Try to get content from alternative sources
+                    if isinstance(msg, dict):
+                        # Try to convert the whole message to string as last resort
+                        content = str(msg)
+                        debug_log(f"Using fallback content: {content[:50]}...")
+                        formatted_messages.append(content)
+                    else:
+                        debug_log(f"Message {i} is not a dict, skipping")
+
+            except KeyError as e:
+                debug_log(f"KeyError processing message {i}: {e}, message: {msg}")
+                # Handle missing key more gracefully
+                content = msg.get('content', '')
+                if content:
+                    formatted_messages.append(content)
+                else:
+                    debug_log(f"Warning: Message {i} has no content, skipping")
+            except Exception as e:
+                debug_log(f"Error processing message {i}: {e}")
+                # Continue processing other messages
+                continue
+
+        # Defensive check to ensure we have something to return
+        if not formatted_messages:
+            debug_log("Warning: No formatted messages were created, using fallback")
+            formatted_messages = ["Please provide some input for the model to respond to."]
+
         # Join with double newlines for better readability
-
+        result = "\n\n".join(formatted_messages)
+        debug_log(f"Final formatted prompt length: {len(result)}")
+        return result
 
     def _get_style_instructions(self, style: str) -> str:
         """Get formatting instructions for different styles"""
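The rewritten `_prepare_messages` flattens the whole conversation into a single prompt string: optional style instructions come first, then each message's content, joined by blank lines. A stripped-down sketch of that joining behavior (no debug logging or fallbacks), with made-up message text:

```python
from typing import Dict, List, Optional


def flatten_messages(messages: List[Dict[str, str]], style_instructions: Optional[str] = None) -> str:
    """Mimic the new joining logic: optional style block first, then each message's content."""
    parts: List[str] = [style_instructions] if style_instructions else []
    parts.extend(m["content"] for m in messages if m.get("content"))
    return "\n\n".join(parts)


print(flatten_messages(
    [{"role": "system", "content": "You are terse."},
     {"role": "user", "content": "What is Ollama?"}],
    style_instructions="Be extremely concise.",
))
# Be extremely concise.
#
# You are terse.
#
# What is Ollama?
```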
@@ -165,7 +213,49 @@ class OllamaClient(BaseModelClient):
                               max_tokens: Optional[int] = None) -> AsyncGenerator[str, None]:
         """Generate a streaming text completion using Ollama"""
         logger.info(f"Starting streaming generation with model: {model}")
-
+        try:
+            from app.main import debug_log  # Import debug logging if available
+            debug_log(f"Starting streaming generation with model: {model}")
+        except ImportError:
+            # If debug_log not available, create a no-op function
+            debug_log = lambda msg: None
+
+        debug_log(f"generate_stream called with model: {model}, {len(messages)} messages")
+
+        # At the beginning of the method, check messages format
+        if not messages:
+            debug_log("Error: messages is empty")
+            raise ValueError("Messages list is empty")
+
+        for i, msg in enumerate(messages):
+            try:
+                if not isinstance(msg, dict):
+                    debug_log(f"Error: message {i} is not a dict: {type(msg)}")
+                    raise ValueError(f"Message {i} is not a dictionary")
+                if 'role' not in msg:
+                    debug_log(f"Error: message {i} missing 'role' key, using default")
+                    msg['role'] = 'user'
+                if 'content' not in msg:
+                    debug_log(f"Error: message {i} missing 'content' key, using default")
+                    msg['content'] = ''
+            except Exception as e:
+                debug_log(f"Error validating message {i}: {str(e)}")
+
+        # Now prepare the messages with our robust _prepare_messages method
+        try:
+            debug_log("Calling _prepare_messages to format prompt")
+            prompt = self._prepare_messages(messages, style)
+            debug_log(f"Prompt prepared, length: {len(prompt)}")
+        except Exception as prep_error:
+            debug_log(f"Error preparing messages: {str(prep_error)}")
+            # Create a simple fallback prompt
+            if len(messages) > 0 and isinstance(messages[-1], dict) and 'content' in messages[-1]:
+                prompt = messages[-1]['content']
+                debug_log(f"Using last message content as fallback prompt: {prompt[:100]}...")
+            else:
+                prompt = "Please respond to the user's query."
+                debug_log("Using generic fallback prompt")
+
         retries = 2
         last_error = None
         self._active_stream_session = None  # Track the active session
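Before building the prompt, the hunk above normalizes malformed message dicts instead of failing: a missing `role` defaults to `user` and a missing `content` to an empty string. A small standalone sketch of that defensive normalization (the helper name is hypothetical):

```python
from typing import Any, Dict, List


def normalize_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Apply the same defaults the diff uses for malformed messages."""
    for i, msg in enumerate(messages):
        if not isinstance(msg, dict):
            raise ValueError(f"Message {i} is not a dictionary")
        msg.setdefault("role", "user")    # missing role -> 'user'
        msg.setdefault("content", "")     # missing content -> empty string
    return messages


print(normalize_messages([{"content": "hi"}, {"role": "assistant"}]))
# [{'content': 'hi', 'role': 'user'}, {'role': 'assistant', 'content': ''}]
```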
@@ -176,31 +266,64 @@ class OllamaClient(BaseModelClient):
                 async with aiohttp.ClientSession() as session:
                     try:
                         logger.info("Testing model availability...")
-
-
-
-
+                        debug_log("Testing model availability...")
+                        # Build test payload with careful error handling
+                        try:
+                            test_payload = {
+                                "model": str(model) if model is not None else "gemma:2b",
+                                "prompt": "test",
+                                "temperature": float(temperature) if temperature is not None else 0.7,
+                                "stream": False
+                            }
+                            debug_log(f"Prepared test payload: {test_payload}")
+                        except Exception as payload_error:
+                            debug_log(f"Error preparing test payload: {str(payload_error)}, using defaults")
+                            test_payload = {
+                                "model": "gemma:2b",  # Safe default
                                 "prompt": "test",
-                                "temperature":
+                                "temperature": 0.7,
                                 "stream": False
-                            }
+                            }
+
+                        async with session.post(
+                            f"{self.base_url}/api/generate",
+                            json=test_payload,
                             timeout=2
                         ) as response:
                             if response.status != 200:
                                 logger.warning(f"Model test request failed with status {response.status}")
+                                debug_log(f"Model test request failed with status {response.status}")
                                 raise aiohttp.ClientError("Model not ready")
                     except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                         logger.info(f"Model cold start detected: {str(e)}")
+                        debug_log(f"Model cold start detected: {str(e)}")
+                        # Set model loading flag
+                        self._model_loading = True
+                        logger.info("Setting model_loading state to True")
+                        debug_log("Setting model_loading state to True")
+
                         # Model might need loading, try pulling it
+                        # Prepare pull payload safely
+                        try:
+                            pull_payload = {"name": str(model) if model is not None else "gemma:2b"}
+                            debug_log(f"Prepared pull payload: {pull_payload}")
+                        except Exception as pull_err:
+                            debug_log(f"Error preparing pull payload: {str(pull_err)}, using default")
+                            pull_payload = {"name": "gemma:2b"}  # Safe default
+
                         async with session.post(
                             f"{self.base_url}/api/pull",
-                            json=
+                            json=pull_payload,
                             timeout=60
                         ) as pull_response:
                             if pull_response.status != 200:
                                 logger.error("Failed to pull model")
+                                debug_log("Failed to pull model")
+                                self._model_loading = False  # Reset flag on failure
                                 raise Exception("Failed to pull model")
                             logger.info("Model pulled successfully")
+                            debug_log("Model pulled successfully")
+                            self._model_loading = False  # Reset flag after successful pull
 
                 # Now proceed with actual generation
                 session = aiohttp.ClientSession()
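The cold-start handling above probes `/api/generate` with a short timeout and falls back to `/api/pull` when the model is not yet loaded, flipping `_model_loading` around the pull. A minimal sketch of that probe-then-pull flow against the Ollama HTTP API, assuming a local server on the default port; `gemma:2b` is the same safe default the diff uses:

```python
import asyncio

import aiohttp


async def ensure_model(base_url: str, model: str) -> None:
    """Cheap availability probe followed by a pull if the model is cold."""
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(f"{base_url}/api/generate",
                                    json={"model": model, "prompt": "test", "stream": False},
                                    timeout=2) as resp:
                if resp.status != 200:
                    raise aiohttp.ClientError("Model not ready")
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # Cold start: ask the server to pull/load the model, with a generous timeout.
            async with session.post(f"{base_url}/api/pull",
                                    json={"name": model}, timeout=60) as pull:
                pull.raise_for_status()


asyncio.run(ensure_model("http://localhost:11434", "gemma:2b"))
```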
@@ -208,50 +331,106 @@ class OllamaClient(BaseModelClient):
 
                 try:
                     logger.debug(f"Sending streaming request to {self.base_url}/api/generate")
-
-
-
-
-
-
+                    debug_log(f"Sending streaming request to {self.base_url}/api/generate with model: {model}")
+                    debug_log(f"Request payload: model={model}, prompt_length={len(prompt) if prompt else 0}, temperature={temperature}")
+
+                    # Build request payload with careful error handling
+                    try:
+                        request_payload = {
+                            "model": str(model) if model is not None else "gemma:2b",  # Default if model is None
+                            "prompt": str(prompt) if prompt is not None else "Please respond to the user's query.",
+                            "temperature": float(temperature) if temperature is not None else 0.7,
                             "stream": True
-                        }
+                        }
+                        debug_log(f"Prepared request payload successfully")
+                    except Exception as payload_error:
+                        debug_log(f"Error preparing payload: {str(payload_error)}, using defaults")
+                        request_payload = {
+                            "model": "gemma:2b",  # Safe default
+                            "prompt": "Please respond to the user's query.",
+                            "temperature": 0.7,
+                            "stream": True
+                        }
+
+                    debug_log(f"Sending request to Ollama API")
+                    response = await session.post(
+                        f"{self.base_url}/api/generate",
+                        json=request_payload,
                         timeout=60  # Longer timeout for actual generation
-                    )
-
-
+                    )
+                    response.raise_for_status()
+                    debug_log(f"Response status: {response.status}")
+
+                    # Use a simpler async iteration pattern that's less error-prone
+                    debug_log("Starting to process response stream")
+                    async for line in response.content:
+                        # Check cancellation periodically
+                        if self._active_stream_session is None:
+                            debug_log("Stream session closed, stopping stream processing")
+                            break
+
+                        try:
+                            # Process the chunk
                             if line:
                                 chunk = line.decode().strip()
-
-
-
-
-
-
-
-
+                                chunk_str = line.decode().strip()
+                                # Check if it looks like JSON before trying to parse
+                                if chunk_str.startswith('{') and chunk_str.endswith('}'):
+                                    try:
+                                        data = json.loads(chunk_str)
+                                        if isinstance(data, dict) and "response" in data:
+                                            chunk_length = len(data["response"]) if data["response"] else 0
+                                            debug_log(f"Yielding chunk of length: {chunk_length}")
+                                            yield data["response"]
+                                        else:
+                                            debug_log(f"JSON chunk missing 'response' key: {chunk_str}")
+                                    except json.JSONDecodeError:
+                                        debug_log(f"JSON decode error for chunk: {chunk_str}")
+                                else:
+                                    # Log unexpected non-JSON lines but don't process them
+                                    if chunk_str:  # Avoid logging empty lines
+                                        debug_log(f"Received unexpected non-JSON line: {chunk_str}")
+                            # Continue processing next line regardless of parsing success/failure of current line
+                            continue
+                        except Exception as chunk_err:
+                            debug_log(f"Error processing chunk: {str(chunk_err)}")
+                            # Continue instead of breaking to try processing more chunks
+                            continue
+
+                    logger.info("Streaming completed successfully")
+                    debug_log("Streaming completed successfully")
+                    return
                 finally:
                     self._active_stream_session = None  # Clear reference when done
                     await session.close()  # Ensure session is closed
+                    debug_log("Stream session closed")
 
             except aiohttp.ClientConnectorError:
                 last_error = "Could not connect to Ollama server. Make sure Ollama is running and accessible at " + self.base_url
+                debug_log(f"ClientConnectorError: {last_error}")
             except aiohttp.ClientResponseError as e:
                 last_error = f"Ollama API error: {e.status} - {e.message}"
+                debug_log(f"ClientResponseError: {last_error}")
             except aiohttp.ClientTimeout:
                 last_error = "Request to Ollama server timed out"
+                debug_log(f"ClientTimeout: {last_error}")
             except asyncio.CancelledError:
                 logger.info("Streaming cancelled by client")
+                debug_log("CancelledError: Streaming cancelled by client")
                 raise  # Propagate cancellation
             except Exception as e:
                 last_error = f"Error streaming completion: {str(e)}"
+                debug_log(f"General exception: {last_error}")
 
             logger.error(f"Streaming attempt failed: {last_error}")
+            debug_log(f"Streaming attempt failed: {last_error}")
             retries -= 1
             if retries >= 0:
                 logger.info(f"Retrying stream... {retries} attempts remaining")
+                debug_log(f"Retrying stream... {retries} attempts remaining")
                 await asyncio.sleep(1)
 
+        debug_log(f"All retries failed. Last error: {last_error}")
         raise Exception(last_error)
 
     async def cancel_stream(self) -> None:
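The streaming loop above parses Ollama's line-delimited JSON output: each line of the `/api/generate` stream is a JSON object whose `response` field carries the next text fragment, with `done` marking the final line. A tiny illustration of that parsing with hard-coded sample lines:

```python
import json

# Two hard-coded sample lines in the shape Ollama streams back.
raw_lines = [
    b'{"model":"gemma:2b","response":"Hel","done":false}',
    b'{"model":"gemma:2b","response":"lo!","done":true}',
]

text = "".join(
    json.loads(line.decode())["response"]
    for line in raw_lines
    if line.strip().startswith(b"{")
)
print(text)  # Hello!
```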
@@ -260,6 +439,12 @@ class OllamaClient(BaseModelClient):
             logger.info("Cancelling active stream session")
             await self._active_stream_session.close()
             self._active_stream_session = None
+            self._model_loading = False
+            logger.info("Stream session closed successfully")
+
+    def is_loading_model(self) -> bool:
+        """Check if Ollama is currently loading a model"""
+        return self._model_loading
 
     async def get_model_details(self, model_id: str) -> Dict[str, Any]:
         """Get detailed information about a specific Ollama model"""
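The new `is_loading_model()` flag gives the UI a way to show a warm-up indicator while a cold model is being pulled. A hedged sketch of how a caller might poll it alongside `generate_stream`; the helper below is illustrative and not taken from app/main.py, and the import path is assumed from the package layout above:

```python
import asyncio
from typing import Dict, List

from app.api.ollama import OllamaClient  # import path assumed from the package layout above


async def stream_with_status(client: OllamaClient, messages: List[Dict[str, str]], model: str) -> str:
    """Collect a streamed reply while printing a notice whenever the model is still warming up."""
    chunks: List[str] = []

    async def watch() -> None:
        while True:
            if client.is_loading_model():
                print("(Ollama is loading the model...)", flush=True)
            await asyncio.sleep(0.5)

    watcher = asyncio.create_task(watch())
    try:
        async for piece in client.generate_stream(messages, model):
            chunks.append(piece)
    finally:
        watcher.cancel()  # stop the status watcher once streaming ends
    return "".join(chunks)
```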