mem-llm 1.1.0-py3-none-any.whl → 1.3.0-py3-none-any.whl
This diff shows the contents of publicly available package versions as released to their public registry; it is provided for informational purposes only and reflects the changes between the two published versions.
Potentially problematic release.
This version of mem-llm might be problematic.
- mem_llm/__init__.py +26 -3
- mem_llm/base_llm_client.py +175 -0
- mem_llm/clients/__init__.py +25 -0
- mem_llm/clients/gemini_client.py +381 -0
- mem_llm/clients/lmstudio_client.py +280 -0
- mem_llm/clients/ollama_client.py +268 -0
- mem_llm/config_manager.py +1 -1
- mem_llm/conversation_summarizer.py +372 -0
- mem_llm/data_export_import.py +640 -0
- mem_llm/llm_client_factory.py +277 -0
- mem_llm/mem_agent.py +154 -43
- mem_llm/memory_db.py +7 -1
- mem_llm/thread_safe_db.py +7 -1
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/METADATA +84 -110
- mem_llm-1.3.0.dist-info/RECORD +29 -0
- mem_llm-1.1.0.dist-info/RECORD +0 -21
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/WHEEL +0 -0
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/entry_points.txt +0 -0
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/top_level.txt +0 -0
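The new files above add a pluggable client layer: a shared `base_llm_client.py`, concrete clients for Gemini, LM Studio, and Ollama under `mem_llm/clients/`, and a `llm_client_factory.py`. The factory's API is not visible in this diff, so the sketch below wires up the two local-server clients added here (shown in full further down) directly; the import paths are assumptions inferred from the file layout above.

```python
# Hedged sketch (not the package's factory API): pick whichever local backend
# answers, using the client classes added in this release. Import paths are
# assumed from the file layout; adjust them to the actual package exports.
from mem_llm.clients.lmstudio_client import LMStudioClient
from mem_llm.clients.ollama_client import OllamaClient


def pick_local_client():
    """Return the first reachable local backend, or None if neither responds."""
    for client in (OllamaClient(model="granite4:tiny-h"),
                   LMStudioClient(model="local-model")):
        if client.check_connection():  # both clients expose this probe
            return client
    return None


client = pick_local_client()
if client is not None:
    print(client.chat([{"role": "user", "content": "Hello!"}]))
```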
mem_llm/clients/lmstudio_client.py
ADDED
@@ -0,0 +1,280 @@
+"""
+LM Studio LLM Client
+====================
+
+Client for LM Studio local inference server.
+LM Studio uses OpenAI-compatible API format.
+
+Features:
+- OpenAI-compatible API
+- Fast local inference
+- Easy model switching
+- GPU acceleration support
+
+Installation:
+1. Download LM Studio from https://lmstudio.ai
+2. Load a model
+3. Start local server (default: http://localhost:1234)
+
+Author: C. Emre Karataş
+Version: 1.3.0
+"""
+
+import requests
+import time
+from typing import List, Dict, Optional
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from base_llm_client import BaseLLMClient
+
+
+class LMStudioClient(BaseLLMClient):
+    """
+    LM Studio client implementation
+
+    LM Studio provides an OpenAI-compatible API for local models.
+    This client works with any model loaded in LM Studio.
+
+    Usage:
+        client = LMStudioClient(
+            model="local-model",  # or specific model name
+            base_url="http://localhost:1234"
+        )
+        response = client.chat([{"role": "user", "content": "Hello!"}])
+    """
+
+    def __init__(self,
+                 model: str = "local-model",
+                 base_url: str = "http://localhost:1234",
+                 **kwargs):
+        """
+        Initialize LM Studio client
+
+        Args:
+            model: Model identifier (use "local-model" for default loaded model)
+            base_url: LM Studio server URL (default: http://localhost:1234)
+            **kwargs: Additional configuration
+        """
+        super().__init__(model=model, **kwargs)
+        self.base_url = base_url.rstrip('/')
+        self.chat_url = f"{self.base_url}/v1/chat/completions"
+        self.models_url = f"{self.base_url}/v1/models"
+
+        self.logger.debug(f"Initialized LM Studio client: {base_url}, model: {model}")
+
+    def check_connection(self) -> bool:
+        """
+        Check if LM Studio server is running
+
+        Returns:
+            True if server is available
+        """
+        try:
+            response = requests.get(self.models_url, timeout=5)
+            return response.status_code == 200
+        except Exception as e:
+            self.logger.debug(f"LM Studio connection check failed: {e}")
+            return False
+
+    def list_models(self) -> List[str]:
+        """
+        List available models in LM Studio
+
+        Returns:
+            List of model identifiers
+        """
+        try:
+            response = requests.get(self.models_url, timeout=5)
+            if response.status_code == 200:
+                data = response.json()
+                models = data.get('data', [])
+                return [model.get('id', '') for model in models if model.get('id')]
+            return []
+        except Exception as e:
+            self.logger.error(f"Failed to list models: {e}")
+            return []
+
+    def chat(self,
+             messages: List[Dict[str, str]],
+             temperature: float = 0.7,
+             max_tokens: int = 2000,
+             **kwargs) -> str:
+        """
+        Send chat request to LM Studio
+
+        Uses OpenAI-compatible chat completions endpoint.
+
+        Args:
+            messages: Message history in OpenAI format
+            temperature: Sampling temperature (0.0-2.0)
+            max_tokens: Maximum tokens in response
+            **kwargs: Additional OpenAI-compatible parameters
+                - top_p: Nucleus sampling (0.0-1.0)
+                - frequency_penalty: (-2.0 to 2.0)
+                - presence_penalty: (-2.0 to 2.0)
+                - stream: Enable streaming (bool)
+
+        Returns:
+            Model response text
+
+        Raises:
+            ConnectionError: If cannot connect to LM Studio
+            ValueError: If invalid parameters
+        """
+        # Validate messages
+        self._validate_messages(messages)
+
+        # Build OpenAI-compatible payload
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": kwargs.get("stream", False)
+        }
+
+        # Add optional parameters
+        if "top_p" in kwargs:
+            payload["top_p"] = kwargs["top_p"]
+        if "frequency_penalty" in kwargs:
+            payload["frequency_penalty"] = kwargs["frequency_penalty"]
+        if "presence_penalty" in kwargs:
+            payload["presence_penalty"] = kwargs["presence_penalty"]
+        if "stop" in kwargs:
+            payload["stop"] = kwargs["stop"]
+
+        # Send request with retry logic
+        max_retries = kwargs.get("max_retries", 3)
+        for attempt in range(max_retries):
+            try:
+                response = requests.post(
+                    self.chat_url,
+                    json=payload,
+                    timeout=kwargs.get("timeout", 120)
+                )
+
+                if response.status_code == 200:
+                    response_data = response.json()
+
+                    # Extract content from OpenAI format
+                    choices = response_data.get('choices', [])
+                    if not choices:
+                        self.logger.warning("No choices in LM Studio response")
+                        if attempt < max_retries - 1:
+                            time.sleep(1.0 * (2 ** attempt))
+                            continue
+                        return ""
+
+                    # Get the message content
+                    message = choices[0].get('message', {})
+                    content = message.get('content', '').strip()
+
+                    if not content:
+                        self.logger.warning("Empty content in LM Studio response")
+                        if attempt < max_retries - 1:
+                            time.sleep(1.0 * (2 ** attempt))
+                            continue
+
+                    # Log usage statistics if available
+                    usage = response_data.get('usage', {})
+                    if usage:
+                        self.logger.debug(
+                            f"LM Studio usage - "
+                            f"prompt: {usage.get('prompt_tokens', 0)} tokens, "
+                            f"completion: {usage.get('completion_tokens', 0)} tokens, "
+                            f"total: {usage.get('total_tokens', 0)} tokens"
+                        )
+
+                    return content
+
+                else:
+                    error_msg = f"LM Studio API error: {response.status_code}"
+                    try:
+                        error_data = response.json()
+                        error_detail = error_data.get('error', {})
+                        if isinstance(error_detail, dict):
+                            error_msg += f" - {error_detail.get('message', response.text)}"
+                        else:
+                            error_msg += f" - {error_detail}"
+                    except:
+                        error_msg += f" - {response.text[:200]}"
+
+                    self.logger.error(error_msg)
+
+                    if attempt < max_retries - 1:
+                        time.sleep(1.0 * (2 ** attempt))
+                        continue
+                    raise ConnectionError(error_msg)
+
+            except requests.exceptions.Timeout:
+                self.logger.warning(f"LM Studio request timeout (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(2.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError("LM Studio request timeout. Check if server is running.")
+
+            except requests.exceptions.ConnectionError as e:
+                self.logger.warning(f"Cannot connect to LM Studio (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError(
+                    f"Cannot connect to LM Studio at {self.base_url}. "
+                    "Make sure LM Studio is running and server is started."
+                ) from e
+
+            except Exception as e:
+                self.logger.error(f"Unexpected error: {e}")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise
+
+        raise ConnectionError("Failed to get response after maximum retries")
+
+    def get_model_info(self) -> Dict:
+        """
+        Get information about currently loaded model
+
+        Returns:
+            Dictionary with model information
+        """
+        try:
+            response = requests.get(self.models_url, timeout=5)
+            if response.status_code == 200:
+                data = response.json()
+                models = data.get('data', [])
+
+                # Find our model or return first one
+                for model in models:
+                    if model.get('id') == self.model:
+                        return model
+
+                # Return first model if ours not found
+                return models[0] if models else {}
+            return {}
+        except Exception as e:
+            self.logger.error(f"Failed to get model info: {e}")
+            return {}
+
+    def get_info(self) -> Dict:
+        """
+        Get comprehensive client information
+
+        Returns:
+            Dictionary with client and model metadata
+        """
+        base_info = super().get_info()
+
+        # Add LM Studio specific info
+        if self.check_connection():
+            model_info = self.get_model_info()
+            base_info['model_details'] = model_info
+            base_info['available_models'] = self.list_models()
+
+        return base_info
+
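Based on the constructor defaults and the Usage note in the docstring above, a minimal end-to-end call might look like the sketch below; it assumes an LM Studio server is already running on the default port, and the import path is inferred from the file layout.

```python
# Minimal usage sketch for the class above; defaults and optional parameters
# are taken from its docstring, and the import path is an assumption.
from mem_llm.clients.lmstudio_client import LMStudioClient

client = LMStudioClient(model="local-model", base_url="http://localhost:1234")
if client.check_connection():
    print("Available models:", client.list_models())
    reply = client.chat(
        [{"role": "user", "content": "Hello!"}],
        temperature=0.7,   # sampling temperature (0.0-2.0)
        max_tokens=200,    # cap on the response length
        top_p=0.9,         # optional OpenAI-compatible parameter
    )
    print(reply)
else:
    print("LM Studio server is not reachable at http://localhost:1234")
```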
mem_llm/clients/ollama_client.py
ADDED
@@ -0,0 +1,268 @@
+"""
+Ollama LLM Client
+=================
+
+Client for local Ollama service.
+Supports all Ollama models (Llama3, Granite, Qwen3, DeepSeek, etc.)
+
+Author: C. Emre Karataş
+Version: 1.3.0
+"""
+
+import requests
+import time
+from typing import List, Dict, Optional
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from base_llm_client import BaseLLMClient
+
+
+class OllamaClient(BaseLLMClient):
+    """
+    Ollama LLM client implementation
+
+    Supports:
+    - All Ollama models
+    - Chat and generate modes
+    - Thinking mode detection (Qwen3, DeepSeek)
+    - Automatic retry with exponential backoff
+    """
+
+    def __init__(self,
+                 model: str = "granite4:tiny-h",
+                 base_url: str = "http://localhost:11434",
+                 **kwargs):
+        """
+        Initialize Ollama client
+
+        Args:
+            model: Model name (e.g., "llama3", "granite4:tiny-h")
+            base_url: Ollama API URL
+            **kwargs: Additional configuration
+        """
+        super().__init__(model=model, **kwargs)
+        self.base_url = base_url
+        self.api_url = f"{base_url}/api/generate"
+        self.chat_url = f"{base_url}/api/chat"
+        self.tags_url = f"{base_url}/api/tags"
+
+        self.logger.debug(f"Initialized Ollama client: {base_url}, model: {model}")
+
+    def check_connection(self) -> bool:
+        """
+        Check if Ollama service is running
+
+        Returns:
+            True if service is available
+        """
+        try:
+            response = requests.get(self.tags_url, timeout=5)
+            return response.status_code == 200
+        except Exception as e:
+            self.logger.debug(f"Ollama connection check failed: {e}")
+            return False
+
+    def list_models(self) -> List[str]:
+        """
+        List available Ollama models
+
+        Returns:
+            List of model names
+        """
+        try:
+            response = requests.get(self.tags_url, timeout=5)
+            if response.status_code == 200:
+                data = response.json()
+                return [model['name'] for model in data.get('models', [])]
+            return []
+        except Exception as e:
+            self.logger.error(f"Failed to list models: {e}")
+            return []
+
+    def chat(self,
+             messages: List[Dict[str, str]],
+             temperature: float = 0.7,
+             max_tokens: int = 2000,
+             **kwargs) -> str:
+        """
+        Send chat request to Ollama
+
+        Args:
+            messages: Message history
+            temperature: Sampling temperature (0.0-1.0)
+            max_tokens: Maximum tokens in response
+            **kwargs: Additional Ollama-specific options
+
+        Returns:
+            Model response text
+
+        Raises:
+            ConnectionError: If cannot connect to Ollama
+            ValueError: If invalid parameters
+        """
+        # Validate messages
+        self._validate_messages(messages)
+
+        # Build payload
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": False,
+            "options": {
+                "temperature": temperature,
+                "num_predict": max_tokens,
+                "num_ctx": kwargs.get("num_ctx", 4096),
+                "top_k": kwargs.get("top_k", 40),
+                "top_p": kwargs.get("top_p", 0.9),
+                "num_thread": kwargs.get("num_thread", 8)
+            }
+        }
+
+        # Disable thinking mode for thinking-enabled models
+        # (Qwen3, DeepSeek) to get direct answers
+        if any(name in self.model.lower() for name in ['qwen', 'deepseek', 'qwq']):
+            payload["options"]["enable_thinking"] = False
+
+        # Send request with retry logic
+        max_retries = kwargs.get("max_retries", 3)
+        for attempt in range(max_retries):
+            try:
+                response = requests.post(
+                    self.chat_url,
+                    json=payload,
+                    timeout=kwargs.get("timeout", 120)
+                )
+
+                if response.status_code == 200:
+                    response_data = response.json()
+                    message = response_data.get('message', {})
+
+                    # Get content - primary response field
+                    result = message.get('content', '').strip()
+
+                    # Fallback: Extract from thinking if content is empty
+                    if not result and message.get('thinking'):
+                        result = self._extract_from_thinking(message.get('thinking', ''))
+
+                    if not result:
+                        self.logger.warning("Empty response from Ollama")
+                        if attempt < max_retries - 1:
+                            time.sleep(1.0 * (2 ** attempt))
+                            continue
+
+                    return result
+                else:
+                    error_msg = f"Ollama API error: {response.status_code} - {response.text}"
+                    self.logger.error(error_msg)
+                    if attempt < max_retries - 1:
+                        time.sleep(1.0 * (2 ** attempt))
+                        continue
+                    raise ConnectionError(error_msg)
+
+            except requests.exceptions.Timeout:
+                self.logger.warning(f"Ollama request timeout (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(2.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError("Ollama request timeout. Check if service is running.")
+
+            except requests.exceptions.ConnectionError as e:
+                self.logger.warning(f"Cannot connect to Ollama (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError(f"Cannot connect to Ollama at {self.base_url}. Make sure service is running.") from e
+
+            except Exception as e:
+                self.logger.error(f"Unexpected error: {e}")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise
+
+        raise ConnectionError("Failed to get response after maximum retries")
+
+    def _extract_from_thinking(self, thinking: str) -> str:
+        """
+        Extract actual answer from thinking process
+
+        Some models output reasoning process instead of direct answer.
+        This extracts the final answer from that process.
+
+        Args:
+            thinking: Thinking process text
+
+        Returns:
+            Extracted answer
+        """
+        if not thinking:
+            return ""
+
+        # Try to find answer after common separators
+        for separator in ['\n\nAnswer:', '\n\nFinal answer:',
+                          '\n\nResponse:', '\n\nSo the answer is:',
+                          '\n\n---\n', '\n\nOkay,', '\n\nTherefore,']:
+            if separator in thinking:
+                parts = thinking.split(separator)
+                if len(parts) > 1:
+                    return parts[-1].strip()
+
+        # Fallback: Get last meaningful paragraph
+        paragraphs = [p.strip() for p in thinking.split('\n\n') if p.strip()]
+        if paragraphs:
+            last_para = paragraphs[-1]
+            # Avoid meta-commentary
+            if not any(word in last_para.lower()
+                       for word in ['wait', 'hmm', 'let me', 'thinking', 'okay']):
+                return last_para
+
+        # If nothing else works, return the whole thinking
+        return thinking
+
+    def generate_with_memory_context(self,
+                                     user_message: str,
+                                     memory_summary: str,
+                                     recent_conversations: List[Dict]) -> str:
+        """
+        Generate response with memory context
+
+        This is a specialized method for MemAgent integration.
+
+        Args:
+            user_message: User's message
+            memory_summary: Summary of past interactions
+            recent_conversations: Recent conversation history
+
+        Returns:
+            Context-aware response
+        """
+        # Build system prompt
+        system_prompt = """You are a helpful customer service assistant.
+You can remember past conversations with users.
+Give short, clear and professional answers.
+Use past interactions intelligently."""
+
+        # Build message history
+        messages = [{"role": "system", "content": system_prompt}]
+
+        # Add memory summary
+        if memory_summary and memory_summary != "No interactions with this user yet.":
+            messages.append({
+                "role": "system",
+                "content": f"User history:\n{memory_summary}"
+            })
+
+        # Add recent conversations (last 3)
+        for conv in recent_conversations[-3:]:
+            messages.append({"role": "user", "content": conv.get('user_message', '')})
+            messages.append({"role": "assistant", "content": conv.get('bot_response', '')})
+
+        # Add current message
+        messages.append({"role": "user", "content": user_message})
+
+        return self.chat(messages, temperature=0.7)
+
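Beyond plain `chat()`, the Ollama client adds `generate_with_memory_context()` for MemAgent integration; the sketch below shows one way it might be called, with placeholder memory data (the import path is an assumption).

```python
# Hedged sketch of the memory-aware call path defined above; the summary and
# conversation history are illustrative placeholders, the import path assumed.
from mem_llm.clients.ollama_client import OllamaClient

client = OllamaClient(model="granite4:tiny-h", base_url="http://localhost:11434")
if client.check_connection():
    reply = client.generate_with_memory_context(
        user_message="Where is my order?",
        memory_summary="Returning customer; asked about shipping twice last week.",
        recent_conversations=[
            {"user_message": "Has my order shipped?",
             "bot_response": "Yes, it shipped on Monday."},
        ],
    )
    print(reply)
```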
mem_llm/config_manager.py
CHANGED