mem-llm 1.2.0-py3-none-any.whl → 1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mem-llm might be problematic.

@@ -0,0 +1,280 @@
+ """
+ LM Studio LLM Client
+ ====================
+
+ Client for LM Studio local inference server.
+ LM Studio uses OpenAI-compatible API format.
+
+ Features:
+ - OpenAI-compatible API
+ - Fast local inference
+ - Easy model switching
+ - GPU acceleration support
+
+ Installation:
+ 1. Download LM Studio from https://lmstudio.ai
+ 2. Load a model
+ 3. Start local server (default: http://localhost:1234)
+
+ Author: C. Emre Karataş
+ Version: 1.3.0
+ """
+
+ import requests
+ import time
+ from typing import List, Dict, Optional
+ import sys
+ import os
+
+ # Add parent directory to path for imports
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ from base_llm_client import BaseLLMClient
+
+
+ class LMStudioClient(BaseLLMClient):
+     """
+     LM Studio client implementation
+
+     LM Studio provides an OpenAI-compatible API for local models.
+     This client works with any model loaded in LM Studio.
+
+     Usage:
+         client = LMStudioClient(
+             model="local-model",  # or specific model name
+             base_url="http://localhost:1234"
+         )
+         response = client.chat([{"role": "user", "content": "Hello!"}])
+     """
+
+     def __init__(self,
+                  model: str = "local-model",
+                  base_url: str = "http://localhost:1234",
+                  **kwargs):
+         """
+         Initialize LM Studio client
+
+         Args:
+             model: Model identifier (use "local-model" for default loaded model)
+             base_url: LM Studio server URL (default: http://localhost:1234)
+             **kwargs: Additional configuration
+         """
+         super().__init__(model=model, **kwargs)
+         self.base_url = base_url.rstrip('/')
+         self.chat_url = f"{self.base_url}/v1/chat/completions"
+         self.models_url = f"{self.base_url}/v1/models"
+
+         self.logger.debug(f"Initialized LM Studio client: {base_url}, model: {model}")
+
+     def check_connection(self) -> bool:
+         """
+         Check if LM Studio server is running
+
+         Returns:
+             True if server is available
+         """
+         try:
+             response = requests.get(self.models_url, timeout=5)
+             return response.status_code == 200
+         except Exception as e:
+             self.logger.debug(f"LM Studio connection check failed: {e}")
+             return False
+
+     def list_models(self) -> List[str]:
+         """
+         List available models in LM Studio
+
+         Returns:
+             List of model identifiers
+         """
+         try:
+             response = requests.get(self.models_url, timeout=5)
+             if response.status_code == 200:
+                 data = response.json()
+                 models = data.get('data', [])
+                 return [model.get('id', '') for model in models if model.get('id')]
+             return []
+         except Exception as e:
+             self.logger.error(f"Failed to list models: {e}")
+             return []
+
+     def chat(self,
+              messages: List[Dict[str, str]],
+              temperature: float = 0.7,
+              max_tokens: int = 2000,
+              **kwargs) -> str:
+         """
+         Send chat request to LM Studio
+
+         Uses OpenAI-compatible chat completions endpoint.
+
+         Args:
+             messages: Message history in OpenAI format
+             temperature: Sampling temperature (0.0-2.0)
+             max_tokens: Maximum tokens in response
+             **kwargs: Additional OpenAI-compatible parameters
+                 - top_p: Nucleus sampling (0.0-1.0)
+                 - frequency_penalty: (-2.0 to 2.0)
+                 - presence_penalty: (-2.0 to 2.0)
+                 - stream: Enable streaming (bool)
+
+         Returns:
+             Model response text
+
+         Raises:
+             ConnectionError: If cannot connect to LM Studio
+             ValueError: If invalid parameters
+         """
+         # Validate messages
+         self._validate_messages(messages)
+
+         # Build OpenAI-compatible payload
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "temperature": temperature,
+             "max_tokens": max_tokens,
+             "stream": kwargs.get("stream", False)
+         }
+
+         # Add optional parameters
+         if "top_p" in kwargs:
+             payload["top_p"] = kwargs["top_p"]
+         if "frequency_penalty" in kwargs:
+             payload["frequency_penalty"] = kwargs["frequency_penalty"]
+         if "presence_penalty" in kwargs:
+             payload["presence_penalty"] = kwargs["presence_penalty"]
+         if "stop" in kwargs:
+             payload["stop"] = kwargs["stop"]
+
+         # Send request with retry logic
+         max_retries = kwargs.get("max_retries", 3)
+         for attempt in range(max_retries):
+             try:
+                 response = requests.post(
+                     self.chat_url,
+                     json=payload,
+                     timeout=kwargs.get("timeout", 120)
+                 )
+
+                 if response.status_code == 200:
+                     response_data = response.json()
+
+                     # Extract content from OpenAI format
+                     choices = response_data.get('choices', [])
+                     if not choices:
+                         self.logger.warning("No choices in LM Studio response")
+                         if attempt < max_retries - 1:
+                             time.sleep(1.0 * (2 ** attempt))
+                             continue
+                         return ""
+
+                     # Get the message content
+                     message = choices[0].get('message', {})
+                     content = message.get('content', '').strip()
+
+                     if not content:
+                         self.logger.warning("Empty content in LM Studio response")
+                         if attempt < max_retries - 1:
+                             time.sleep(1.0 * (2 ** attempt))
+                             continue
+
+                     # Log usage statistics if available
+                     usage = response_data.get('usage', {})
+                     if usage:
+                         self.logger.debug(
+                             f"LM Studio usage - "
+                             f"prompt: {usage.get('prompt_tokens', 0)} tokens, "
+                             f"completion: {usage.get('completion_tokens', 0)} tokens, "
+                             f"total: {usage.get('total_tokens', 0)} tokens"
+                         )
+
+                     return content
+
+                 else:
+                     error_msg = f"LM Studio API error: {response.status_code}"
+                     try:
+                         error_data = response.json()
+                         error_detail = error_data.get('error', {})
+                         if isinstance(error_detail, dict):
+                             error_msg += f" - {error_detail.get('message', response.text)}"
+                         else:
+                             error_msg += f" - {error_detail}"
+                     except:
+                         error_msg += f" - {response.text[:200]}"
+
+                     self.logger.error(error_msg)
+
+                     if attempt < max_retries - 1:
+                         time.sleep(1.0 * (2 ** attempt))
+                         continue
+                     raise ConnectionError(error_msg)
+
+             except requests.exceptions.Timeout:
+                 self.logger.warning(f"LM Studio request timeout (attempt {attempt + 1}/{max_retries})")
+                 if attempt < max_retries - 1:
+                     time.sleep(2.0 * (2 ** attempt))
+                     continue
+                 raise ConnectionError("LM Studio request timeout. Check if server is running.")
+
+             except requests.exceptions.ConnectionError as e:
+                 self.logger.warning(f"Cannot connect to LM Studio (attempt {attempt + 1}/{max_retries})")
+                 if attempt < max_retries - 1:
+                     time.sleep(1.0 * (2 ** attempt))
+                     continue
+                 raise ConnectionError(
+                     f"Cannot connect to LM Studio at {self.base_url}. "
+                     "Make sure LM Studio is running and server is started."
+                 ) from e
+
+             except Exception as e:
+                 self.logger.error(f"Unexpected error: {e}")
+                 if attempt < max_retries - 1:
+                     time.sleep(1.0 * (2 ** attempt))
+                     continue
+                 raise
+
+         raise ConnectionError("Failed to get response after maximum retries")
+
+     def get_model_info(self) -> Dict:
+         """
+         Get information about currently loaded model
+
+         Returns:
+             Dictionary with model information
+         """
+         try:
+             response = requests.get(self.models_url, timeout=5)
+             if response.status_code == 200:
+                 data = response.json()
+                 models = data.get('data', [])
+
+                 # Find our model or return first one
+                 for model in models:
+                     if model.get('id') == self.model:
+                         return model
+
+                 # Return first model if ours not found
+                 return models[0] if models else {}
+             return {}
+         except Exception as e:
+             self.logger.error(f"Failed to get model info: {e}")
+             return {}
+
+     def get_info(self) -> Dict:
+         """
+         Get comprehensive client information
+
+         Returns:
+             Dictionary with client and model metadata
+         """
+         base_info = super().get_info()
+
+         # Add LM Studio specific info
+         if self.check_connection():
+             model_info = self.get_model_info()
+             base_info['model_details'] = model_info
+             base_info['available_models'] = self.list_models()
+
+         return base_info
+
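For context, here is a minimal usage sketch for the new LM Studio client. It assumes an LM Studio server is already running on the default port with a model loaded; the import path below is an assumption, since the diff does not show where the file sits inside the package.

```python
# Hypothetical usage sketch -- the import path is assumed, not shown in this diff.
from lmstudio_client import LMStudioClient  # adjust to the package's actual layout

client = LMStudioClient(model="local-model", base_url="http://localhost:1234")

if client.check_connection():
    print("Available models:", client.list_models())
    reply = client.chat(
        [{"role": "user", "content": "Summarize what LM Studio does in one sentence."}],
        temperature=0.2,
        max_tokens=200,
    )
    print(reply)
else:
    print("LM Studio is not reachable; load a model and start its local server first.")
```

Note that `chat()` retries failed requests with exponential backoff (1 s, 2 s, 4 s by default), so a transient failure only surfaces as a `ConnectionError` after `max_retries` attempts.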
@@ -0,0 +1,268 @@
+ """
+ Ollama LLM Client
+ =================
+
+ Client for local Ollama service.
+ Supports all Ollama models (Llama3, Granite, Qwen3, DeepSeek, etc.)
+
+ Author: C. Emre Karataş
+ Version: 1.3.0
+ """
+
+ import requests
+ import time
+ from typing import List, Dict, Optional
+ import sys
+ import os
+
+ # Add parent directory to path for imports
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+ from base_llm_client import BaseLLMClient
+
+
+ class OllamaClient(BaseLLMClient):
+     """
+     Ollama LLM client implementation
+
+     Supports:
+     - All Ollama models
+     - Chat and generate modes
+     - Thinking mode detection (Qwen3, DeepSeek)
+     - Automatic retry with exponential backoff
+     """
+
+     def __init__(self,
+                  model: str = "granite4:tiny-h",
+                  base_url: str = "http://localhost:11434",
+                  **kwargs):
+         """
+         Initialize Ollama client
+
+         Args:
+             model: Model name (e.g., "llama3", "granite4:tiny-h")
+             base_url: Ollama API URL
+             **kwargs: Additional configuration
+         """
+         super().__init__(model=model, **kwargs)
+         self.base_url = base_url
+         self.api_url = f"{base_url}/api/generate"
+         self.chat_url = f"{base_url}/api/chat"
+         self.tags_url = f"{base_url}/api/tags"
+
+         self.logger.debug(f"Initialized Ollama client: {base_url}, model: {model}")
+
+     def check_connection(self) -> bool:
+         """
+         Check if Ollama service is running
+
+         Returns:
+             True if service is available
+         """
+         try:
+             response = requests.get(self.tags_url, timeout=5)
+             return response.status_code == 200
+         except Exception as e:
+             self.logger.debug(f"Ollama connection check failed: {e}")
+             return False
+
+     def list_models(self) -> List[str]:
+         """
+         List available Ollama models
+
+         Returns:
+             List of model names
+         """
+         try:
+             response = requests.get(self.tags_url, timeout=5)
+             if response.status_code == 200:
+                 data = response.json()
+                 return [model['name'] for model in data.get('models', [])]
+             return []
+         except Exception as e:
+             self.logger.error(f"Failed to list models: {e}")
+             return []
+
+     def chat(self,
+              messages: List[Dict[str, str]],
+              temperature: float = 0.7,
+              max_tokens: int = 2000,
+              **kwargs) -> str:
+         """
+         Send chat request to Ollama
+
+         Args:
+             messages: Message history
+             temperature: Sampling temperature (0.0-1.0)
+             max_tokens: Maximum tokens in response
+             **kwargs: Additional Ollama-specific options
+
+         Returns:
+             Model response text
+
+         Raises:
+             ConnectionError: If cannot connect to Ollama
+             ValueError: If invalid parameters
+         """
+         # Validate messages
+         self._validate_messages(messages)
+
+         # Build payload
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "stream": False,
+             "options": {
+                 "temperature": temperature,
+                 "num_predict": max_tokens,
+                 "num_ctx": kwargs.get("num_ctx", 4096),
+                 "top_k": kwargs.get("top_k", 40),
+                 "top_p": kwargs.get("top_p", 0.9),
+                 "num_thread": kwargs.get("num_thread", 8)
+             }
+         }
+
+         # Disable thinking mode for thinking-enabled models
+         # (Qwen3, DeepSeek) to get direct answers
+         if any(name in self.model.lower() for name in ['qwen', 'deepseek', 'qwq']):
+             payload["options"]["enable_thinking"] = False
+
+         # Send request with retry logic
+         max_retries = kwargs.get("max_retries", 3)
+         for attempt in range(max_retries):
+             try:
+                 response = requests.post(
+                     self.chat_url,
+                     json=payload,
+                     timeout=kwargs.get("timeout", 120)
+                 )
+
+                 if response.status_code == 200:
+                     response_data = response.json()
+                     message = response_data.get('message', {})
+
+                     # Get content - primary response field
+                     result = message.get('content', '').strip()
+
+                     # Fallback: Extract from thinking if content is empty
+                     if not result and message.get('thinking'):
+                         result = self._extract_from_thinking(message.get('thinking', ''))
+
+                     if not result:
+                         self.logger.warning("Empty response from Ollama")
+                         if attempt < max_retries - 1:
+                             time.sleep(1.0 * (2 ** attempt))
+                             continue
+
+                     return result
+                 else:
+                     error_msg = f"Ollama API error: {response.status_code} - {response.text}"
+                     self.logger.error(error_msg)
+                     if attempt < max_retries - 1:
+                         time.sleep(1.0 * (2 ** attempt))
+                         continue
+                     raise ConnectionError(error_msg)
+
+             except requests.exceptions.Timeout:
+                 self.logger.warning(f"Ollama request timeout (attempt {attempt + 1}/{max_retries})")
+                 if attempt < max_retries - 1:
+                     time.sleep(2.0 * (2 ** attempt))
+                     continue
+                 raise ConnectionError("Ollama request timeout. Check if service is running.")
+
+             except requests.exceptions.ConnectionError as e:
+                 self.logger.warning(f"Cannot connect to Ollama (attempt {attempt + 1}/{max_retries})")
+                 if attempt < max_retries - 1:
+                     time.sleep(1.0 * (2 ** attempt))
+                     continue
+                 raise ConnectionError(f"Cannot connect to Ollama at {self.base_url}. Make sure service is running.") from e
+
+             except Exception as e:
+                 self.logger.error(f"Unexpected error: {e}")
+                 if attempt < max_retries - 1:
+                     time.sleep(1.0 * (2 ** attempt))
+                     continue
+                 raise
+
+         raise ConnectionError("Failed to get response after maximum retries")
+
+     def _extract_from_thinking(self, thinking: str) -> str:
+         """
+         Extract actual answer from thinking process
+
+         Some models output reasoning process instead of direct answer.
+         This extracts the final answer from that process.
+
+         Args:
+             thinking: Thinking process text
+
+         Returns:
+             Extracted answer
+         """
+         if not thinking:
+             return ""
+
+         # Try to find answer after common separators
+         for separator in ['\n\nAnswer:', '\n\nFinal answer:',
+                           '\n\nResponse:', '\n\nSo the answer is:',
+                           '\n\n---\n', '\n\nOkay,', '\n\nTherefore,']:
+             if separator in thinking:
+                 parts = thinking.split(separator)
+                 if len(parts) > 1:
+                     return parts[-1].strip()
+
+         # Fallback: Get last meaningful paragraph
+         paragraphs = [p.strip() for p in thinking.split('\n\n') if p.strip()]
+         if paragraphs:
+             last_para = paragraphs[-1]
+             # Avoid meta-commentary
+             if not any(word in last_para.lower()
+                        for word in ['wait', 'hmm', 'let me', 'thinking', 'okay']):
+                 return last_para
+
+         # If nothing else works, return the whole thinking
+         return thinking
+
+     def generate_with_memory_context(self,
+                                      user_message: str,
+                                      memory_summary: str,
+                                      recent_conversations: List[Dict]) -> str:
+         """
+         Generate response with memory context
+
+         This is a specialized method for MemAgent integration.
+
+         Args:
+             user_message: User's message
+             memory_summary: Summary of past interactions
+             recent_conversations: Recent conversation history
+
+         Returns:
+             Context-aware response
+         """
+         # Build system prompt
+         system_prompt = """You are a helpful customer service assistant.
+         You can remember past conversations with users.
+         Give short, clear and professional answers.
+         Use past interactions intelligently."""
+
+         # Build message history
+         messages = [{"role": "system", "content": system_prompt}]
+
+         # Add memory summary
+         if memory_summary and memory_summary != "No interactions with this user yet.":
+             messages.append({
+                 "role": "system",
+                 "content": f"User history:\n{memory_summary}"
+             })
+
+         # Add recent conversations (last 3)
+         for conv in recent_conversations[-3:]:
+             messages.append({"role": "user", "content": conv.get('user_message', '')})
+             messages.append({"role": "assistant", "content": conv.get('bot_response', '')})
+
+         # Add current message
+         messages.append({"role": "user", "content": user_message})
+
+         return self.chat(messages, temperature=0.7)
+
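As above, a minimal usage sketch for the new Ollama client, including the memory-aware helper it adds for MemAgent integration. The import path and the example conversation data are assumptions, and the model must already be pulled into the local Ollama instance.

```python
# Hypothetical usage sketch -- import path and example data are assumptions.
from ollama_client import OllamaClient  # adjust to the package's actual layout

client = OllamaClient(model="granite4:tiny-h", base_url="http://localhost:11434")

if client.check_connection():
    # Plain chat call; for thinking-capable models (Qwen3, DeepSeek, QwQ) the
    # client injects enable_thinking=False into the request options.
    answer = client.chat(
        [{"role": "user", "content": "What is retrieval-augmented generation?"}],
        temperature=0.3,
    )
    print(answer)

    # Memory-aware call used by the MemAgent integration.
    reply = client.generate_with_memory_context(
        user_message="Was my earlier shipping problem resolved?",
        memory_summary="User reported a delayed shipment on their last order.",
        recent_conversations=[
            {"user_message": "My order is late.",
             "bot_response": "I've escalated the delay to the courier."},
        ],
    )
    print(reply)
else:
    print("Ollama is not reachable; start the service (e.g. `ollama serve`) first.")
```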