mem-llm 1.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mem-llm might be problematic.

Files changed (41)
  1. mem_llm/__init__.py +71 -8
  2. mem_llm/api_server.py +595 -0
  3. mem_llm/base_llm_client.py +201 -0
  4. mem_llm/builtin_tools.py +311 -0
  5. mem_llm/builtin_tools_async.py +170 -0
  6. mem_llm/cli.py +254 -0
  7. mem_llm/clients/__init__.py +22 -0
  8. mem_llm/clients/lmstudio_client.py +393 -0
  9. mem_llm/clients/ollama_client.py +354 -0
  10. mem_llm/config.yaml.example +1 -1
  11. mem_llm/config_from_docs.py +1 -1
  12. mem_llm/config_manager.py +5 -3
  13. mem_llm/conversation_summarizer.py +372 -0
  14. mem_llm/data_export_import.py +640 -0
  15. mem_llm/dynamic_prompt.py +298 -0
  16. mem_llm/llm_client.py +77 -14
  17. mem_llm/llm_client_factory.py +260 -0
  18. mem_llm/logger.py +129 -0
  19. mem_llm/mem_agent.py +1178 -87
  20. mem_llm/memory_db.py +290 -59
  21. mem_llm/memory_manager.py +60 -1
  22. mem_llm/prompt_security.py +304 -0
  23. mem_llm/response_metrics.py +221 -0
  24. mem_llm/retry_handler.py +193 -0
  25. mem_llm/thread_safe_db.py +301 -0
  26. mem_llm/tool_system.py +537 -0
  27. mem_llm/vector_store.py +278 -0
  28. mem_llm/web_launcher.py +129 -0
  29. mem_llm/web_ui/README.md +44 -0
  30. mem_llm/web_ui/__init__.py +7 -0
  31. mem_llm/web_ui/index.html +641 -0
  32. mem_llm/web_ui/memory.html +569 -0
  33. mem_llm/web_ui/metrics.html +75 -0
  34. mem_llm-2.1.0.dist-info/METADATA +753 -0
  35. mem_llm-2.1.0.dist-info/RECORD +40 -0
  36. {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/WHEEL +1 -1
  37. mem_llm-2.1.0.dist-info/entry_points.txt +3 -0
  38. mem_llm/prompt_templates.py +0 -244
  39. mem_llm-1.0.2.dist-info/METADATA +0 -382
  40. mem_llm-1.0.2.dist-info/RECORD +0 -15
  41. {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/top_level.txt +0 -0
mem_llm/clients/ollama_client.py ADDED
@@ -0,0 +1,354 @@
+"""
+Ollama LLM Client
+=================
+
+Client for local Ollama service.
+Supports all Ollama models (Llama3, Granite, Qwen3, DeepSeek, etc.)
+
+Author: C. Emre Karataş
+Version: 1.3.0
+"""
+
+import requests
+import time
+import json
+from typing import List, Dict, Optional, Iterator
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from base_llm_client import BaseLLMClient
+
+
+class OllamaClient(BaseLLMClient):
+    """
+    Ollama LLM client implementation
+
+    Supports:
+    - All Ollama models
+    - Chat and generate modes
+    - Thinking mode detection (Qwen3, DeepSeek)
+    - Automatic retry with exponential backoff
+    """
+
+    def __init__(self,
+                 model: str = "granite4:3b",
+                 base_url: str = "http://localhost:11434",
+                 **kwargs):
+        """
+        Initialize Ollama client
+
+        Args:
+            model: Model name (e.g., "llama3", "granite4:3b")
+            base_url: Ollama API URL
+            **kwargs: Additional configuration
+        """
+        super().__init__(model=model, **kwargs)
+        self.base_url = base_url
+        self.api_url = f"{base_url}/api/generate"
+        self.chat_url = f"{base_url}/api/chat"
+        self.tags_url = f"{base_url}/api/tags"
+
+        self.logger.debug(f"Initialized Ollama client: {base_url}, model: {model}")
+
+    def check_connection(self) -> bool:
+        """
+        Check if Ollama service is running
+
+        Returns:
+            True if service is available
+        """
+        try:
+            response = requests.get(self.tags_url, timeout=5)
+            return response.status_code == 200
+        except Exception as e:
+            self.logger.debug(f"Ollama connection check failed: {e}")
+            return False
+
+    def list_models(self) -> List[str]:
+        """
+        List available Ollama models
+
+        Returns:
+            List of model names
+        """
+        try:
+            response = requests.get(self.tags_url, timeout=5)
+            if response.status_code == 200:
+                data = response.json()
+                return [model['name'] for model in data.get('models', [])]
+            return []
+        except Exception as e:
+            self.logger.error(f"Failed to list models: {e}")
+            return []
+
+    def chat(self,
+             messages: List[Dict[str, str]],
+             temperature: float = 0.7,
+             max_tokens: int = 2000,
+             **kwargs) -> str:
+        """
+        Send chat request to Ollama
+
+        Args:
+            messages: Message history
+            temperature: Sampling temperature (0.0-1.0)
+            max_tokens: Maximum tokens in response
+            **kwargs: Additional Ollama-specific options
+
+        Returns:
+            Model response text
+
+        Raises:
+            ConnectionError: If cannot connect to Ollama
+            ValueError: If invalid parameters
+        """
+        # Validate messages
+        self._validate_messages(messages)
+
+        # Build payload
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": False,
+            "options": {
+                "temperature": temperature,
+                "num_predict": max_tokens,
+                "num_ctx": kwargs.get("num_ctx", 4096),
+                "top_k": kwargs.get("top_k", 40),
+                "top_p": kwargs.get("top_p", 0.9),
+                "num_thread": kwargs.get("num_thread", 8)
+            }
+        }
+
+        # Disable thinking mode for thinking-enabled models
+        # (Qwen3, DeepSeek) to get direct answers
+        if any(name in self.model.lower() for name in ['qwen', 'deepseek', 'qwq']):
+            payload["options"]["enable_thinking"] = False
+
+        # Send request with retry logic
+        max_retries = kwargs.get("max_retries", 3)
+        for attempt in range(max_retries):
+            try:
+                response = requests.post(
+                    self.chat_url,
+                    json=payload,
+                    timeout=kwargs.get("timeout", 120)
+                )
+
+                if response.status_code == 200:
+                    response_data = response.json()
+                    message = response_data.get('message', {})
+
+                    # Get content - primary response field
+                    result = message.get('content', '').strip()
+
+                    # Fallback: Extract from thinking if content is empty
+                    if not result and message.get('thinking'):
+                        result = self._extract_from_thinking(message.get('thinking', ''))
+
+                    if not result:
+                        self.logger.warning("Empty response from Ollama")
+                        if attempt < max_retries - 1:
+                            time.sleep(1.0 * (2 ** attempt))
+                            continue
+
+                    return result
+                else:
+                    error_msg = f"Ollama API error: {response.status_code} - {response.text}"
+                    self.logger.error(error_msg)
+                    if attempt < max_retries - 1:
+                        time.sleep(1.0 * (2 ** attempt))
+                        continue
+                    raise ConnectionError(error_msg)
+
+            except requests.exceptions.Timeout:
+                self.logger.warning(f"Ollama request timeout (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(2.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError("Ollama request timeout. Check if service is running.")
+
+            except requests.exceptions.ConnectionError as e:
+                self.logger.warning(f"Cannot connect to Ollama (attempt {attempt + 1}/{max_retries})")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise ConnectionError(f"Cannot connect to Ollama at {self.base_url}. Make sure service is running.") from e
+
+            except Exception as e:
+                self.logger.error(f"Unexpected error: {e}")
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                raise
+
+        raise ConnectionError("Failed to get response after maximum retries")
+
+    def _extract_from_thinking(self, thinking: str) -> str:
+        """
+        Extract actual answer from thinking process
+
+        Some models output reasoning process instead of direct answer.
+        This extracts the final answer from that process.
+
+        Args:
+            thinking: Thinking process text
+
+        Returns:
+            Extracted answer
+        """
+        if not thinking:
+            return ""
+
+        # Try to find answer after common separators
+        for separator in ['\n\nAnswer:', '\n\nFinal answer:',
+                          '\n\nResponse:', '\n\nSo the answer is:',
+                          '\n\n---\n', '\n\nOkay,', '\n\nTherefore,']:
+            if separator in thinking:
+                parts = thinking.split(separator)
+                if len(parts) > 1:
+                    return parts[-1].strip()
+
+        # Fallback: Get last meaningful paragraph
+        paragraphs = [p.strip() for p in thinking.split('\n\n') if p.strip()]
+        if paragraphs:
+            last_para = paragraphs[-1]
+            # Avoid meta-commentary
+            if not any(word in last_para.lower()
+                       for word in ['wait', 'hmm', 'let me', 'thinking', 'okay']):
+                return last_para
+
+        # If nothing else works, return the whole thinking
+        return thinking
+
+    def chat_stream(self,
+                    messages: List[Dict[str, str]],
+                    temperature: float = 0.7,
+                    max_tokens: int = 2000,
+                    **kwargs) -> Iterator[str]:
+        """
+        Send chat request to Ollama with streaming response
+
+        Args:
+            messages: Message history
+            temperature: Sampling temperature (0.0-1.0)
+            max_tokens: Maximum tokens in response
+            **kwargs: Additional Ollama-specific options
+
+        Yields:
+            Response text chunks as they arrive
+
+        Raises:
+            ConnectionError: If cannot connect to Ollama
+            ValueError: If invalid parameters
+        """
+        # Validate messages
+        self._validate_messages(messages)
+
+        # Build payload
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": True,  # Enable streaming
+            "options": {
+                "temperature": temperature,
+                "num_predict": max_tokens,
+                "num_ctx": kwargs.get("num_ctx", 4096),
+                "top_k": kwargs.get("top_k", 40),
+                "top_p": kwargs.get("top_p", 0.9),
+                "num_thread": kwargs.get("num_thread", 8)
+            }
+        }
+
+        # Disable thinking mode for thinking-enabled models
+        if any(name in self.model.lower() for name in ['qwen', 'deepseek', 'qwq']):
+            payload["options"]["enable_thinking"] = False
+
+        try:
+            response = requests.post(
+                self.chat_url,
+                json=payload,
+                stream=True,  # Enable streaming
+                timeout=kwargs.get("timeout", 120)
+            )
+
+            if response.status_code == 200:
+                # Process streaming response
+                for line in response.iter_lines():
+                    if line:
+                        try:
+                            chunk_data = json.loads(line.decode('utf-8'))
+
+                            # Get message content
+                            message = chunk_data.get('message', {})
+                            content = message.get('content', '')
+
+                            if content:
+                                yield content
+
+                            # Check if this is the final chunk
+                            if chunk_data.get('done', False):
+                                break
+
+                        except json.JSONDecodeError as e:
+                            self.logger.warning(f"Failed to parse streaming chunk: {e}")
+                            continue
+            else:
+                error_msg = f"Ollama API error: {response.status_code} - {response.text}"
+                self.logger.error(error_msg)
+                raise ConnectionError(error_msg)
+
+        except requests.exceptions.Timeout:
+            raise ConnectionError("Ollama request timeout. Check if service is running.")
+        except requests.exceptions.ConnectionError as e:
+            raise ConnectionError(f"Cannot connect to Ollama at {self.base_url}. Make sure service is running.") from e
+        except Exception as e:
+            self.logger.error(f"Unexpected error in streaming: {e}")
+            raise
+
+    def generate_with_memory_context(self,
+                                     user_message: str,
+                                     memory_summary: str,
+                                     recent_conversations: List[Dict]) -> str:
+        """
+        Generate response with memory context
+
+        This is a specialized method for MemAgent integration.
+
+        Args:
+            user_message: User's message
+            memory_summary: Summary of past interactions
+            recent_conversations: Recent conversation history
+
+        Returns:
+            Context-aware response
+        """
+        # Build system prompt
+        system_prompt = """You are a helpful customer service assistant.
+You can remember past conversations with users.
+Give short, clear and professional answers.
+Use past interactions intelligently."""
+
+        # Build message history
+        messages = [{"role": "system", "content": system_prompt}]
+
+        # Add memory summary
+        if memory_summary and memory_summary != "No interactions with this user yet.":
+            messages.append({
+                "role": "system",
+                "content": f"User history:\n{memory_summary}"
+            })
+
+        # Add recent conversations (last 3)
+        for conv in recent_conversations[-3:]:
+            messages.append({"role": "user", "content": conv.get('user_message', '')})
+            messages.append({"role": "assistant", "content": conv.get('bot_response', '')})
+
+        # Add current message
+        messages.append({"role": "user", "content": user_message})
+
+        return self.chat(messages, temperature=0.7)
+
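For orientation, the client added above can be exercised on its own. The sketch below is illustrative only: it assumes OllamaClient is importable as mem_llm.clients.ollama_client and that a local Ollama server is listening on the default http://localhost:11434; the model name and prompts are placeholders, not taken from the package.

# Minimal usage sketch (assumptions: import path and a running local Ollama server).
from mem_llm.clients.ollama_client import OllamaClient

client = OllamaClient(model="granite4:3b")

if not client.check_connection():
    raise SystemExit("Ollama is not reachable - is the service running on localhost:11434?")

print("Available models:", client.list_models())

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "In one sentence, what does a vector store do?"},
]

# Blocking call: returns the full response text (retry/backoff handled inside chat())
print(client.chat(messages, temperature=0.7, max_tokens=200))

# Streaming call: yields text chunks as Ollama produces them
for chunk in client.chat_stream(messages, max_tokens=200):
    print(chunk, end="", flush=True)
print()

generate_with_memory_context() follows the same pattern: it assembles the system prompt, an optional memory summary, and the last three conversation turns into a message list before delegating to chat().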
mem_llm/config.yaml.example CHANGED
@@ -6,7 +6,7 @@ usage_mode: "personal"
 
 # LLM Settings
 llm:
-  model: "granite4:tiny-h"
+  model: "granite4:3b"
   base_url: "http://localhost:11434"
   temperature: 0.7
   max_tokens: 500
mem_llm/config_from_docs.py CHANGED
@@ -67,7 +67,7 @@ def generate_config_from_text(text: str, company_name: Optional[str] = None) ->
         "usage_mode": "business",  # or "personal"
 
         "llm": {
-            "model": "granite4:tiny-h",
+            "model": "granite4:3b",
             "temperature": 0.3,
             "max_tokens": 300,
             "ollama_url": "http://localhost:11434"
mem_llm/config_manager.py CHANGED
@@ -35,7 +35,7 @@ class ConfigManager:
         """Returns default configuration"""
         return {
             "llm": {
-                "model": "granite4:tiny-h",
+                "model": "granite4:3b",
                 "base_url": "http://localhost:11434",
                 "temperature": 0.7,
                 "max_tokens": 500
@@ -43,7 +43,7 @@ class ConfigManager:
             "memory": {
                 "backend": "sql",
                 "json_dir": "memories",
-                "db_path": "memories.db",
+                "db_path": "memories/memories.db",
                 "max_conversations_per_user": 1000,
                 "auto_cleanup": True,
                 "cleanup_after_days": 90
@@ -62,7 +62,9 @@ class ConfigManager:
                 "default_kb": "ecommerce",
                 "custom_kb_file": None,
                 "search_limit": 5,
-                "min_relevance_score": 0.3
+                "min_relevance_score": 0.3,
+                "enable_vector_search": False,  # v1.3.2+ - Optional semantic search
+                "embedding_model": "all-MiniLM-L6-v2"  # Sentence transformers model
             },
             "response": {
                 "use_knowledge_base": True,