mem-llm 1.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
Potentially problematic release: this version of mem-llm has been flagged as potentially problematic.
- mem_llm/__init__.py +71 -8
- mem_llm/api_server.py +595 -0
- mem_llm/base_llm_client.py +201 -0
- mem_llm/builtin_tools.py +311 -0
- mem_llm/builtin_tools_async.py +170 -0
- mem_llm/cli.py +254 -0
- mem_llm/clients/__init__.py +22 -0
- mem_llm/clients/lmstudio_client.py +393 -0
- mem_llm/clients/ollama_client.py +354 -0
- mem_llm/config.yaml.example +1 -1
- mem_llm/config_from_docs.py +1 -1
- mem_llm/config_manager.py +5 -3
- mem_llm/conversation_summarizer.py +372 -0
- mem_llm/data_export_import.py +640 -0
- mem_llm/dynamic_prompt.py +298 -0
- mem_llm/llm_client.py +77 -14
- mem_llm/llm_client_factory.py +260 -0
- mem_llm/logger.py +129 -0
- mem_llm/mem_agent.py +1178 -87
- mem_llm/memory_db.py +290 -59
- mem_llm/memory_manager.py +60 -1
- mem_llm/prompt_security.py +304 -0
- mem_llm/response_metrics.py +221 -0
- mem_llm/retry_handler.py +193 -0
- mem_llm/thread_safe_db.py +301 -0
- mem_llm/tool_system.py +537 -0
- mem_llm/vector_store.py +278 -0
- mem_llm/web_launcher.py +129 -0
- mem_llm/web_ui/README.md +44 -0
- mem_llm/web_ui/__init__.py +7 -0
- mem_llm/web_ui/index.html +641 -0
- mem_llm/web_ui/memory.html +569 -0
- mem_llm/web_ui/metrics.html +75 -0
- mem_llm-2.1.0.dist-info/METADATA +753 -0
- mem_llm-2.1.0.dist-info/RECORD +40 -0
- {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/WHEEL +1 -1
- mem_llm-2.1.0.dist-info/entry_points.txt +3 -0
- mem_llm/prompt_templates.py +0 -244
- mem_llm-1.0.2.dist-info/METADATA +0 -382
- mem_llm-1.0.2.dist-info/RECORD +0 -15
- {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/top_level.txt +0 -0
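Taken together, the new modules point to a factory-based client layer (`llm_client_factory.py`, `clients/`) and a feature-aware prompt builder (`dynamic_prompt.py`). A minimal sketch of how the 2.1.0 pieces appear to fit together, based only on the APIs visible in the diffs below — the import paths and method names come from those diffs, but the wiring itself is our assumption, not code shipped in the package:

# Hypothetical wiring sketch -- not taken from the package itself.
from mem_llm.llm_client_factory import LLMClientFactory
from mem_llm.dynamic_prompt import dynamic_prompt_builder

client = LLMClientFactory.auto_detect()   # first running local backend, or None
if client is None:
    raise SystemExit("No local LLM service (Ollama/LM Studio) detected")

system_prompt = dynamic_prompt_builder.build_prompt(
    usage_mode="personal",
    has_knowledge_base=False,
    has_tools=False,
    memory_type="sql",
)

# chat() follows the OllamaClient signature shown in the llm_client.py diff;
# that every backend exposes the same signature is an assumption here.
reply = client.chat(
    [{"role": "system", "content": system_prompt},
     {"role": "user", "content": "Hello!"}]
)
print(reply)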
mem_llm/dynamic_prompt.py
ADDED
@@ -0,0 +1,298 @@
"""
Dynamic System Prompt Builder
=============================

Builds optimized system prompts based on active features:
- Knowledge Base enabled/disabled
- Tools enabled/disabled
- Memory type (JSON/SQL)
- Usage mode (business/personal)
- Multi-user support
- Document processing

This prevents irrelevant/context-broken responses by adapting
the system prompt to actual capabilities.
"""

from typing import Dict, List, Optional, Any
from datetime import datetime


class DynamicPromptBuilder:
    """Builds context-aware system prompts based on active features"""

    def __init__(self):
        self.base_instructions = {
            "core": """You are a helpful AI assistant that maintains conversation context and provides accurate, relevant responses.

⚠️ OUTPUT FORMAT:
- If you're a thinking-enabled model (Qwen, DeepSeek, etc.), DO NOT show your internal reasoning
- Respond DIRECTLY with the final answer only
- Suppress any chain-of-thought or thinking process
- Be concise and natural""",

            "concise": """
RESPONSE GUIDELINES:
- Keep responses SHORT and FOCUSED (1-3 sentences for simple questions)
- Only elaborate when the user asks for details
- Acknowledge personal information briefly ("Got it!", "Noted!")
- Be conversational and natural""",

            "memory": """
MEMORY AWARENESS:
- You have access to past conversations with this user
- Reference previous context when relevant
- Build upon earlier discussions naturally
- Remember user preferences and details shared""",

            "knowledge_base": """
KNOWLEDGE BASE PRIORITY (⚠️ CRITICAL):
1. If KNOWLEDGE BASE information is provided below, USE IT FIRST - it's authoritative!
2. Knowledge base entries are marked with "📚 RELEVANT KNOWLEDGE"
3. Answer from knowledge base EXACTLY as provided
4. DO NOT make up information not in the knowledge base
5. If knowledge base has no info, then use conversation history or say "I don't have specific information about that"

RESPONSE PRIORITY:
1️⃣ Knowledge Base (if available) ← ALWAYS FIRST!
2️⃣ Conversation History
3️⃣ General knowledge (if appropriate)""",

            "no_knowledge_base": """
INFORMATION SOURCES:
- Use conversation history to maintain context
- Provide helpful general information when appropriate
- Be honest when you don't have specific information""",

            "tools": """
AVAILABLE TOOLS:
{tool_descriptions}

TOOL USAGE:
- Use tools when user requests actions (calculator, weather, search, etc.)
- Explain what you're doing when using a tool
- Present tool results clearly""",

            "multi_user": """
USER CONTEXT:
- Each user has separate conversation history
- Maintain appropriate boundaries between user sessions
- Current user: {current_user}""",

            "business": """
BUSINESS CONTEXT:
- Company: {company_name}
- Industry: {industry}
- Founded: {founded_year}

PROFESSIONAL STANDARDS:
- Maintain professional tone
- Prioritize customer satisfaction
- Provide clear, actionable solutions
- Escalate complex issues appropriately""",

            "personal": """
PERSONAL ASSISTANT MODE:
- User: {user_name}
- Timezone: {timezone}

ASSISTANCE STYLE:
- Friendly and helpful
- Proactive suggestions when appropriate
- Respect user preferences and privacy""",
        }

    def build_prompt(self,
                     usage_mode: str = "personal",
                     has_knowledge_base: bool = False,
                     has_tools: bool = False,
                     tool_descriptions: Optional[str] = None,
                     is_multi_user: bool = False,
                     current_user: Optional[str] = None,
                     business_config: Optional[Dict] = None,
                     personal_config: Optional[Dict] = None,
                     memory_type: str = "sql",
                     custom_instructions: Optional[str] = None) -> str:
        """
        Build dynamic system prompt based on active features

        Args:
            usage_mode: 'business' or 'personal'
            has_knowledge_base: Whether knowledge base is active
            has_tools: Whether tools are enabled
            tool_descriptions: Description of available tools
            is_multi_user: Whether multi-user mode is active
            current_user: Current user ID
            business_config: Business mode configuration
            personal_config: Personal mode configuration
            memory_type: 'json' or 'sql'
            custom_instructions: Additional custom instructions

        Returns:
            Complete system prompt
        """

        sections = []

        # 1. Core identity
        sections.append(self.base_instructions["core"])

        # 2. Mode-specific context
        if usage_mode == "business":
            business_info = business_config or {}
            business_prompt = self.base_instructions["business"].format(
                company_name=business_info.get("company_name", "Our Company"),
                industry=business_info.get("industry", "Technology"),
                founded_year=business_info.get("founded_year", "2020")
            )
            sections.append(business_prompt)
        else:  # personal
            personal_info = personal_config or {}
            personal_prompt = self.base_instructions["personal"].format(
                user_name=personal_info.get("user_name", "User"),
                timezone=personal_info.get("timezone", "UTC")
            )
            sections.append(personal_prompt)

        # 3. Memory awareness
        sections.append(self.base_instructions["memory"])

        # 4. Knowledge base instructions (CRITICAL - only if enabled!)
        if has_knowledge_base:
            sections.append(self.base_instructions["knowledge_base"])
        else:
            sections.append(self.base_instructions["no_knowledge_base"])

        # 5. Tools instructions (only if enabled)
        if has_tools and tool_descriptions:
            tools_prompt = self.base_instructions["tools"].format(
                tool_descriptions=tool_descriptions
            )
            sections.append(tools_prompt)

        # 6. Multi-user context (only if enabled)
        if is_multi_user and current_user:
            multi_user_prompt = self.base_instructions["multi_user"].format(
                current_user=current_user
            )
            sections.append(multi_user_prompt)

        # 7. Response guidelines
        sections.append(self.base_instructions["concise"])

        # 8. Custom instructions (if provided)
        if custom_instructions:
            sections.append(f"\nADDITIONAL INSTRUCTIONS:\n{custom_instructions}")

        # 9. Current date
        current_date = datetime.now().strftime("%Y-%m-%d %H:%M")
        sections.append(f"\nCurrent Date/Time: {current_date}")

        # Join all sections
        full_prompt = "\n\n".join(sections)

        return full_prompt

    def get_feature_summary(self,
                            has_knowledge_base: bool,
                            has_tools: bool,
                            is_multi_user: bool,
                            memory_type: str) -> str:
        """
        Get human-readable summary of active features

        Returns:
            Feature summary string
        """
        features = []

        if has_knowledge_base:
            features.append("✅ Knowledge Base")
        else:
            features.append("❌ Knowledge Base")

        if has_tools:
            features.append("✅ Tools")
        else:
            features.append("❌ Tools")

        if is_multi_user:
            features.append("✅ Multi-user")
        else:
            features.append("⚪ Single-user")

        features.append(f"💾 Memory: {memory_type.upper()}")

        return " | ".join(features)


# Global instance
dynamic_prompt_builder = DynamicPromptBuilder()


# Example usage
if __name__ == "__main__":
    print("=" * 70)
    print("DYNAMIC PROMPT BUILDER - EXAMPLES")
    print("=" * 70)

    # Example 1: Simple personal assistant (no KB, no tools)
    print("\n📱 EXAMPLE 1: Simple Personal Assistant")
    print("-" * 70)
    prompt1 = dynamic_prompt_builder.build_prompt(
        usage_mode="personal",
        has_knowledge_base=False,
        has_tools=False,
        memory_type="json"
    )
    print(prompt1[:300] + "...")

    # Example 2: Business with Knowledge Base
    print("\n\n🏢 EXAMPLE 2: Business with Knowledge Base")
    print("-" * 70)
    prompt2 = dynamic_prompt_builder.build_prompt(
        usage_mode="business",
        has_knowledge_base=True,
        has_tools=False,
        business_config={
            "company_name": "Acme Corp",
            "industry": "E-commerce",
            "founded_year": "2015"
        },
        memory_type="sql"
    )
    print(prompt2[:300] + "...")

    # Example 3: Full-featured multi-user system
    print("\n\n⚡ EXAMPLE 3: Full-Featured Multi-User System")
    print("-" * 70)
    prompt3 = dynamic_prompt_builder.build_prompt(
        usage_mode="business",
        has_knowledge_base=True,
        has_tools=True,
        tool_descriptions="- Calculator: Perform math calculations\n- Weather: Get current weather",
        is_multi_user=True,
        current_user="customer_12345",
        business_config={
            "company_name": "TechSupport Inc",
            "industry": "Technology",
            "founded_year": "2010"
        },
        memory_type="sql"
    )
    print(prompt3[:300] + "...")

    # Feature summaries
    print("\n\n📊 FEATURE SUMMARIES")
    print("-" * 70)

    configs = [
        ("Simple", False, False, False, "json"),
        ("Basic KB", True, False, False, "json"),
        ("With Tools", True, True, False, "sql"),
        ("Full System", True, True, True, "sql"),
    ]

    for name, kb, tools, multi, mem in configs:
        summary = dynamic_prompt_builder.get_feature_summary(kb, tools, multi, mem)
        print(f"{name:15} : {summary}")
mem_llm/llm_client.py
CHANGED
@@ -5,13 +5,14 @@ Works with Granite4:tiny-h model
 
 import requests
 import json
+import time
 from typing import List, Dict, Optional
 
 
 class OllamaClient:
     """Uses local LLM model with Ollama API"""
 
-    def __init__(self, model: str = "granite4:
+    def __init__(self, model: str = "granite4:3b",
                  base_url: str = "http://localhost:11434"):
         """
         Args:
@@ -79,19 +80,37 @@
         if system_prompt:
             payload["system"] = system_prompt
 
-
-
-
-
-
-
-
-
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                response = requests.post(self.api_url, json=payload, timeout=60)
+                if response.status_code == 200:
+                    return response.json().get('response', '').strip()
+                else:
+                    if attempt < max_retries - 1:
+                        time.sleep(1.0 * (2 ** attempt))  # Exponential backoff
+                        continue
+                    return f"Error: {response.status_code} - {response.text}"
+            except requests.exceptions.Timeout:
+                if attempt < max_retries - 1:
+                    time.sleep(2.0 * (2 ** attempt))
+                    continue
+                return "Error: Request timeout. Please check if Ollama is running."
+            except requests.exceptions.ConnectionError:
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                return "Error: Cannot connect to Ollama. Make sure Ollama service is running."
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    time.sleep(1.0 * (2 ** attempt))
+                    continue
+                return f"Connection error: {str(e)}"
 
     def chat(self, messages: List[Dict[str, str]],
-             temperature: float = 0.7, max_tokens: int =
+             temperature: float = 0.7, max_tokens: int = 2000) -> str:
         """
-        Chat format interaction
+        Chat format interaction - Compatible with ALL Ollama models
 
         Args:
             messages: Message history [{"role": "user/assistant/system", "content": "..."}]
@@ -107,14 +126,58 @@
             "stream": False,
             "options": {
                 "temperature": temperature,
-                "num_predict": max_tokens
+                "num_predict": max_tokens,
+                "num_ctx": 4096,    # Context window
+                "top_k": 40,        # Limit vocab
+                "top_p": 0.9,       # Nucleus sampling
+                "num_thread": 8     # Parallel processing
             }
         }
 
+        # For thinking-enabled models (like qwen3), disable thinking mode
+        # to get direct answers instead of reasoning process
+        if 'qwen' in self.model.lower() or 'deepseek' in self.model.lower():
+            payload["options"]["enable_thinking"] = False
+
         try:
-            response = requests.post(self.chat_url, json=payload, timeout=
+            response = requests.post(self.chat_url, json=payload, timeout=120)
             if response.status_code == 200:
-
+                response_data = response.json()
+                message = response_data.get('message', {})
+
+                # Get content - primary response field
+                result = message.get('content', '').strip()
+
+                # Fallback: If content is empty but thinking exists
+                # This happens when thinking mode couldn't be disabled
+                if not result and message.get('thinking'):
+                    thinking = message.get('thinking', '')
+
+                    # Try to extract the actual answer from thinking process
+                    # Usually the answer is at the end after reasoning
+                    if thinking:
+                        # Split by common patterns that indicate final answer
+                        for separator in ['\n\nAnswer:', '\n\nFinal answer:',
+                                          '\n\nResponse:', '\n\nSo the answer is:',
+                                          '\n\n---\n', '\n\nOkay,']:
+                            if separator in thinking:
+                                parts = thinking.split(separator)
+                                if len(parts) > 1:
+                                    result = parts[-1].strip()
+                                    break
+
+                        # If no separator found, try to get last meaningful paragraph
+                        if not result:
+                            paragraphs = [p.strip() for p in thinking.split('\n\n') if p.strip()]
+                            if paragraphs:
+                                # Take the last paragraph as likely answer
+                                last_para = paragraphs[-1]
+                                # Avoid meta-commentary like "Wait, let me think..."
+                                if not any(word in last_para.lower() for word in
+                                           ['wait', 'hmm', 'let me', 'thinking', 'okay']):
+                                    result = last_para
+
+                return result
             else:
                 return f"Error: {response.status_code} - {response.text}"
         except Exception as e:
mem_llm/llm_client_factory.py
ADDED
@@ -0,0 +1,260 @@
"""
LLM Client Factory
==================

Factory pattern for creating LLM clients.
Supports multiple backends with automatic detection.

Supported Backends:
- Ollama: Local Ollama service
- LM Studio: Local LM Studio server

Usage:
    # Create specific backend
    client = LLMClientFactory.create('ollama', model='llama3')

    # Auto-detect available backend
    client = LLMClientFactory.auto_detect()

    # Get all available backends
    backends = LLMClientFactory.get_available_backends()

Author: C. Emre Karataş
Version: 1.3.0
"""

from typing import Optional, Dict, List, Any
import logging

from .clients.ollama_client import OllamaClient
from .clients.lmstudio_client import LMStudioClient
from .base_llm_client import BaseLLMClient


class LLMClientFactory:
    """
    Factory for creating LLM clients

    Provides unified interface for creating different LLM backends.
    Supports auto-detection of available local services.
    """

    # Registry of supported backends
    BACKENDS = {
        'ollama': {
            'class': OllamaClient,
            'description': 'Local Ollama service',
            'type': 'local',
            'default_url': 'http://localhost:11434',
            'default_model': 'granite4:3b'
        },
        'lmstudio': {
            'class': LMStudioClient,
            'description': 'LM Studio local server (OpenAI-compatible)',
            'type': 'local',
            'default_url': 'http://localhost:1234',
            'default_model': 'local-model'
        }
    }

    @staticmethod
    def create(backend: str, model: Optional[str] = None, **kwargs) -> BaseLLMClient:
        """
        Create LLM client for specified backend

        Args:
            backend: Backend name ('ollama', 'lmstudio')
            model: Model name (uses default if None)
            **kwargs: Backend-specific configuration
                - base_url: API endpoint (for local backends)
                - temperature: Default temperature
                - max_tokens: Default max tokens

        Returns:
            Configured LLM client

        Raises:
            ValueError: If backend is not supported

        Examples:
            # Ollama
            client = LLMClientFactory.create('ollama', model='llama3')

            # LM Studio
            client = LLMClientFactory.create(
                'lmstudio',
                model='llama-3-8b',
                base_url='http://localhost:1234'
            )
        """
        backend = backend.lower()

        if backend not in LLMClientFactory.BACKENDS:
            available = ', '.join(LLMClientFactory.BACKENDS.keys())
            raise ValueError(
                f"Unsupported backend: '{backend}'. "
                f"Available backends: {available}"
            )

        backend_info = LLMClientFactory.BACKENDS[backend]
        client_class = backend_info['class']

        # Use default model if not specified
        if not model:
            model = backend_info.get('default_model')

        # Add default base_url for local backends if not provided
        if backend_info['type'] == 'local' and 'base_url' not in kwargs:
            kwargs['base_url'] = backend_info.get('default_url')

        # Create and return client
        try:
            return client_class(model=model, **kwargs)
        except Exception as e:
            raise ValueError(f"Failed to create {backend} client: {str(e)}") from e

    @staticmethod
    def auto_detect(preferred_backends: Optional[List[str]] = None) -> Optional[BaseLLMClient]:
        """
        Auto-detect available LLM service

        Checks common local services and returns the first available one.
        Useful for applications that should work with any available backend.

        Args:
            preferred_backends: List of backends to check in order
                (if None, checks all in default order)

        Returns:
            First available LLM client, or None if none available

        Example:
            # Try to find any available backend
            client = LLMClientFactory.auto_detect()
            if client:
                print(f"Using {client.get_info()['backend']}")
            else:
                print("No LLM service found")

            # Try specific backends in order
            client = LLMClientFactory.auto_detect(['lmstudio', 'ollama'])
        """
        logger = logging.getLogger('LLMClientFactory')

        # Default check order: local services first
        if preferred_backends is None:
            preferred_backends = ['ollama', 'lmstudio']

        for backend_name in preferred_backends:
            if backend_name not in LLMClientFactory.BACKENDS:
                logger.warning(f"Unknown backend in auto-detect: {backend_name}")
                continue

            backend_info = LLMClientFactory.BACKENDS[backend_name]

            # Skip cloud services in auto-detect (they require API keys)
            if backend_info['type'] == 'cloud':
                logger.debug(f"Skipping cloud backend in auto-detect: {backend_name}")
                continue

            try:
                # Try to create client with defaults
                client = LLMClientFactory.create(backend_name)

                # Check if service is actually running
                if client.check_connection():
                    logger.info(f"✅ Detected {backend_name} at {backend_info.get('default_url')}")
                    return client
                else:
                    logger.debug(f"Service not running: {backend_name}")

            except Exception as e:
                logger.debug(f"Failed to detect {backend_name}: {e}")
                continue

        logger.warning("⚠️ No local LLM service detected")
        return None

    @staticmethod
    def get_available_backends() -> List[Dict[str, Any]]:
        """
        Get list of all supported backends with their info

        Returns:
            List of backend information dictionaries

        Example:
            backends = LLMClientFactory.get_available_backends()
            for backend in backends:
                print(f"{backend['name']}: {backend['description']}")
        """
        result = []

        for name, info in LLMClientFactory.BACKENDS.items():
            backend_dict = {
                'name': name,
                'description': info['description'],
                'type': info['type'],
                'default_model': info.get('default_model'),
                'requires_api_key': info.get('requires_api_key', False)
            }

            if info['type'] == 'local':
                backend_dict['default_url'] = info.get('default_url')

            result.append(backend_dict)

        return result

    @staticmethod
    def check_backend_availability(backend: str, **kwargs) -> bool:
        """
        Check if a specific backend is available

        Args:
            backend: Backend name
            **kwargs: Configuration for creating the client

        Returns:
            True if backend is available and responding

        Example:
            # Check if Ollama is running
            if LLMClientFactory.check_backend_availability('ollama'):
                print("Ollama is available")

            # Check custom LM Studio URL
            if LLMClientFactory.check_backend_availability(
                'lmstudio',
                base_url='http://localhost:5000'
            ):
                print("LM Studio is available")
        """
        try:
            client = LLMClientFactory.create(backend, **kwargs)
            return client.check_connection()
        except Exception:
            return False

    @staticmethod
    def get_backend_info(backend: str) -> Dict[str, Any]:
        """
        Get information about a specific backend

        Args:
            backend: Backend name

        Returns:
            Backend information dictionary

        Raises:
            ValueError: If backend not found
        """
        if backend not in LLMClientFactory.BACKENDS:
            raise ValueError(f"Unknown backend: {backend}")

        info = LLMClientFactory.BACKENDS[backend].copy()
        # Remove class reference for JSON serialization
        info.pop('class', None)
        return info