mem_llm-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mem_llm/__init__.py +98 -0
- mem_llm/api_server.py +595 -0
- mem_llm/base_llm_client.py +201 -0
- mem_llm/builtin_tools.py +311 -0
- mem_llm/cli.py +254 -0
- mem_llm/clients/__init__.py +22 -0
- mem_llm/clients/lmstudio_client.py +393 -0
- mem_llm/clients/ollama_client.py +354 -0
- mem_llm/config.yaml.example +52 -0
- mem_llm/config_from_docs.py +180 -0
- mem_llm/config_manager.py +231 -0
- mem_llm/conversation_summarizer.py +372 -0
- mem_llm/data_export_import.py +640 -0
- mem_llm/dynamic_prompt.py +298 -0
- mem_llm/knowledge_loader.py +88 -0
- mem_llm/llm_client.py +225 -0
- mem_llm/llm_client_factory.py +260 -0
- mem_llm/logger.py +129 -0
- mem_llm/mem_agent.py +1611 -0
- mem_llm/memory_db.py +612 -0
- mem_llm/memory_manager.py +321 -0
- mem_llm/memory_tools.py +253 -0
- mem_llm/prompt_security.py +304 -0
- mem_llm/response_metrics.py +221 -0
- mem_llm/retry_handler.py +193 -0
- mem_llm/thread_safe_db.py +301 -0
- mem_llm/tool_system.py +429 -0
- mem_llm/vector_store.py +278 -0
- mem_llm/web_launcher.py +129 -0
- mem_llm/web_ui/README.md +44 -0
- mem_llm/web_ui/__init__.py +7 -0
- mem_llm/web_ui/index.html +641 -0
- mem_llm/web_ui/memory.html +569 -0
- mem_llm/web_ui/metrics.html +75 -0
- mem_llm-2.0.0.dist-info/METADATA +667 -0
- mem_llm-2.0.0.dist-info/RECORD +39 -0
- mem_llm-2.0.0.dist-info/WHEEL +5 -0
- mem_llm-2.0.0.dist-info/entry_points.txt +3 -0
- mem_llm-2.0.0.dist-info/top_level.txt +1 -0
mem_llm/dynamic_prompt.py
ADDED
@@ -0,0 +1,298 @@
"""
Dynamic System Prompt Builder
=============================

Builds optimized system prompts based on active features:
- Knowledge Base enabled/disabled
- Tools enabled/disabled
- Memory type (JSON/SQL)
- Usage mode (business/personal)
- Multi-user support
- Document processing

This prevents irrelevant/context-broken responses by adapting
the system prompt to actual capabilities.
"""

from typing import Dict, List, Optional, Any
from datetime import datetime


class DynamicPromptBuilder:
    """Builds context-aware system prompts based on active features"""

    def __init__(self):
        self.base_instructions = {
            "core": """You are a helpful AI assistant that maintains conversation context and provides accurate, relevant responses.

⚠️ OUTPUT FORMAT:
- If you're a thinking-enabled model (Qwen, DeepSeek, etc.), DO NOT show your internal reasoning
- Respond DIRECTLY with the final answer only
- Suppress any chain-of-thought or thinking process
- Be concise and natural""",

            "concise": """
RESPONSE GUIDELINES:
- Keep responses SHORT and FOCUSED (1-3 sentences for simple questions)
- Only elaborate when the user asks for details
- Acknowledge personal information briefly ("Got it!", "Noted!")
- Be conversational and natural""",

            "memory": """
MEMORY AWARENESS:
- You have access to past conversations with this user
- Reference previous context when relevant
- Build upon earlier discussions naturally
- Remember user preferences and details shared""",

            "knowledge_base": """
KNOWLEDGE BASE PRIORITY (⚠️ CRITICAL):
1. If KNOWLEDGE BASE information is provided below, USE IT FIRST - it's authoritative!
2. Knowledge base entries are marked with "📚 RELEVANT KNOWLEDGE"
3. Answer from knowledge base EXACTLY as provided
4. DO NOT make up information not in the knowledge base
5. If knowledge base has no info, then use conversation history or say "I don't have specific information about that"

RESPONSE PRIORITY:
1️⃣ Knowledge Base (if available) ← ALWAYS FIRST!
2️⃣ Conversation History
3️⃣ General knowledge (if appropriate)""",

            "no_knowledge_base": """
INFORMATION SOURCES:
- Use conversation history to maintain context
- Provide helpful general information when appropriate
- Be honest when you don't have specific information""",

            "tools": """
AVAILABLE TOOLS:
{tool_descriptions}

TOOL USAGE:
- Use tools when user requests actions (calculator, weather, search, etc.)
- Explain what you're doing when using a tool
- Present tool results clearly""",

            "multi_user": """
USER CONTEXT:
- Each user has separate conversation history
- Maintain appropriate boundaries between user sessions
- Current user: {current_user}""",

            "business": """
BUSINESS CONTEXT:
- Company: {company_name}
- Industry: {industry}
- Founded: {founded_year}

PROFESSIONAL STANDARDS:
- Maintain professional tone
- Prioritize customer satisfaction
- Provide clear, actionable solutions
- Escalate complex issues appropriately""",

            "personal": """
PERSONAL ASSISTANT MODE:
- User: {user_name}
- Timezone: {timezone}

ASSISTANCE STYLE:
- Friendly and helpful
- Proactive suggestions when appropriate
- Respect user preferences and privacy""",
        }

    def build_prompt(self,
                     usage_mode: str = "personal",
                     has_knowledge_base: bool = False,
                     has_tools: bool = False,
                     tool_descriptions: Optional[str] = None,
                     is_multi_user: bool = False,
                     current_user: Optional[str] = None,
                     business_config: Optional[Dict] = None,
                     personal_config: Optional[Dict] = None,
                     memory_type: str = "sql",
                     custom_instructions: Optional[str] = None) -> str:
        """
        Build dynamic system prompt based on active features

        Args:
            usage_mode: 'business' or 'personal'
            has_knowledge_base: Whether knowledge base is active
            has_tools: Whether tools are enabled
            tool_descriptions: Description of available tools
            is_multi_user: Whether multi-user mode is active
            current_user: Current user ID
            business_config: Business mode configuration
            personal_config: Personal mode configuration
            memory_type: 'json' or 'sql'
            custom_instructions: Additional custom instructions

        Returns:
            Complete system prompt
        """

        sections = []

        # 1. Core identity
        sections.append(self.base_instructions["core"])

        # 2. Mode-specific context
        if usage_mode == "business":
            business_info = business_config or {}
            business_prompt = self.base_instructions["business"].format(
                company_name=business_info.get("company_name", "Our Company"),
                industry=business_info.get("industry", "Technology"),
                founded_year=business_info.get("founded_year", "2020")
            )
            sections.append(business_prompt)
        else:  # personal
            personal_info = personal_config or {}
            personal_prompt = self.base_instructions["personal"].format(
                user_name=personal_info.get("user_name", "User"),
                timezone=personal_info.get("timezone", "UTC")
            )
            sections.append(personal_prompt)

        # 3. Memory awareness
        sections.append(self.base_instructions["memory"])

        # 4. Knowledge base instructions (CRITICAL - only if enabled!)
        if has_knowledge_base:
            sections.append(self.base_instructions["knowledge_base"])
        else:
            sections.append(self.base_instructions["no_knowledge_base"])

        # 5. Tools instructions (only if enabled)
        if has_tools and tool_descriptions:
            tools_prompt = self.base_instructions["tools"].format(
                tool_descriptions=tool_descriptions
            )
            sections.append(tools_prompt)

        # 6. Multi-user context (only if enabled)
        if is_multi_user and current_user:
            multi_user_prompt = self.base_instructions["multi_user"].format(
                current_user=current_user
            )
            sections.append(multi_user_prompt)

        # 7. Response guidelines
        sections.append(self.base_instructions["concise"])

        # 8. Custom instructions (if provided)
        if custom_instructions:
            sections.append(f"\nADDITIONAL INSTRUCTIONS:\n{custom_instructions}")

        # 9. Current date
        current_date = datetime.now().strftime("%Y-%m-%d %H:%M")
        sections.append(f"\nCurrent Date/Time: {current_date}")

        # Join all sections
        full_prompt = "\n\n".join(sections)

        return full_prompt

    def get_feature_summary(self,
                            has_knowledge_base: bool,
                            has_tools: bool,
                            is_multi_user: bool,
                            memory_type: str) -> str:
        """
        Get human-readable summary of active features

        Returns:
            Feature summary string
        """
        features = []

        if has_knowledge_base:
            features.append("✅ Knowledge Base")
        else:
            features.append("❌ Knowledge Base")

        if has_tools:
            features.append("✅ Tools")
        else:
            features.append("❌ Tools")

        if is_multi_user:
            features.append("✅ Multi-user")
        else:
            features.append("⚪ Single-user")

        features.append(f"💾 Memory: {memory_type.upper()}")

        return " | ".join(features)


# Global instance
dynamic_prompt_builder = DynamicPromptBuilder()


# Example usage
if __name__ == "__main__":
    print("=" * 70)
    print("DYNAMIC PROMPT BUILDER - EXAMPLES")
    print("=" * 70)

    # Example 1: Simple personal assistant (no KB, no tools)
    print("\n📱 EXAMPLE 1: Simple Personal Assistant")
    print("-" * 70)
    prompt1 = dynamic_prompt_builder.build_prompt(
        usage_mode="personal",
        has_knowledge_base=False,
        has_tools=False,
        memory_type="json"
    )
    print(prompt1[:300] + "...")

    # Example 2: Business with Knowledge Base
    print("\n\n🏢 EXAMPLE 2: Business with Knowledge Base")
    print("-" * 70)
    prompt2 = dynamic_prompt_builder.build_prompt(
        usage_mode="business",
        has_knowledge_base=True,
        has_tools=False,
        business_config={
            "company_name": "Acme Corp",
            "industry": "E-commerce",
            "founded_year": "2015"
        },
        memory_type="sql"
    )
    print(prompt2[:300] + "...")

    # Example 3: Full-featured multi-user system
    print("\n\n⚡ EXAMPLE 3: Full-Featured Multi-User System")
    print("-" * 70)
    prompt3 = dynamic_prompt_builder.build_prompt(
        usage_mode="business",
        has_knowledge_base=True,
        has_tools=True,
        tool_descriptions="- Calculator: Perform math calculations\n- Weather: Get current weather",
        is_multi_user=True,
        current_user="customer_12345",
        business_config={
            "company_name": "TechSupport Inc",
            "industry": "Technology",
            "founded_year": "2010"
        },
        memory_type="sql"
    )
    print(prompt3[:300] + "...")

    # Feature summaries
    print("\n\n📊 FEATURE SUMMARIES")
    print("-" * 70)

    configs = [
        ("Simple", False, False, False, "json"),
        ("Basic KB", True, False, False, "json"),
        ("With Tools", True, True, False, "sql"),
        ("Full System", True, True, True, "sql"),
    ]

    for name, kb, tools, multi, mem in configs:
        summary = dynamic_prompt_builder.get_feature_summary(kb, tools, multi, mem)
        print(f"{name:15} : {summary}")
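
Editor's note (not part of the packaged file): a minimal sketch of how the builder's output is typically wired into the chat client shipped in mem_llm/llm_client.py; the flag and config values below are illustrative assumptions, not package defaults.

# Sketch: feed DynamicPromptBuilder output into OllamaClient.chat (illustrative values).
from mem_llm.dynamic_prompt import dynamic_prompt_builder
from mem_llm.llm_client import OllamaClient

system_prompt = dynamic_prompt_builder.build_prompt(
    usage_mode="business",
    has_knowledge_base=True,
    business_config={"company_name": "Acme Corp", "industry": "E-commerce", "founded_year": "2015"},
)
client = OllamaClient(model="granite4:3b")  # requires a local Ollama server
reply = client.chat([
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "Where is my order?"},
])
print(reply)
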
mem_llm/knowledge_loader.py
ADDED
@@ -0,0 +1,88 @@
"""
Knowledge Base Loader
Loads a pre-prepared problem/solution database into the system
"""

import json
try:
    import yaml  # provided by the "pyyaml" package; the module is always named "yaml"
except ImportError:
    yaml = None  # YAML support unavailable; JSON loading still works
from pathlib import Path
from typing import List, Dict, Optional
from .memory_db import SQLMemoryManager


class KnowledgeLoader:
    """Knowledge base management and loading"""

    def __init__(self, db_manager: SQLMemoryManager):
        """
        Args:
            db_manager: SQL memory manager
        """
        self.db = db_manager

    def load_from_json(self, file_path: str) -> int:
        """Load knowledge base from JSON file"""
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        count = 0
        for entry in data.get('knowledge_base', []):
            self.db.add_knowledge(
                category=entry['category'],
                question=entry['question'],
                answer=entry['answer'],
                keywords=entry.get('keywords', []),
                priority=entry.get('priority', 0)
            )
            count += 1

        return count

    def load_default_ecommerce_kb(self) -> int:
        """Load default e-commerce knowledge base"""
        knowledge = [
            {
                "category": "shipping",
                "question": "When will my order arrive?",
                "answer": "Orders are shipped within 2-3 business days and delivered within 3-5 business days.",
                "keywords": ["shipping", "delivery", "time"],
                "priority": 10
            },
            {
                "category": "return",
                "question": "How do I return a product?",
                "answer": "You can return products within 14 days. Create a return request from My Orders page.",
                "keywords": ["return", "refund"],
                "priority": 10
            },
        ]

        count = 0
        for entry in knowledge:
            self.db.add_knowledge(**entry)
            count += 1

        return count

    def load_default_tech_support_kb(self) -> int:
        """Load default tech support knowledge base"""
        knowledge = [
            {
                "category": "connection",
                "question": "Cannot connect to internet",
                "answer": "1) Restart your modem/router 2) Check Wi-Fi password 3) Try other devices",
                "keywords": ["internet", "connection", "wifi"],
                "priority": 10
            },
        ]

        count = 0
        for entry in knowledge:
            self.db.add_knowledge(**entry)
            count += 1

        return count
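
Editor's note (not part of the packaged file): the JSON shape load_from_json expects follows directly from the keys the loader reads; the sketch below writes such a file and then loads it. The file name and entry values are illustrative.

# Sketch: build and load a JSON knowledge base (keys taken from load_from_json above).
import json

example_kb = {
    "knowledge_base": [
        {
            "category": "shipping",                      # required
            "question": "Do you ship internationally?",  # required
            "answer": "Yes, international shipping takes 7-14 business days.",  # required
            "keywords": ["shipping", "international"],   # optional, defaults to []
            "priority": 5,                               # optional, defaults to 0
        }
    ]
}

with open("kb.json", "w", encoding="utf-8") as f:
    json.dump(example_kb, f, ensure_ascii=False, indent=2)

# loader = KnowledgeLoader(db_manager)        # db_manager: an SQLMemoryManager instance
# loaded = loader.load_from_json("kb.json")   # returns the number of entries added
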
mem_llm/llm_client.py
ADDED
@@ -0,0 +1,225 @@
"""
LLM Client - Local model integration with Ollama
Works with the granite4:3b model by default
"""

import requests
import json
import time
from typing import List, Dict, Optional


class OllamaClient:
    """Uses a local LLM model through the Ollama API"""

    def __init__(self, model: str = "granite4:3b",
                 base_url: str = "http://localhost:11434"):
        """
        Args:
            model: Model name to use
            base_url: Ollama API URL
        """
        self.model = model
        self.base_url = base_url
        self.api_url = f"{base_url}/api/generate"
        self.chat_url = f"{base_url}/api/chat"

    def check_connection(self) -> bool:
        """
        Checks if the Ollama service is running

        Returns:
            True if the service is reachable
        """
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def list_models(self) -> List[str]:
        """
        List available models

        Returns:
            List of model names
        """
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [model['name'] for model in data.get('models', [])]
            return []
        except requests.exceptions.RequestException:
            return []

    def generate(self, prompt: str, system_prompt: Optional[str] = None,
                 temperature: float = 0.7, max_tokens: int = 500) -> str:
        """
        Generate text from a single prompt

        Args:
            prompt: User prompt (not the AI system prompt)
            system_prompt: AI system prompt
            temperature: Creativity level (0-1)
            max_tokens: Maximum token count

        Returns:
            Model output
        """
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens
            }
        }

        if system_prompt:
            payload["system"] = system_prompt

        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.post(self.api_url, json=payload, timeout=60)
                if response.status_code == 200:
                    return response.json().get('response', '').strip()
                else:
                    if attempt < max_retries - 1:
                        time.sleep(1.0 * (2 ** attempt))  # Exponential backoff
                        continue
                    return f"Error: {response.status_code} - {response.text}"
            except requests.exceptions.Timeout:
                if attempt < max_retries - 1:
                    time.sleep(2.0 * (2 ** attempt))
                    continue
                return "Error: Request timeout. Please check if Ollama is running."
            except requests.exceptions.ConnectionError:
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                return "Error: Cannot connect to Ollama. Make sure the Ollama service is running."
            except Exception as e:
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                return f"Connection error: {str(e)}"

    def chat(self, messages: List[Dict[str, str]],
             temperature: float = 0.7, max_tokens: int = 2000) -> str:
        """
        Chat-format interaction, compatible with all Ollama models

        Args:
            messages: Message history [{"role": "user/assistant/system", "content": "..."}]
            temperature: Creativity level
            max_tokens: Maximum token count

        Returns:
            Model response
        """
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
                "num_ctx": 4096,   # Context window
                "top_k": 40,       # Limit vocab
                "top_p": 0.9,      # Nucleus sampling
                "num_thread": 8    # Parallel processing
            }
        }

        # For thinking-enabled models (like qwen3), disable thinking mode to get
        # direct answers instead of the reasoning process (not all models/servers
        # honor this option, hence the fallback below)
        if 'qwen' in self.model.lower() or 'deepseek' in self.model.lower():
            payload["options"]["enable_thinking"] = False

        try:
            response = requests.post(self.chat_url, json=payload, timeout=120)
            if response.status_code == 200:
                response_data = response.json()
                message = response_data.get('message', {})

                # Get content - primary response field
                result = message.get('content', '').strip()

                # Fallback: if content is empty but thinking exists
                # (this happens when thinking mode couldn't be disabled)
                if not result and message.get('thinking'):
                    thinking = message.get('thinking', '')

                    # Try to extract the actual answer from the thinking process;
                    # usually the answer is at the end, after the reasoning
                    if thinking:
                        # Split by common patterns that indicate a final answer
                        for separator in ['\n\nAnswer:', '\n\nFinal answer:',
                                          '\n\nResponse:', '\n\nSo the answer is:',
                                          '\n\n---\n', '\n\nOkay,']:
                            if separator in thinking:
                                parts = thinking.split(separator)
                                if len(parts) > 1:
                                    result = parts[-1].strip()
                                    break

                        # If no separator was found, try the last meaningful paragraph
                        if not result:
                            paragraphs = [p.strip() for p in thinking.split('\n\n') if p.strip()]
                            if paragraphs:
                                # Take the last paragraph as the likely answer
                                last_para = paragraphs[-1]
                                # Avoid meta-commentary like "Wait, let me think..."
                                if not any(word in last_para.lower() for word in
                                           ['wait', 'hmm', 'let me', 'thinking', 'okay']):
                                    result = last_para

                return result
            else:
                return f"Error: {response.status_code} - {response.text}"
        except Exception as e:
            return f"Connection error: {str(e)}"

    def generate_with_memory_context(self, user_message: str,
                                     memory_summary: str,
                                     recent_conversations: List[Dict]) -> str:
        """
        Generate a response with memory context

        Args:
            user_message: User's message
            memory_summary: User memory summary
            recent_conversations: Recent conversations

        Returns:
            Context-aware response
        """
        # Create system prompt
        system_prompt = """You are a helpful customer service assistant.
You can remember past conversations with users.
Give short, clear and professional answers.
Use past interactions intelligently."""

        # Create message history
        messages = [{"role": "system", "content": system_prompt}]

        # Add memory summary
        if memory_summary and memory_summary != "No interactions with this user yet.":
            messages.append({
                "role": "system",
                "content": f"User history:\n{memory_summary}"
            })

        # Add recent conversations
        for conv in recent_conversations[-3:]:
            messages.append({"role": "user", "content": conv.get('user_message', '')})
            messages.append({"role": "assistant", "content": conv.get('bot_response', '')})

        # Add current message
        messages.append({"role": "user", "content": user_message})

        return self.chat(messages, temperature=0.7)
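
Editor's note (not part of the packaged file): a short sketch exercising the client methods above, assuming a local Ollama server at the default URL; the model name, messages, and memory values are illustrative.

# Sketch: basic OllamaClient usage (requires a running Ollama instance).
from mem_llm.llm_client import OllamaClient

client = OllamaClient(model="granite4:3b")
if client.check_connection():
    print("Models:", client.list_models())
    print(client.generate("Summarize what a context window is.",
                          system_prompt="Answer in one sentence."))
    print(client.generate_with_memory_context(
        user_message="Is my order on its way?",
        memory_summary="Customer ordered item #123 two days ago.",
        recent_conversations=[{"user_message": "Hi", "bot_response": "Hello! How can I help?"}],
    ))
else:
    print("Ollama is not reachable at http://localhost:11434")
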