mem-llm 1.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mem-llm might be problematic.
- mem_llm/__init__.py +71 -8
- mem_llm/api_server.py +595 -0
- mem_llm/base_llm_client.py +201 -0
- mem_llm/builtin_tools.py +311 -0
- mem_llm/builtin_tools_async.py +170 -0
- mem_llm/cli.py +254 -0
- mem_llm/clients/__init__.py +22 -0
- mem_llm/clients/lmstudio_client.py +393 -0
- mem_llm/clients/ollama_client.py +354 -0
- mem_llm/config.yaml.example +1 -1
- mem_llm/config_from_docs.py +1 -1
- mem_llm/config_manager.py +5 -3
- mem_llm/conversation_summarizer.py +372 -0
- mem_llm/data_export_import.py +640 -0
- mem_llm/dynamic_prompt.py +298 -0
- mem_llm/llm_client.py +77 -14
- mem_llm/llm_client_factory.py +260 -0
- mem_llm/logger.py +129 -0
- mem_llm/mem_agent.py +1178 -87
- mem_llm/memory_db.py +290 -59
- mem_llm/memory_manager.py +60 -1
- mem_llm/prompt_security.py +304 -0
- mem_llm/response_metrics.py +221 -0
- mem_llm/retry_handler.py +193 -0
- mem_llm/thread_safe_db.py +301 -0
- mem_llm/tool_system.py +537 -0
- mem_llm/vector_store.py +278 -0
- mem_llm/web_launcher.py +129 -0
- mem_llm/web_ui/README.md +44 -0
- mem_llm/web_ui/__init__.py +7 -0
- mem_llm/web_ui/index.html +641 -0
- mem_llm/web_ui/memory.html +569 -0
- mem_llm/web_ui/metrics.html +75 -0
- mem_llm-2.1.0.dist-info/METADATA +753 -0
- mem_llm-2.1.0.dist-info/RECORD +40 -0
- {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/WHEEL +1 -1
- mem_llm-2.1.0.dist-info/entry_points.txt +3 -0
- mem_llm/prompt_templates.py +0 -244
- mem_llm-1.0.2.dist-info/METADATA +0 -382
- mem_llm-1.0.2.dist-info/RECORD +0 -15
- {mem_llm-1.0.2.dist-info → mem_llm-2.1.0.dist-info}/top_level.txt +0 -0
mem_llm/cli.py
ADDED
@@ -0,0 +1,254 @@
"""
Command Line Interface for Mem-LLM
Interactive chat, statistics, and data management
"""

import click
import json
import sys
from typing import Optional
from .mem_agent import MemAgent
from . import __version__


@click.group()
@click.version_option(version=__version__, prog_name="mem-llm")
def cli():
    """
    Mem-LLM - Memory-enabled AI Assistant CLI

    A powerful command-line interface for interacting with your AI assistant.
    """
    pass


@cli.command()
@click.option('--user', '-u', default='default', help='User ID for the chat session')
@click.option('--model', '-m', default='granite4:3b', help='LLM model to use')
@click.option('--sql/--json', default=False, help='Use SQL (default: JSON)')
@click.option('--config', '-c', type=click.Path(exists=True), help='Config file path')
def chat(user: str, model: str, sql: bool, config: Optional[str]):
    """
    Start an interactive chat session

    Examples:
        mem-llm chat --user john
        mem-llm chat --user alice --sql
        mem-llm chat --config config.yaml
    """
    click.echo("🤖 Mem-LLM Interactive Chat")
    click.echo("=" * 60)

    # Initialize agent
    try:
        if config:
            agent = MemAgent(config_file=config)
        else:
            agent = MemAgent(model=model, use_sql=sql)

        # Check setup
        status = agent.check_setup()
        if status['status'] != 'ready':
            click.echo("\n❌ Setup Error!", err=True)
            if not status['ollama_running']:
                click.echo(" → Ollama service is not running", err=True)
                click.echo(" → Start it with: ollama serve", err=True)
            elif not status['model_ready']:
                click.echo(f" → Model '{model}' not found", err=True)
                click.echo(f" → Download it with: ollama pull {model}", err=True)
            sys.exit(1)

        agent.set_user(user)

        click.echo(f"\n✅ Chat session started")
        click.echo(f" User: {user}")
        click.echo(f" Model: {model}")
        click.echo(f" Memory: {'SQL' if sql else 'JSON'}")
        click.echo("\nType your message and press Enter. Commands:")
        click.echo(" /profile - Show user profile")
        click.echo(" /stats - Show statistics")
        click.echo(" /history - Show recent conversations")
        click.echo(" /exit - Exit chat\n")

        # Chat loop
        while True:
            try:
                message = input(f"\n{user}> ").strip()

                if not message:
                    continue

                # Handle commands
                if message.lower() in ['/exit', '/quit', 'exit', 'quit']:
                    click.echo("\n👋 Goodbye!")
                    break

                elif message.lower() == '/profile':
                    profile = agent.get_user_profile()
                    click.echo("\n👤 User Profile:")
                    click.echo(json.dumps(profile, indent=2, ensure_ascii=False))
                    continue

                elif message.lower() == '/stats':
                    stats = agent.get_statistics()
                    click.echo("\n📊 Statistics:")
                    click.echo(json.dumps(stats, indent=2, ensure_ascii=False))
                    continue

                elif message.lower() == '/history':
                    if hasattr(agent.memory, 'get_recent_conversations'):
                        convs = agent.memory.get_recent_conversations(user, 5)
                        click.echo("\n📜 Recent Conversations:")
                        for i, conv in enumerate(convs, 1):
                            click.echo(f"\n{i}. [{conv.get('timestamp', 'N/A')}]")
                            click.echo(f" You: {conv.get('user_message', '')[:100]}")
                            click.echo(f" Bot: {conv.get('bot_response', '')[:100]}")
                    continue

                # Regular chat
                response = agent.chat(message)
                click.echo(f"\n🤖 Bot> {response}")

            except KeyboardInterrupt:
                click.echo("\n\n👋 Goodbye!")
                break
            except EOFError:
                click.echo("\n\n👋 Goodbye!")
                break

    except Exception as e:
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        sys.exit(1)


@cli.command()
@click.option('--user', '-u', help='User ID (optional, shows all users if not specified)')
@click.option('--sql/--json', default=False, help='Use SQL (default: JSON)')
def stats(user: Optional[str], sql: bool):
    """
    Show memory statistics

    Examples:
        mem-llm stats
        mem-llm stats --user john
        mem-llm stats --sql
    """
    try:
        agent = MemAgent(use_sql=sql)

        if user:
            agent.set_user(user)
            profile = agent.get_user_profile()
            click.echo(f"\n👤 User Profile: {user}")
            click.echo("=" * 60)
            click.echo(json.dumps(profile, indent=2, ensure_ascii=False))
        else:
            stats = agent.get_statistics()
            click.echo("\n📊 Memory Statistics")
            click.echo("=" * 60)
            click.echo(json.dumps(stats, indent=2, ensure_ascii=False))

    except Exception as e:
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        sys.exit(1)


@cli.command()
@click.argument('user')
@click.option('--format', '-f', type=click.Choice(['json', 'txt']), default='json', help='Export format')
@click.option('--output', '-o', type=click.Path(), help='Output file (default: stdout)')
@click.option('--sql/--json', default=False, help='Use SQL (default: JSON)')
def export(user: str, format: str, output: Optional[str], sql: bool):
    """
    Export user conversation data

    Examples:
        mem-llm export john
        mem-llm export john --format txt
        mem-llm export john --output john_data.json
    """
    try:
        agent = MemAgent(use_sql=sql)
        agent.set_user(user)

        data = agent.export_memory(format=format)

        if output:
            with open(output, 'w', encoding='utf-8') as f:
                f.write(data)
            click.echo(f"✅ Exported to: {output}")
        else:
            click.echo(data)

    except Exception as e:
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        sys.exit(1)


@cli.command()
@click.option('--model', '-m', default='granite4:3b', help='Model to check')
def check(model: str):
    """
    Check if Ollama and model are ready

    Example:
        mem-llm check
        mem-llm check --model llama3.2:3b
    """
    try:
        agent = MemAgent(model=model)
        status = agent.check_setup()

        click.echo("\n🔍 System Check")
        click.echo("=" * 60)
        click.echo(f"Ollama Running: {'✅' if status['ollama_running'] else '❌'}")
        click.echo(f"Target Model: {status['target_model']}")
        click.echo(f"Model Ready: {'✅' if status['model_ready'] else '❌'}")
        click.echo(f"Memory Backend: {status['memory_backend']}")
        click.echo(f"Total Users: {status['total_users']}")
        click.echo(f"Total Chats: {status['total_interactions']}")
        click.echo(f"KB Entries: {status['kb_entries']}")

        if status['available_models']:
            click.echo(f"\nAvailable Models:")
            for m in status['available_models']:
                click.echo(f" • {m}")

        click.echo(f"\nStatus: {'✅ READY' if status['status'] == 'ready' else '❌ NOT READY'}")

        if status['status'] != 'ready':
            sys.exit(1)

    except Exception as e:
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        sys.exit(1)


@cli.command()
@click.argument('user')
@click.confirmation_option(prompt='Are you sure you want to delete all data for this user?')
@click.option('--sql/--json', default=False, help='Use SQL (default: JSON)')
def clear(user: str, sql: bool):
    """
    Clear all data for a user (requires confirmation)

    Example:
        mem-llm clear john
    """
    try:
        agent = MemAgent(use_sql=sql)
        result = agent.clear_user_data(user, confirm=True)
        click.echo(f"✅ {result}")

    except Exception as e:
        click.echo(f"\n❌ Error: {str(e)}", err=True)
        sys.exit(1)


def main():
    """Entry point for the CLI"""
    cli()


if __name__ == '__main__':
    main()
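For reviewers who want to exercise the new CLI without installing the console script, the click group above can be driven in-process with click's test runner. A minimal sketch (the model name is just the command's default and the result depends on a local Ollama setup):

# Sketch only: drives the new `check` command in-process via click's CliRunner.
# Exit code and output depend on whether a local Ollama server and model are available.
from click.testing import CliRunner

from mem_llm.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["check", "--model", "granite4:3b"])
print(result.exit_code)  # 0 when check_setup() reports 'ready', 1 otherwise
print(result.output)     # the "🔍 System Check" report printed by the command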
mem_llm/clients/__init__.py
ADDED
@@ -0,0 +1,22 @@
"""
LLM Clients Package
===================

Multiple LLM backend support for Mem-LLM.

Available Backends:
- OllamaClient: Local Ollama service
- LMStudioClient: LM Studio (OpenAI-compatible)

Author: C. Emre Karataş
Version: 1.3.6
"""

from .ollama_client import OllamaClient
from .lmstudio_client import LMStudioClient

__all__ = [
    'OllamaClient',
    'LMStudioClient',
]
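Both backends are re-exported, so downstream code can import them straight from mem_llm.clients. A minimal sketch following the usage block in the LM Studio client's own docstring (model names are placeholders; the Ollama client's constructor is defined in ollama_client.py, which is not shown in this diff):

# Sketch only: imports follow the __all__ list above; LMStudioClient arguments follow
# its docstring ("local-model" targets whatever model LM Studio currently has loaded).
from mem_llm.clients import LMStudioClient, OllamaClient  # OllamaClient imported for completeness

client = LMStudioClient(model="local-model", base_url="http://localhost:1234")
if client.check_connection():
    print(client.list_models())                                  # model ids reported by /v1/models
    print(client.chat([{"role": "user", "content": "Hello!"}]))  # single-shot completion
else:
    print("LM Studio server is not reachable on port 1234")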
mem_llm/clients/lmstudio_client.py
ADDED
@@ -0,0 +1,393 @@
"""
LM Studio LLM Client
====================

Client for LM Studio local inference server.
LM Studio uses OpenAI-compatible API format.

Features:
- OpenAI-compatible API
- Fast local inference
- Easy model switching
- GPU acceleration support

Installation:
1. Download LM Studio from https://lmstudio.ai
2. Load a model
3. Start local server (default: http://localhost:1234)

Author: C. Emre Karataş
Version: 1.3.0
"""

import requests
import time
import json
from typing import List, Dict, Optional, Iterator
import sys
import os

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from base_llm_client import BaseLLMClient


class LMStudioClient(BaseLLMClient):
    """
    LM Studio client implementation

    LM Studio provides an OpenAI-compatible API for local models.
    This client works with any model loaded in LM Studio.

    Usage:
        client = LMStudioClient(
            model="local-model",  # or specific model name
            base_url="http://localhost:1234"
        )
        response = client.chat([{"role": "user", "content": "Hello!"}])
    """

    def __init__(self,
                 model: str = "local-model",
                 base_url: str = "http://localhost:1234",
                 **kwargs):
        """
        Initialize LM Studio client

        Args:
            model: Model identifier (use "local-model" for default loaded model)
            base_url: LM Studio server URL (default: http://localhost:1234)
            **kwargs: Additional configuration
        """
        super().__init__(model=model, **kwargs)
        self.base_url = base_url.rstrip('/')
        self.chat_url = f"{self.base_url}/v1/chat/completions"
        self.models_url = f"{self.base_url}/v1/models"

        self.logger.debug(f"Initialized LM Studio client: {base_url}, model: {model}")

    def check_connection(self) -> bool:
        """
        Check if LM Studio server is running

        Returns:
            True if server is available
        """
        try:
            response = requests.get(self.models_url, timeout=5)
            return response.status_code == 200
        except Exception as e:
            self.logger.debug(f"LM Studio connection check failed: {e}")
            return False

    def list_models(self) -> List[str]:
        """
        List available models in LM Studio

        Returns:
            List of model identifiers
        """
        try:
            response = requests.get(self.models_url, timeout=5)
            if response.status_code == 200:
                data = response.json()
                models = data.get('data', [])
                return [model.get('id', '') for model in models if model.get('id')]
            return []
        except Exception as e:
            self.logger.error(f"Failed to list models: {e}")
            return []

    def chat(self,
             messages: List[Dict[str, str]],
             temperature: float = 0.7,
             max_tokens: int = 2000,
             **kwargs) -> str:
        """
        Send chat request to LM Studio

        Uses OpenAI-compatible chat completions endpoint.

        Args:
            messages: Message history in OpenAI format
            temperature: Sampling temperature (0.0-2.0)
            max_tokens: Maximum tokens in response
            **kwargs: Additional OpenAI-compatible parameters
                - top_p: Nucleus sampling (0.0-1.0)
                - frequency_penalty: (-2.0 to 2.0)
                - presence_penalty: (-2.0 to 2.0)
                - stream: Enable streaming (bool)

        Returns:
            Model response text

        Raises:
            ConnectionError: If cannot connect to LM Studio
            ValueError: If invalid parameters
        """
        # Validate messages
        self._validate_messages(messages)

        # Build OpenAI-compatible payload
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": kwargs.get("stream", False)
        }

        # Add optional parameters
        if "top_p" in kwargs:
            payload["top_p"] = kwargs["top_p"]
        if "frequency_penalty" in kwargs:
            payload["frequency_penalty"] = kwargs["frequency_penalty"]
        if "presence_penalty" in kwargs:
            payload["presence_penalty"] = kwargs["presence_penalty"]
        if "stop" in kwargs:
            payload["stop"] = kwargs["stop"]

        # Send request with retry logic
        max_retries = kwargs.get("max_retries", 3)
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    self.chat_url,
                    json=payload,
                    timeout=kwargs.get("timeout", 120)
                )

                if response.status_code == 200:
                    response_data = response.json()

                    # Extract content from OpenAI format
                    choices = response_data.get('choices', [])
                    if not choices:
                        self.logger.warning("No choices in LM Studio response")
                        if attempt < max_retries - 1:
                            time.sleep(1.0 * (2 ** attempt))
                            continue
                        return ""

                    # Get the message content
                    message = choices[0].get('message', {})
                    content = message.get('content', '').strip()

                    if not content:
                        self.logger.warning("Empty content in LM Studio response")
                        if attempt < max_retries - 1:
                            time.sleep(1.0 * (2 ** attempt))
                            continue

                    # Log usage statistics if available
                    usage = response_data.get('usage', {})
                    if usage:
                        self.logger.debug(
                            f"LM Studio usage - "
                            f"prompt: {usage.get('prompt_tokens', 0)} tokens, "
                            f"completion: {usage.get('completion_tokens', 0)} tokens, "
                            f"total: {usage.get('total_tokens', 0)} tokens"
                        )

                    return content

                else:
                    error_msg = f"LM Studio API error: {response.status_code}"
                    try:
                        error_data = response.json()
                        error_detail = error_data.get('error', {})
                        if isinstance(error_detail, dict):
                            error_msg += f" - {error_detail.get('message', response.text)}"
                        else:
                            error_msg += f" - {error_detail}"
                    except:
                        error_msg += f" - {response.text[:200]}"

                    self.logger.error(error_msg)

                    if attempt < max_retries - 1:
                        time.sleep(1.0 * (2 ** attempt))
                        continue
                    raise ConnectionError(error_msg)

            except requests.exceptions.Timeout:
                self.logger.warning(f"LM Studio request timeout (attempt {attempt + 1}/{max_retries})")
                if attempt < max_retries - 1:
                    time.sleep(2.0 * (2 ** attempt))
                    continue
                raise ConnectionError("LM Studio request timeout. Check if server is running.")

            except requests.exceptions.ConnectionError as e:
                self.logger.warning(f"Cannot connect to LM Studio (attempt {attempt + 1}/{max_retries})")
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                raise ConnectionError(
                    f"Cannot connect to LM Studio at {self.base_url}. "
                    "Make sure LM Studio is running and server is started."
                ) from e

            except Exception as e:
                self.logger.error(f"Unexpected error: {e}")
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                raise

        raise ConnectionError("Failed to get response after maximum retries")

    def chat_stream(self,
                    messages: List[Dict[str, str]],
                    temperature: float = 0.7,
                    max_tokens: int = 2000,
                    **kwargs) -> Iterator[str]:
        """
        Send chat request to LM Studio with streaming response

        Uses OpenAI-compatible streaming format.

        Args:
            messages: Message history in OpenAI format
            temperature: Sampling temperature (0.0-2.0)
            max_tokens: Maximum tokens in response
            **kwargs: Additional OpenAI-compatible parameters

        Yields:
            Response text chunks as they arrive

        Raises:
            ConnectionError: If cannot connect to LM Studio
            ValueError: If invalid parameters
        """
        # Validate messages
        self._validate_messages(messages)

        # Build OpenAI-compatible payload
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": True  # Enable streaming
        }

        # Add optional parameters
        if "top_p" in kwargs:
            payload["top_p"] = kwargs["top_p"]
        if "frequency_penalty" in kwargs:
            payload["frequency_penalty"] = kwargs["frequency_penalty"]
        if "presence_penalty" in kwargs:
            payload["presence_penalty"] = kwargs["presence_penalty"]
        if "stop" in kwargs:
            payload["stop"] = kwargs["stop"]

        try:
            response = requests.post(
                self.chat_url,
                json=payload,
                stream=True,  # Enable streaming
                timeout=kwargs.get("timeout", 120)
            )

            if response.status_code == 200:
                # Process OpenAI-compatible streaming response
                for line in response.iter_lines():
                    if line:
                        line_text = line.decode('utf-8')

                        # Skip empty lines
                        if not line_text.strip():
                            continue

                        # OpenAI format uses "data: " prefix
                        if line_text.startswith('data: '):
                            line_text = line_text[6:]  # Remove "data: " prefix

                        # Check for stream end
                        if line_text.strip() == '[DONE]':
                            break

                        try:
                            chunk_data = json.loads(line_text)

                            # Extract content from OpenAI format
                            choices = chunk_data.get('choices', [])
                            if choices:
                                delta = choices[0].get('delta', {})
                                content = delta.get('content', '')

                                if content:
                                    yield content

                        except json.JSONDecodeError as e:
                            self.logger.warning(f"Failed to parse streaming chunk: {e}")
                            continue
            else:
                error_msg = f"LM Studio API error: {response.status_code}"
                try:
                    error_data = response.json()
                    error_detail = error_data.get('error', {})
                    if isinstance(error_detail, dict):
                        error_msg += f" - {error_detail.get('message', response.text)}"
                    else:
                        error_msg += f" - {error_detail}"
                except:
                    error_msg += f" - {response.text[:200]}"

                self.logger.error(error_msg)
                raise ConnectionError(error_msg)

        except requests.exceptions.Timeout:
            raise ConnectionError("LM Studio request timeout. Check if server is running.")
        except requests.exceptions.ConnectionError as e:
            raise ConnectionError(
                f"Cannot connect to LM Studio at {self.base_url}. "
                "Make sure LM Studio is running and server is started."
            ) from e
        except Exception as e:
            self.logger.error(f"Unexpected error in streaming: {e}")
            raise

    def get_model_info(self) -> Dict:
        """
        Get information about currently loaded model

        Returns:
            Dictionary with model information
        """
        try:
            response = requests.get(self.models_url, timeout=5)
            if response.status_code == 200:
                data = response.json()
                models = data.get('data', [])

                # Find our model or return first one
                for model in models:
                    if model.get('id') == self.model:
                        return model

                # Return first model if ours not found
                return models[0] if models else {}
            return {}
        except Exception as e:
            self.logger.error(f"Failed to get model info: {e}")
            return {}

    def get_info(self) -> Dict:
        """
        Get comprehensive client information

        Returns:
            Dictionary with client and model metadata
        """
        base_info = super().get_info()

        # Add LM Studio specific info
        if self.check_connection():
            model_info = self.get_model_info()
            base_info['model_details'] = model_info
            base_info['available_models'] = self.list_models()

        return base_info
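A closing usage note: chat_stream yields already-decoded text chunks, so incremental display is just a loop over the generator. A minimal sketch, assuming LM Studio's server is running on the default port with a model loaded:

# Sketch only: streams a reply chunk-by-chunk to the console.
# Assumes an LM Studio server on the default port; parameters mirror chat_stream's signature.
from mem_llm.clients import LMStudioClient

client = LMStudioClient(model="local-model", base_url="http://localhost:1234")
messages = [{"role": "user", "content": "In one sentence, what does a memory-enabled assistant do?"}]

for chunk in client.chat_stream(messages, temperature=0.7, max_tokens=256):
    print(chunk, end="", flush=True)  # each chunk is the delta content from one "data:" line
print()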