mem-llm 1.1.0-py3-none-any.whl → 1.3.0-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their public registries and is provided for informational purposes only.
Potentially problematic release: this version of mem-llm might be problematic.
- mem_llm/__init__.py +26 -3
- mem_llm/base_llm_client.py +175 -0
- mem_llm/clients/__init__.py +25 -0
- mem_llm/clients/gemini_client.py +381 -0
- mem_llm/clients/lmstudio_client.py +280 -0
- mem_llm/clients/ollama_client.py +268 -0
- mem_llm/config_manager.py +1 -1
- mem_llm/conversation_summarizer.py +372 -0
- mem_llm/data_export_import.py +640 -0
- mem_llm/llm_client_factory.py +277 -0
- mem_llm/mem_agent.py +154 -43
- mem_llm/memory_db.py +7 -1
- mem_llm/thread_safe_db.py +7 -1
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/METADATA +84 -110
- mem_llm-1.3.0.dist-info/RECORD +29 -0
- mem_llm-1.1.0.dist-info/RECORD +0 -21
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/WHEEL +0 -0
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/entry_points.txt +0 -0
- {mem_llm-1.1.0.dist-info → mem_llm-1.3.0.dist-info}/top_level.txt +0 -0
mem_llm/__init__.py
CHANGED

@@ -5,7 +5,13 @@ AI library that remembers user interactions
 
 from .mem_agent import MemAgent
 from .memory_manager import MemoryManager
-from .llm_client import OllamaClient
+from .llm_client import OllamaClient  # Backward compatibility
+from .base_llm_client import BaseLLMClient
+from .llm_client_factory import LLMClientFactory
+
+# New multi-backend support (v1.3.0+)
+from .clients import OllamaClient as OllamaClientNew
+from .clients import LMStudioClient, GeminiClient
 
 # Tools (optional)
 try:
@@ -43,9 +49,26 @@ try:
 except ImportError:
     __all_enhanced__ = []
 
-
+# Conversation Summarization (v1.2.0+)
+try:
+    from .conversation_summarizer import ConversationSummarizer, AutoSummarizer
+    __all_summarizer__ = ["ConversationSummarizer", "AutoSummarizer"]
+except ImportError:
+    __all_summarizer__ = []
+
+# Data Export/Import (v1.2.0+)
+try:
+    from .data_export_import import DataExporter, DataImporter
+    __all_export_import__ = ["DataExporter", "DataImporter"]
+except ImportError:
+    __all_export_import__ = []
+
+__version__ = "1.3.0"
 __author__ = "C. Emre Karataş"
 
+# Multi-backend LLM support (v1.3.0+)
+__all_llm_backends__ = ["BaseLLMClient", "LLMClientFactory", "OllamaClientNew", "LMStudioClient", "GeminiClient"]
+
 # CLI
 try:
     from .cli import cli
@@ -57,4 +80,4 @@ __all__ = [
     "MemAgent",
     "MemoryManager",
     "OllamaClient",
-] + __all_tools__ + __all_pro__ + __all_cli__ + __all_security__ + __all_enhanced__
+] + __all_llm_backends__ + __all_tools__ + __all_pro__ + __all_cli__ + __all_security__ + __all_enhanced__ + __all_summarizer__ + __all_export_import__
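
For orientation, a minimal usage sketch of the names re-exported above. This is not code from the package: it assumes the package is installed, and it only exercises classes whose constructors appear elsewhere in this diff (LLMClientFactory is imported in __init__.py but its API is not shown here, so it is left out).

# Hypothetical usage sketch based only on the imports added above.
from mem_llm import BaseLLMClient, GeminiClient

def build_client() -> BaseLLMClient:
    # GeminiClient's constructor and chat() are shown later in this diff.
    # With api_key omitted it falls back to the GEMINI_API_KEY environment variable.
    return GeminiClient(model="gemini-2.5-flash")

client = build_client()
print(client.chat([{"role": "user", "content": "Hello!"}]))
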
mem_llm/base_llm_client.py
ADDED

@@ -0,0 +1,175 @@
"""
Base LLM Client Interface
==========================

Abstract base class for all LLM client implementations.
Ensures consistent interface across different backends (Ollama, LM Studio, Gemini, etc.)

Author: C. Emre Karataş
Version: 1.3.0
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Optional, Any
import logging


class BaseLLMClient(ABC):
    """
    Abstract base class for LLM clients

    All LLM backends must implement these methods to ensure
    compatibility with MemAgent and other components.
    """

    def __init__(self, model: str = None, **kwargs):
        """
        Initialize LLM client

        Args:
            model: Model name/identifier
            **kwargs: Backend-specific configuration
        """
        self.model = model
        self.logger = logging.getLogger(self.__class__.__name__)

    @abstractmethod
    def chat(self, messages: List[Dict[str, str]],
             temperature: float = 0.7,
             max_tokens: int = 2000,
             **kwargs) -> str:
        """
        Send chat request and return response

        Args:
            messages: List of messages in format:
                [{"role": "system/user/assistant", "content": "..."}]
            temperature: Sampling temperature (0.0-1.0)
            max_tokens: Maximum tokens in response
            **kwargs: Additional backend-specific parameters

        Returns:
            Model response text

        Raises:
            ConnectionError: If cannot connect to service
            ValueError: If invalid parameters
        """
        pass

    @abstractmethod
    def check_connection(self) -> bool:
        """
        Check if LLM service is available and responding

        Returns:
            True if service is available, False otherwise
        """
        pass

    def generate(self, prompt: str,
                 system_prompt: Optional[str] = None,
                 temperature: float = 0.7,
                 max_tokens: int = 500,
                 **kwargs) -> str:
        """
        Generate text from a simple prompt (convenience method)

        Args:
            prompt: User prompt
            system_prompt: Optional system prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens
            **kwargs: Additional parameters

        Returns:
            Generated text
        """
        # Convert to chat format
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        return self.chat(messages, temperature, max_tokens, **kwargs)

    def list_models(self) -> List[str]:
        """
        List available models (optional, not all backends support this)

        Returns:
            List of model names
        """
        return [self.model] if self.model else []

    def _format_messages_to_text(self, messages: List[Dict]) -> str:
        """
        Helper: Convert message list to text format

        Useful for backends that don't support chat format natively.

        Args:
            messages: Message list

        Returns:
            Formatted text prompt
        """
        result = []
        for msg in messages:
            role = msg.get('role', 'user').upper()
            content = msg.get('content', '').strip()
            if content:
                result.append(f"{role}: {content}")
        return "\n\n".join(result)

    def _validate_messages(self, messages: List[Dict]) -> bool:
        """
        Validate message format

        Args:
            messages: Messages to validate

        Returns:
            True if valid

        Raises:
            ValueError: If invalid format
        """
        if not isinstance(messages, list):
            raise ValueError("Messages must be a list")

        if not messages:
            raise ValueError("Messages list cannot be empty")

        for i, msg in enumerate(messages):
            if not isinstance(msg, dict):
                raise ValueError(f"Message {i} must be a dictionary")

            if 'role' not in msg:
                raise ValueError(f"Message {i} missing 'role' field")

            if 'content' not in msg:
                raise ValueError(f"Message {i} missing 'content' field")

            if msg['role'] not in ['system', 'user', 'assistant']:
                raise ValueError(f"Message {i} has invalid role: {msg['role']}")

        return True

    def get_info(self) -> Dict[str, Any]:
        """
        Get client information

        Returns:
            Dictionary with client metadata
        """
        return {
            'backend': self.__class__.__name__,
            'model': self.model,
            'available': self.check_connection()
        }

    def __repr__(self) -> str:
        """String representation"""
        return f"{self.__class__.__name__}(model='{self.model}')"
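
Because only chat() and check_connection() are abstract, a third-party backend can be added by subclassing BaseLLMClient. Below is a minimal sketch under that assumption; EchoClient is a hypothetical name used purely to illustrate the contract and is not part of the package.

from typing import Dict, List

from mem_llm.base_llm_client import BaseLLMClient


class EchoClient(BaseLLMClient):
    """Hypothetical backend that echoes the conversation; illustrates the required interface."""

    def chat(self, messages: List[Dict[str, str]],
             temperature: float = 0.7,
             max_tokens: int = 2000,
             **kwargs) -> str:
        self._validate_messages(messages)               # helper inherited from BaseLLMClient
        return self._format_messages_to_text(messages)  # "ROLE: content" pairs joined by blank lines

    def check_connection(self) -> bool:
        return True  # nothing external to reach


client = EchoClient(model="echo-1")
print(client.generate("Hi there", system_prompt="You are terse."))
print(client.get_info())  # {'backend': 'EchoClient', 'model': 'echo-1', 'available': True}
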
mem_llm/clients/__init__.py
ADDED

@@ -0,0 +1,25 @@
"""
LLM Clients Package
===================

Multiple LLM backend support for Mem-LLM.

Available Backends:
- OllamaClient: Local Ollama service
- LMStudioClient: LM Studio (OpenAI-compatible)
- GeminiClient: Google Gemini API

Author: C. Emre Karataş
Version: 1.3.0
"""

from .ollama_client import OllamaClient
from .lmstudio_client import LMStudioClient
from .gemini_client import GeminiClient

__all__ = [
    'OllamaClient',
    'LMStudioClient',
    'GeminiClient',
]
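
The top-level __init__.py shown earlier re-exports these same classes (with OllamaClient aliased as OllamaClientNew to avoid clashing with the legacy llm_client.OllamaClient), so both import paths below should resolve to the same objects; a small sketch, assuming the package imports cleanly:

from mem_llm.clients import GeminiClient, LMStudioClient, OllamaClient
from mem_llm import OllamaClientNew  # alias defined in mem_llm/__init__.py above

assert OllamaClientNew is OllamaClient
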
mem_llm/clients/gemini_client.py
ADDED

@@ -0,0 +1,381 @@
"""
Google Gemini LLM Client
=========================

Client for Google Gemini API (cloud service).

Features:
- Gemini 1.5 Pro, Flash, etc.
- Fast and powerful
- Large context window (up to 2M tokens)
- Multimodal support (text, images, video)

Setup:
1. Get API key from: https://makersuite.google.com/app/apikey
2. Set environment variable: export GEMINI_API_KEY="your-key"
   Or pass api_key parameter to constructor

Models:
- gemini-1.5-pro: Most capable model
- gemini-1.5-flash: Fastest model
- gemini-pro: Standard model

Author: C. Emre Karataş
Version: 1.3.0
"""

import requests
import time
import os
from typing import List, Dict, Optional
import sys

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from base_llm_client import BaseLLMClient


class GeminiClient(BaseLLMClient):
    """
    Google Gemini API client implementation

    Supports Gemini models via Google AI Studio API.
    Requires API key from Google AI Studio.

    Usage:
        # Option 1: Using environment variable
        export GEMINI_API_KEY="your-api-key"
        client = GeminiClient(model="gemini-2.5-flash")

        # Option 2: Direct API key
        client = GeminiClient(
            model="gemini-2.5-flash",
            api_key="your-api-key"
        )

        response = client.chat([{"role": "user", "content": "Hello!"}])
    """

    # Available Gemini models
    MODELS = {
        'gemini-2.5-flash': 'Latest Gemini 2.5 Flash model (recommended)'
    }

    def __init__(self,
                 model: str = "gemini-2.5-flash",
                 api_key: Optional[str] = None,
                 **kwargs):
        """
        Initialize Gemini client

        Args:
            model: Gemini model name (default: gemini-2.5-flash)
            api_key: Google AI API key (if None, reads from GEMINI_API_KEY env var)
            **kwargs: Additional configuration

        Raises:
            ValueError: If API key is not provided
        """
        super().__init__(model=model, **kwargs)

        # Get API key from parameter or environment
        self.api_key = api_key or os.getenv('GEMINI_API_KEY')

        if not self.api_key:
            raise ValueError(
                "Gemini API key is required. "
                "Set GEMINI_API_KEY environment variable or pass api_key parameter. "
                "Get key from: https://makersuite.google.com/app/apikey"
            )

        # API endpoints
        self.base_url = "https://generativelanguage.googleapis.com/v1beta"
        self.chat_url = f"{self.base_url}/models/{self.model}:generateContent"

        # Add API key to URL
        self.chat_url += f"?key={self.api_key}"

        self.logger.debug(f"Initialized Gemini client with model: {model}")

    def check_connection(self) -> bool:
        """
        Check if Gemini API is accessible

        Returns:
            True if API is available
        """
        try:
            # Send a minimal test request
            test_payload = {
                "contents": [{
                    "parts": [{"text": "Hi"}]
                }],
                "generationConfig": {
                    "maxOutputTokens": 10
                }
            }

            response = requests.post(
                self.chat_url,
                json=test_payload,
                timeout=10
            )

            # 200 = success, 400 might be rate limit or quota (but API is working)
            return response.status_code in [200, 400, 429]

        except Exception as e:
            self.logger.debug(f"Gemini connection check failed: {e}")
            return False

    def list_models(self) -> List[str]:
        """
        List available Gemini models

        Returns:
            List of model identifiers
        """
        return list(self.MODELS.keys())

    def chat(self,
             messages: List[Dict[str, str]],
             temperature: float = 0.7,
             max_tokens: int = 2000,
             **kwargs) -> str:
        """
        Send chat request to Gemini API

        Args:
            messages: Message history in OpenAI format
            temperature: Sampling temperature (0.0-2.0)
            max_tokens: Maximum tokens in response
            **kwargs: Additional Gemini-specific parameters
                - top_p: Nucleus sampling (0.0-1.0)
                - top_k: Top-K sampling
                - safety_settings: Safety filter settings

        Returns:
            Model response text

        Raises:
            ConnectionError: If cannot connect to Gemini API
            ValueError: If invalid parameters or API key
        """
        # Validate messages
        self._validate_messages(messages)

        # Convert OpenAI format to Gemini format
        gemini_messages = self._convert_to_gemini_format(messages)

        # Build Gemini payload
        payload = {
            "contents": gemini_messages,
            "generationConfig": {
                "temperature": temperature,
                "maxOutputTokens": max_tokens,
                "topP": kwargs.get("top_p", 0.95),
                "topK": kwargs.get("top_k", 40)
            }
        }

        # Add safety settings if provided
        if "safety_settings" in kwargs:
            payload["safetySettings"] = kwargs["safety_settings"]

        # Send request with retry logic
        max_retries = kwargs.get("max_retries", 3)
        for attempt in range(max_retries):
            try:
                response = requests.post(
                    self.chat_url,
                    json=payload,
                    timeout=kwargs.get("timeout", 60)
                )

                if response.status_code == 200:
                    response_data = response.json()

                    # Extract content from Gemini format
                    candidates = response_data.get('candidates', [])
                    if not candidates:
                        self.logger.warning("No candidates in Gemini response")
                        if attempt < max_retries - 1:
                            time.sleep(1.0 * (2 ** attempt))
                            continue
                        return ""

                    # Get the first candidate's content
                    content = candidates[0].get('content', {})
                    parts = content.get('parts', [])

                    if not parts:
                        self.logger.warning("No parts in Gemini response")
                        if attempt < max_retries - 1:
                            time.sleep(1.0 * (2 ** attempt))
                            continue
                        return ""

                    # Combine all text parts
                    text_parts = [part.get('text', '') for part in parts if 'text' in part]
                    result = ' '.join(text_parts).strip()

                    if not result:
                        self.logger.warning("Empty content in Gemini response")
                        if attempt < max_retries - 1:
                            time.sleep(1.0 * (2 ** attempt))
                            continue

                    # Log usage metadata if available
                    usage_metadata = response_data.get('usageMetadata', {})
                    if usage_metadata:
                        self.logger.debug(
                            f"Gemini usage - "
                            f"prompt: {usage_metadata.get('promptTokenCount', 0)} tokens, "
                            f"response: {usage_metadata.get('candidatesTokenCount', 0)} tokens, "
                            f"total: {usage_metadata.get('totalTokenCount', 0)} tokens"
                        )

                    # Check for safety ratings/blocks
                    finish_reason = candidates[0].get('finishReason', '')
                    if finish_reason == 'SAFETY':
                        self.logger.warning("Response blocked by Gemini safety filters")
                        safety_ratings = candidates[0].get('safetyRatings', [])
                        self.logger.debug(f"Safety ratings: {safety_ratings}")

                    return result

                elif response.status_code == 400:
                    # Bad request - likely API key or parameter issue
                    error_data = response.json()
                    error_msg = error_data.get('error', {}).get('message', response.text)
                    self.logger.error(f"Gemini API error (400): {error_msg}")

                    if "API_KEY" in error_msg.upper():
                        raise ValueError(f"Invalid Gemini API key. Get key from: https://makersuite.google.com/app/apikey")

                    raise ValueError(f"Gemini API error: {error_msg}")

                elif response.status_code == 429:
                    # Rate limit - retry with exponential backoff
                    self.logger.warning(f"Gemini rate limit hit (attempt {attempt + 1}/{max_retries})")
                    if attempt < max_retries - 1:
                        wait_time = min(30, 2.0 * (2 ** attempt))
                        time.sleep(wait_time)
                        continue
                    raise ConnectionError("Gemini API rate limit exceeded. Please try again later.")

                elif response.status_code == 403:
                    # Permission denied - quota or billing issue
                    error_data = response.json()
                    error_msg = error_data.get('error', {}).get('message', response.text)
                    raise ValueError(f"Gemini API permission denied: {error_msg}")

                else:
                    # Other errors
                    error_msg = f"Gemini API error: {response.status_code}"
                    try:
                        error_data = response.json()
                        error_detail = error_data.get('error', {}).get('message', response.text)
                        error_msg += f" - {error_detail}"
                    except:
                        error_msg += f" - {response.text[:200]}"

                    self.logger.error(error_msg)

                    if attempt < max_retries - 1:
                        time.sleep(1.0 * (2 ** attempt))
                        continue
                    raise ConnectionError(error_msg)

            except requests.exceptions.Timeout:
                self.logger.warning(f"Gemini request timeout (attempt {attempt + 1}/{max_retries})")
                if attempt < max_retries - 1:
                    time.sleep(2.0 * (2 ** attempt))
                    continue
                raise ConnectionError("Gemini API request timeout. Please try again.")

            except requests.exceptions.ConnectionError as e:
                self.logger.warning(f"Cannot connect to Gemini API (attempt {attempt + 1}/{max_retries})")
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                raise ConnectionError("Cannot connect to Gemini API. Check your internet connection.") from e

            except (ValueError, ConnectionError):
                # Re-raise our custom exceptions
                raise

            except Exception as e:
                self.logger.error(f"Unexpected error: {e}")
                if attempt < max_retries - 1:
                    time.sleep(1.0 * (2 ** attempt))
                    continue
                raise

        raise ConnectionError("Failed to get response after maximum retries")

    def _convert_to_gemini_format(self, messages: List[Dict[str, str]]) -> List[Dict]:
        """
        Convert OpenAI message format to Gemini format

        OpenAI format: [{"role": "user/assistant/system", "content": "..."}]
        Gemini format: [{"role": "user/model", "parts": [{"text": "..."}]}]

        Args:
            messages: Messages in OpenAI format

        Returns:
            Messages in Gemini format
        """
        gemini_messages = []
        system_prompt = None

        for msg in messages:
            role = msg.get('role', 'user')
            content = msg.get('content', '').strip()

            if not content:
                continue

            # Handle system messages (Gemini doesn't have system role)
            if role == 'system':
                # Prepend system prompt to first user message
                system_prompt = content
                continue

            # Convert role: assistant -> model
            gemini_role = 'model' if role == 'assistant' else 'user'

            # Build Gemini message
            gemini_msg = {
                "role": gemini_role,
                "parts": [{"text": content}]
            }

            # If this is the first user message and we have a system prompt,
            # prepend system prompt to the content
            if gemini_role == 'user' and system_prompt and not gemini_messages:
                gemini_msg["parts"][0]["text"] = f"{system_prompt}\n\n{content}"
                system_prompt = None  # Only use once

            gemini_messages.append(gemini_msg)

        return gemini_messages

    def get_info(self) -> Dict:
        """
        Get comprehensive client information

        Returns:
            Dictionary with client metadata
        """
        base_info = super().get_info()

        # Add Gemini-specific info
        base_info['api_status'] = 'configured' if self.api_key else 'missing_key'
        base_info['model_description'] = self.MODELS.get(self.model, 'Unknown model')
        base_info['available_models'] = self.list_models()

        return base_info
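
To make the role mapping in _convert_to_gemini_format() concrete, here is a hedged sketch that traces the method on a short OpenAI-style history; the dummy api_key only satisfies the constructor check, and no request is sent.

from mem_llm.clients import GeminiClient

messages = [
    {"role": "system", "content": "Answer briefly."},
    {"role": "user", "content": "What is Mem-LLM?"},
    {"role": "assistant", "content": "A memory-aware LLM wrapper."},
]

client = GeminiClient(api_key="dummy-key-for-offline-inspection")  # placeholder key, never sent anywhere here
print(client._convert_to_gemini_format(messages))
# Expected by tracing the method above: the system prompt is folded into the first user turn,
# and "assistant" is mapped to "model":
# [{'role': 'user', 'parts': [{'text': 'Answer briefly.\n\nWhat is Mem-LLM?'}]},
#  {'role': 'model', 'parts': [{'text': 'A memory-aware LLM wrapper.'}]}]
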