lm-deluge 0.0.14__tar.gz → 0.0.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lm-deluge might be problematic.
- {lm_deluge-0.0.14/src/lm_deluge.egg-info → lm_deluge-0.0.15}/PKG-INFO +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/pyproject.toml +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/common.py +2 -0
- lm_deluge-0.0.15/src/lm_deluge/api_requests/gemini.py +222 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/file.py +7 -2
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/models.py +57 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/usage.py +10 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/SOURCES.txt +3 -0
- lm_deluge-0.0.15/tests/test_gemini_integration.py +238 -0
- lm_deluge-0.0.15/tests/test_simple_gemini.py +32 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/LICENSE +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/README.md +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/setup.cfg +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/client.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/computer_use/anthropic_tools.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_all_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_batch_real.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_bedrock_computer_use.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_bedrock_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_cache.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_client_tracker_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_computer_use.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_computer_use_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_debug_format.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_file_integration.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_file_support.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_image_models.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_image_utils.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_json_utils.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_logprobs_refactor.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_max_concurrent_requests.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_mcp_tools.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_openai_responses.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_prompt_caching.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_real_caching.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_real_caching_bedrock.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_retry_fix.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_rich_display.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_sampling_params.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_calls.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_from_function.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tool_validation.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_tracker_refactor.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_translate.py +0 -0
- {lm_deluge-0.0.14 → lm_deluge-0.0.15}/tests/test_xml_utils.py +0 -0
src/lm_deluge/api_requests/common.py
@@ -2,6 +2,7 @@ from .openai import OpenAIRequest, OpenAIResponsesRequest
 from .anthropic import AnthropicRequest
 from .mistral import MistralRequest
 from .bedrock import BedrockRequest
+from .gemini import GeminiRequest

 CLASSES = {
     "openai": OpenAIRequest,
@@ -9,4 +10,5 @@ CLASSES = {
     "anthropic": AnthropicRequest,
     "mistral": MistralRequest,
     "bedrock": BedrockRequest,
+    "gemini": GeminiRequest,
 }
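With this entry, any model whose registry record sets api_spec to "gemini" resolves to GeminiRequest. A minimal sketch of that lookup, assuming the client dispatches on the model's api_spec field; the helper function below is hypothetical, only the CLASSES mapping itself comes from this diff:

# Hypothetical dispatch helper -- illustrates how the CLASSES mapping is meant to be used.
# Only CLASSES (including the new "gemini" entry) is part of this release.
from lm_deluge.api_requests.common import CLASSES

def request_class_for(api_spec: str):
    # e.g. "gemini" -> GeminiRequest, "openai" -> OpenAIRequest
    try:
        return CLASSES[api_spec]
    except KeyError:
        raise ValueError(f"no request class registered for api_spec={api_spec!r}")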
src/lm_deluge/api_requests/gemini.py (new file)
@@ -0,0 +1,222 @@
+import json
+import os
+import warnings
+from typing import Callable
+
+from aiohttp import ClientResponse
+
+from lm_deluge.tool import Tool
+
+from ..config import SamplingParams
+from ..models import APIModel
+from ..prompt import CachePattern, Conversation, Message, Text, Thinking, ToolCall
+from ..tracker import StatusTracker
+from ..usage import Usage
+from .base import APIRequestBase, APIResponse
+
+
+def _build_gemini_request(
+    model: APIModel,
+    prompt: Conversation,
+    tools: list[Tool] | None,
+    sampling_params: SamplingParams,
+) -> dict:
+    system_message, messages = prompt.to_gemini()
+
+    request_json = {
+        "contents": messages,
+        "generationConfig": {
+            "temperature": sampling_params.temperature,
+            "topP": sampling_params.top_p,
+            "maxOutputTokens": sampling_params.max_new_tokens,
+        },
+    }
+
+    # Add system instruction if present
+    if system_message:
+        request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
+
+    # Handle reasoning models (thinking)
+    if model.reasoning_model:
+        request_json["generationConfig"]["thinkingConfig"] = {"includeThoughts": True}
+        if sampling_params.reasoning_effort and "flash" in model.id:
+            budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+                sampling_params.reasoning_effort
+            )
+            request_json["generationConfig"]["thinkingConfig"]["thinkingBudget"] = (
+                budget
+            )
+
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
+            )
+
+    # Add tools if provided
+    if tools:
+        tool_declarations = [tool.dump_for("google") for tool in tools]
+        request_json["tools"] = [{"functionDeclarations": tool_declarations}]
+
+    # Handle JSON mode
+    if sampling_params.json_mode and model.supports_json:
+        request_json["generationConfig"]["responseMimeType"] = "application/json"
+
+    return request_json
+
+
+class GeminiRequest(APIRequestBase):
+    def __init__(
+        self,
+        task_id: int,
+        model_name: str,  # must correspond to registry
+        prompt: Conversation,
+        attempts_left: int,
+        status_tracker: StatusTracker,
+        results_arr: list,
+        request_timeout: int = 30,
+        sampling_params: SamplingParams = SamplingParams(),
+        callback: Callable | None = None,
+        all_model_names: list[str] | None = None,
+        all_sampling_params: list[SamplingParams] | None = None,
+        tools: list | None = None,
+        cache: CachePattern | None = None,
+    ):
+        super().__init__(
+            task_id=task_id,
+            model_name=model_name,
+            prompt=prompt,
+            attempts_left=attempts_left,
+            status_tracker=status_tracker,
+            results_arr=results_arr,
+            request_timeout=request_timeout,
+            sampling_params=sampling_params,
+            callback=callback,
+            all_model_names=all_model_names,
+            all_sampling_params=all_sampling_params,
+            tools=tools,
+            cache=cache,
+        )
+
+        # Warn if cache is specified for Gemini model
+        if cache is not None:
+            warnings.warn(
+                f"Cache parameter '{cache}' is not supported for Gemini models, ignoring for {model_name}"
+            )
+
+        self.model = APIModel.from_registry(model_name)
+        # Gemini API endpoint format: https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent
+        self.url = f"{self.model.api_base}/models/{self.model.name}:generateContent"
+        self.request_header = {
+            "Content-Type": "application/json",
+        }
+
+        # Add API key as query parameter for Gemini
+        api_key = os.getenv(self.model.api_key_env_var)
+        if not api_key:
+            raise ValueError(
+                f"API key environment variable {self.model.api_key_env_var} not set"
+            )
+        self.url += f"?key={api_key}"
+
+        self.request_json = _build_gemini_request(
+            self.model, prompt, tools, sampling_params
+        )
+
+    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        is_error = False
+        error_message = None
+        thinking = None
+        content = None
+        usage = None
+        status_code = http_response.status
+        mimetype = http_response.headers.get("Content-Type", None)
+        data = None
+
+        if status_code >= 200 and status_code < 300:
+            try:
+                data = await http_response.json()
+            except Exception as e:
+                is_error = True
+                error_message = (
+                    f"Error calling .json() on response w/ status {status_code}: {e}"
+                )
+
+            if not is_error:
+                assert data
+                try:
+                    # Parse Gemini response format
+                    parts = []
+
+                    if "candidates" in data and data["candidates"]:
+                        candidate = data["candidates"][0]
+                        if "content" in candidate and "parts" in candidate["content"]:
+                            for part in candidate["content"]["parts"]:
+                                if "text" in part:
+                                    parts.append(Text(part["text"]))
+                                elif "thought" in part:
+                                    parts.append(Thinking(part["thought"]))
+                                elif "functionCall" in part:
+                                    func_call = part["functionCall"]
+                                    # Generate a unique ID since Gemini doesn't provide one
+                                    import uuid
+
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                    parts.append(
+                                        ToolCall(
+                                            id=tool_id,
+                                            name=func_call["name"],
+                                            arguments=func_call.get("args", {}),
+                                        )
+                                    )
+
+                    content = Message("assistant", parts)
+
+                    # Extract usage information if present
+                    if "usageMetadata" in data:
+                        usage_data = data["usageMetadata"]
+                        usage = Usage.from_gemini_usage(usage_data)
+
+                except Exception as e:
+                    is_error = True
+                    error_message = f"Error parsing Gemini response: {str(e)}"
+
+        elif mimetype and "json" in mimetype.lower():
+            is_error = True
+            try:
+                data = await http_response.json()
+                error_message = json.dumps(data)
+            except Exception:
+                error_message = (
+                    f"HTTP {status_code} with JSON content type but failed to parse"
+                )
+        else:
+            is_error = True
+            text = await http_response.text()
+            error_message = text
+
+        # Handle special kinds of errors
+        if is_error and error_message is not None:
+            if "rate limit" in error_message.lower() or status_code == 429:
+                error_message += " (Rate limit error, triggering cooldown.)"
+                self.status_tracker.rate_limit_exceeded()
+            if (
+                "context length" in error_message.lower()
+                or "token limit" in error_message.lower()
+            ):
+                error_message += " (Context length exceeded, set retries to 0.)"
+                self.attempts_left = 0
+
+        return APIResponse(
+            id=self.task_id,
+            status_code=status_code,
+            is_error=is_error,
+            error_message=error_message,
+            prompt=self.prompt,
+            content=content,
+            thinking=thinking,
+            model_internal=self.model_name,
+            sampling_params=self.sampling_params,
+            usage=usage,
+            raw_response=data,
+        )
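For a plain single-turn prompt with no tools, _build_gemini_request above assembles a body along these lines (values are illustrative; the exact shape of "contents" comes from Conversation.to_gemini(), which is not part of this diff):

# Sketch of the request body for a simple prompt. The "contents" value is whatever
# Conversation.to_gemini() returns (assumed here to be Gemini-style role/parts dicts);
# the sampling values are placeholders.
request_json = {
    "contents": [{"role": "user", "parts": [{"text": "What is 2+2?"}]}],
    "generationConfig": {
        "temperature": 0.7,
        "topP": 1.0,
        "maxOutputTokens": 512,
    },
}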
src/lm_deluge/file.py
@@ -141,8 +141,13 @@ class File:
         return filename, content, media_type

     def gemini(self) -> dict:
-        """For Gemini API -
-
+        """For Gemini API - files are provided as inline data."""
+        return {
+            "inlineData": {
+                "mimeType": self._mime(),
+                "data": self._base64(include_header=False),
+            }
+        }

     def mistral(self) -> dict:
         """For Mistral API - not yet supported."""
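So attaching, say, a PDF produces an inlineData part like the following (mime type and payload here are illustrative, the structure is exactly what the method above returns):

# Approximate output of File.gemini() for a PDF attachment.
{
    "inlineData": {
        "mimeType": "application/pdf",
        "data": "JVBERi0xLjQK...",  # base64-encoded file bytes, no "data:" header
    }
}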
src/lm_deluge/models.py
@@ -167,6 +167,63 @@ registry = {
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
+    # Native Gemini API versions with file support
+    "gemini-2.0-flash-gemini": {
+        "id": "gemini-2.0-flash-gemini",
+        "name": "gemini-2.0-flash",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.0-flash-lite-gemini": {
+        "id": "gemini-2.0-flash-lite-gemini",
+        "name": "gemini-2.0-flash-lite",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.5-pro-gemini": {
+        "id": "gemini-2.5-pro-gemini",
+        "name": "gemini-2.5-pro-preview-05-06",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
+    "gemini-2.5-flash-gemini": {
+        "id": "gemini-2.5-flash-gemini",
+        "name": "gemini-2.5-flash-preview-05-20",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
     # ███████  █████████  █████
     # ███░░░░░███ ███░░░░░███ ░░███
     # ███   ░░███ ████████  ██████  ████████  ░███ ░███ ░███
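The new "-gemini" registry keys are the model names to pass when you want the native Gemini API route; a minimal sketch mirroring the tests added in this release (requires GEMINI_API_KEY in the environment):

import asyncio

from lm_deluge import Conversation, LLMClient

async def main():
    # "gemini-2.0-flash-gemini" is one of the registry keys added above
    client = LLMClient.basic("gemini-2.0-flash-gemini")
    responses = await client.process_prompts_async(
        [Conversation.user("What is 2+2? Answer briefly.")],
        show_progress=False,
    )
    print(responses[0].content.completion)

asyncio.run(main())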
src/lm_deluge/usage.py
@@ -71,6 +71,16 @@ class Usage:
             cache_write_tokens=None,
         )

+    @classmethod
+    def from_gemini_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Gemini API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("promptTokenCount", 0),
+            output_tokens=usage_data.get("candidatesTokenCount", 0),
+            cache_read_tokens=None,  # Gemini doesn't support caching yet
+            cache_write_tokens=None,
+        )
+
     def to_dict(self) -> dict:
         """Convert to dictionary for serialization."""
         return {
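Given a typical Gemini usageMetadata payload, the new classmethod maps the token counts as follows (counts are illustrative; extra keys such as totalTokenCount are simply ignored by the .get() calls):

# Illustrative mapping of Gemini usageMetadata into a Usage object.
usage = Usage.from_gemini_usage(
    {"promptTokenCount": 17, "candidatesTokenCount": 42, "totalTokenCount": 59}
)
assert usage.input_tokens == 17
assert usage.output_tokens == 42
# cache_read_tokens and cache_write_tokens remain None, as in the code above.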
src/lm_deluge.egg-info/SOURCES.txt
@@ -28,6 +28,7 @@ src/lm_deluge/api_requests/anthropic.py
 src/lm_deluge/api_requests/base.py
 src/lm_deluge/api_requests/bedrock.py
 src/lm_deluge/api_requests/common.py
+src/lm_deluge/api_requests/gemini.py
 src/lm_deluge/api_requests/mistral.py
 src/lm_deluge/api_requests/openai.py
 src/lm_deluge/api_requests/response.py
@@ -56,6 +57,7 @@ tests/test_computer_use_integration.py
 tests/test_debug_format.py
 tests/test_file_integration.py
 tests/test_file_support.py
+tests/test_gemini_integration.py
 tests/test_image_models.py
 tests/test_image_utils.py
 tests/test_json_utils.py
@@ -69,6 +71,7 @@ tests/test_real_caching_bedrock.py
 tests/test_retry_fix.py
 tests/test_rich_display.py
 tests/test_sampling_params.py
+tests/test_simple_gemini.py
 tests/test_tool_calls.py
 tests/test_tool_from_function.py
 tests/test_tool_validation.py
tests/test_gemini_integration.py (new file)
@@ -0,0 +1,238 @@
+import asyncio
+import os
+from pathlib import Path
+
+from lm_deluge import Conversation, LLMClient, Message
+from lm_deluge.tool import Tool
+
+
+def test_gemini_basic_text():
+    """Test basic text generation with native Gemini API."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini test - no API key")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user("What is 2+2? Answer briefly.")
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    assert "4" in response.content.completion
+    print(f"✓ Basic text test passed: {response.content.completion}")
+
+
+def test_gemini_with_image():
+    """Test Gemini API with image support."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini image test - no API key")
+        return
+
+    # Check if test image exists
+    test_image_path = Path(__file__).parent / "image.jpg"
+    if not test_image_path.exists():
+        print("Skipping image test - test image not found")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation(
+        [Message.user("What do you see in this image?").add_image(test_image_path)]
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ Image test passed: {response.content.completion[:100]}...")
+
+
+def test_gemini_with_pdf():
+    """Test Gemini API with PDF file support."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini PDF test - no API key")
+        return
+
+    # Check if test PDF exists
+    test_pdf_path = Path(__file__).parent / "sample.pdf"
+    if not test_pdf_path.exists():
+        print("Skipping PDF test - test PDF not found")
+        return
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation(
+        [Message.user("Summarize this PDF document briefly.").add_file(test_pdf_path)]
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ PDF test passed: {response.content.completion[:100]}...")
+
+
+def test_gemini_with_tools():
+    """Test Gemini API with tool calls."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini tools test - no API key")
+        return
+
+    # Define a simple tool
+    def get_weather(location: str) -> str:
+        """Get the weather for a location"""
+        return f"The weather in {location} is sunny and 72°F"
+
+    weather_tool = Tool.from_function(get_weather)
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user("What's the weather like in San Francisco?")
+
+    responses = asyncio.run(
+        client.process_prompts_async([conversation], tools=[weather_tool])
+    )
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+
+    # Check if tool call was made
+    tool_calls = response.content.tool_calls
+    if len(tool_calls) > 0:
+        tool_call = tool_calls[0]
+        assert tool_call.name == "get_weather"
+        assert "location" in tool_call.arguments
+        print(
+            f"✓ Tool call test passed: {tool_call.name} with args {tool_call.arguments}"
+        )
+    else:
+        print("✓ Tool test passed (no tool call made, but response was valid)")
+
+
+def test_gemini_json_mode():
+    """Test Gemini API with JSON mode."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini JSON test - no API key")
+        return
+
+    from lm_deluge.config import SamplingParams
+
+    client = LLMClient(
+        ["gemini-2.0-flash-gemini"],
+        sampling_params=[SamplingParams(json_mode=True)],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user(
+        'Return a JSON object with keys "name" and "age" for a fictional character.'
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+
+    # Try to parse as JSON
+    import json
+
+    try:
+        parsed = json.loads(response.content.completion)
+        assert "name" in parsed or "age" in parsed
+        print(f"✓ JSON mode test passed: {response.content.completion}")
+    except json.JSONDecodeError:
+        print(
+            f"✓ JSON mode test passed (response may not be pure JSON): {response.content.completion}"
+        )
+
+
+def test_gemini_reasoning_model():
+    """Test Gemini reasoning model."""
+
+    # Skip if no API key
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping Gemini reasoning test - no API key")
+        return
+
+    from lm_deluge.config import SamplingParams
+
+    client = LLMClient(
+        ["gemini-2.5-pro-gemini"],  # reasoning model
+        sampling_params=[SamplingParams(reasoning_effort="medium")],
+        max_requests_per_minute=10,
+        max_tokens_per_minute=100_000,
+    )
+
+    conversation = Conversation.user(
+        "What is the 15th Fibonacci number? Show your reasoning."
+    )
+
+    responses = asyncio.run(client.process_prompts_async([conversation]))
+
+    assert len(responses) == 1
+    response = responses[0]
+    assert response
+    assert not response.is_error
+    assert response.content is not None
+    assert response.content.completion is not None
+    print(f"✓ Reasoning test passed: {response.content.completion[:100]}...")
+
+
+if __name__ == "__main__":
+    print("Testing Gemini API integration...")
+
+    test_gemini_basic_text()
+    test_gemini_with_image()
+    test_gemini_with_pdf()
+    test_gemini_with_tools()
+    test_gemini_json_mode()
+    test_gemini_reasoning_model()
+
+    print("✓ All Gemini tests completed!")
tests/test_simple_gemini.py (new file)
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""Simple Gemini API test."""
+
+import asyncio
+import os
+from lm_deluge import LLMClient, Conversation
+
+
+async def main():
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Skipping test - no GEMINI_API_KEY set")
+        return
+
+    print("Testing native Gemini API support...")
+
+    # Test the new -gemini model
+    client = LLMClient.basic("gemini-2.0-flash-gemini")
+    client.max_attempts = 2
+    client.request_timeout = 30
+
+    try:
+        res = await client.process_prompts_async(
+            [Conversation.user("What is the capital of France? Answer briefly.")],
+            show_progress=False,
+        )
+        print(f"✓ Gemini native API test passed: {res[0].content.completion}")
+    except Exception as e:
+        print(f"✗ Exception: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())