lm-deluge 0.0.14__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lm-deluge might be problematic.

@@ -2,6 +2,7 @@ from .openai import OpenAIRequest, OpenAIResponsesRequest
 from .anthropic import AnthropicRequest
 from .mistral import MistralRequest
 from .bedrock import BedrockRequest
+from .gemini import GeminiRequest
 
 CLASSES = {
     "openai": OpenAIRequest,
@@ -9,4 +10,5 @@ CLASSES = {
     "anthropic": AnthropicRequest,
     "mistral": MistralRequest,
     "bedrock": BedrockRequest,
+    "gemini": GeminiRequest,
 }
@@ -0,0 +1,222 @@
+import json
+import os
+import warnings
+from typing import Callable
+
+from aiohttp import ClientResponse
+
+from lm_deluge.tool import Tool
+
+from ..config import SamplingParams
+from ..models import APIModel
+from ..prompt import CachePattern, Conversation, Message, Text, Thinking, ToolCall
+from ..tracker import StatusTracker
+from ..usage import Usage
+from .base import APIRequestBase, APIResponse
+
+
+def _build_gemini_request(
+    model: APIModel,
+    prompt: Conversation,
+    tools: list[Tool] | None,
+    sampling_params: SamplingParams,
+) -> dict:
+    system_message, messages = prompt.to_gemini()
+
+    request_json = {
+        "contents": messages,
+        "generationConfig": {
+            "temperature": sampling_params.temperature,
+            "topP": sampling_params.top_p,
+            "maxOutputTokens": sampling_params.max_new_tokens,
+        },
+    }
+
+    # Add system instruction if present
+    if system_message:
+        request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
+
+    # Handle reasoning models (thinking)
+    if model.reasoning_model:
+        request_json["generationConfig"]["thinkingConfig"] = {"includeThoughts": True}
+        if sampling_params.reasoning_effort and "flash" in model.id:
+            budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+                sampling_params.reasoning_effort
+            )
+            request_json["generationConfig"]["thinkingConfig"]["thinkingBudget"] = (
+                budget
+            )
+
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
+            )
+
+    # Add tools if provided
+    if tools:
+        tool_declarations = [tool.dump_for("google") for tool in tools]
+        request_json["tools"] = [{"functionDeclarations": tool_declarations}]
+
+    # Handle JSON mode
+    if sampling_params.json_mode and model.supports_json:
+        request_json["generationConfig"]["responseMimeType"] = "application/json"
+
+    return request_json
+
+
+class GeminiRequest(APIRequestBase):
+    def __init__(
+        self,
+        task_id: int,
+        model_name: str,  # must correspond to registry
+        prompt: Conversation,
+        attempts_left: int,
+        status_tracker: StatusTracker,
+        results_arr: list,
+        request_timeout: int = 30,
+        sampling_params: SamplingParams = SamplingParams(),
+        callback: Callable | None = None,
+        all_model_names: list[str] | None = None,
+        all_sampling_params: list[SamplingParams] | None = None,
+        tools: list | None = None,
+        cache: CachePattern | None = None,
+    ):
+        super().__init__(
+            task_id=task_id,
+            model_name=model_name,
+            prompt=prompt,
+            attempts_left=attempts_left,
+            status_tracker=status_tracker,
+            results_arr=results_arr,
+            request_timeout=request_timeout,
+            sampling_params=sampling_params,
+            callback=callback,
+            all_model_names=all_model_names,
+            all_sampling_params=all_sampling_params,
+            tools=tools,
+            cache=cache,
+        )
+
+        # Warn if cache is specified for Gemini model
+        if cache is not None:
+            warnings.warn(
+                f"Cache parameter '{cache}' is not supported for Gemini models, ignoring for {model_name}"
+            )
+
+        self.model = APIModel.from_registry(model_name)
+        # Gemini API endpoint format: https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent
+        self.url = f"{self.model.api_base}/models/{self.model.name}:generateContent"
+        self.request_header = {
+            "Content-Type": "application/json",
+        }
+
+        # Add API key as query parameter for Gemini
+        api_key = os.getenv(self.model.api_key_env_var)
+        if not api_key:
+            raise ValueError(
+                f"API key environment variable {self.model.api_key_env_var} not set"
+            )
+        self.url += f"?key={api_key}"
+
+        self.request_json = _build_gemini_request(
+            self.model, prompt, tools, sampling_params
+        )
+
+    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        is_error = False
+        error_message = None
+        thinking = None
+        content = None
+        usage = None
+        status_code = http_response.status
+        mimetype = http_response.headers.get("Content-Type", None)
+        data = None
+
+        if status_code >= 200 and status_code < 300:
+            try:
+                data = await http_response.json()
+            except Exception as e:
+                is_error = True
+                error_message = (
+                    f"Error calling .json() on response w/ status {status_code}: {e}"
+                )
+
+            if not is_error:
+                assert data
+                try:
+                    # Parse Gemini response format
+                    parts = []
+
+                    if "candidates" in data and data["candidates"]:
+                        candidate = data["candidates"][0]
+                        if "content" in candidate and "parts" in candidate["content"]:
+                            for part in candidate["content"]["parts"]:
+                                if "text" in part:
+                                    parts.append(Text(part["text"]))
+                                elif "thought" in part:
+                                    parts.append(Thinking(part["thought"]))
+                                elif "functionCall" in part:
+                                    func_call = part["functionCall"]
+                                    # Generate a unique ID since Gemini doesn't provide one
+                                    import uuid
+
+                                    tool_id = f"call_{uuid.uuid4().hex[:8]}"
+                                    parts.append(
+                                        ToolCall(
+                                            id=tool_id,
+                                            name=func_call["name"],
+                                            arguments=func_call.get("args", {}),
+                                        )
+                                    )
+
+                    content = Message("assistant", parts)
+
+                    # Extract usage information if present
+                    if "usageMetadata" in data:
+                        usage_data = data["usageMetadata"]
+                        usage = Usage.from_gemini_usage(usage_data)
+
+                except Exception as e:
+                    is_error = True
+                    error_message = f"Error parsing Gemini response: {str(e)}"
+
+        elif mimetype and "json" in mimetype.lower():
+            is_error = True
+            try:
+                data = await http_response.json()
+                error_message = json.dumps(data)
+            except Exception:
+                error_message = (
+                    f"HTTP {status_code} with JSON content type but failed to parse"
+                )
+        else:
+            is_error = True
+            text = await http_response.text()
+            error_message = text
+
+        # Handle special kinds of errors
+        if is_error and error_message is not None:
+            if "rate limit" in error_message.lower() or status_code == 429:
+                error_message += " (Rate limit error, triggering cooldown.)"
+                self.status_tracker.rate_limit_exceeded()
+            if (
+                "context length" in error_message.lower()
+                or "token limit" in error_message.lower()
+            ):
+                error_message += " (Context length exceeded, set retries to 0.)"
+                self.attempts_left = 0
+
+        return APIResponse(
+            id=self.task_id,
+            status_code=status_code,
+            is_error=is_error,
+            error_message=error_message,
+            prompt=self.prompt,
+            content=content,
+            thinking=thinking,
+            model_internal=self.model_name,
+            sampling_params=self.sampling_params,
+            usage=usage,
+            raw_response=data,
+        )
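
For reference, a minimal sketch (not part of the diff) of the generateContent payload that _build_gemini_request assembles for a single-turn text prompt. The message content, sampling values, and system instruction below are illustrative placeholders, and the exact "contents" structure depends on what Conversation.to_gemini() returns.

    import json

    # Illustrative payload only; values are placeholders, not library defaults.
    example_payload = {
        "contents": [
            {"role": "user", "parts": [{"text": "Summarize this paragraph."}]},
        ],
        "generationConfig": {
            "temperature": 0.7,      # sampling_params.temperature
            "topP": 1.0,             # sampling_params.top_p
            "maxOutputTokens": 512,  # sampling_params.max_new_tokens
        },
        # Present only when the conversation carries a system message:
        "systemInstruction": {"parts": [{"text": "You are a concise assistant."}]},
    }
    print(json.dumps(example_payload, indent=2))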
lm_deluge/file.py CHANGED
@@ -141,8 +141,13 @@ class File:
         return filename, content, media_type
 
     def gemini(self) -> dict:
-        """For Gemini API - not yet supported."""
-        raise NotImplementedError("File support for Gemini is not yet implemented")
+        """For Gemini API - files are provided as inline data."""
+        return {
+            "inlineData": {
+                "mimeType": self._mime(),
+                "data": self._base64(include_header=False),
+            }
+        }
 
     def mistral(self) -> dict:
         """For Mistral API - not yet supported."""
lm_deluge/models.py CHANGED
@@ -167,6 +167,63 @@ registry = {
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
     },
+    # Native Gemini API versions with file support
+    "gemini-2.0-flash-gemini": {
+        "id": "gemini-2.0-flash-gemini",
+        "name": "gemini-2.0-flash",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.0-flash-lite-gemini": {
+        "id": "gemini-2.0-flash-lite-gemini",
+        "name": "gemini-2.0-flash-lite",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": False,
+    },
+    "gemini-2.5-pro-gemini": {
+        "id": "gemini-2.5-pro-gemini",
+        "name": "gemini-2.5-pro-preview-05-06",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
+    "gemini-2.5-flash-gemini": {
+        "id": "gemini-2.5-flash-gemini",
+        "name": "gemini-2.5-flash-preview-05-20",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.1,
+        "output_cost": 0.4,
+        "requests_per_minute": 20,
+        "tokens_per_minute": 100_000,
+        "reasoning_model": True,
+    },
     # ███████ █████████ █████
     # ███░░░░░███ ███░░░░░███ ░░███
     # ███ ░░███ ████████ ██████ ████████ ░███ ░███ ░███
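
As a quick sanity check (not part of the diff), here is how GeminiRequest.__init__ combines a registry entry's api_base and name into the request URL; the API key value is a placeholder.

    # Illustrative: endpoint built from the "gemini-2.0-flash-gemini" registry entry.
    api_base = "https://generativelanguage.googleapis.com/v1beta"
    name = "gemini-2.0-flash"
    url = f"{api_base}/models/{name}:generateContent?key=YOUR_GEMINI_API_KEY"
    # -> .../v1beta/models/gemini-2.0-flash:generateContent?key=YOUR_GEMINI_API_KEY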
lm_deluge/usage.py CHANGED
@@ -71,6 +71,16 @@ class Usage:
             cache_write_tokens=None,
         )
 
+    @classmethod
+    def from_gemini_usage(cls, usage_data: dict) -> "Usage":
+        """Create Usage from Gemini API response usage data."""
+        return cls(
+            input_tokens=usage_data.get("promptTokenCount", 0),
+            output_tokens=usage_data.get("candidatesTokenCount", 0),
+            cache_read_tokens=None,  # Gemini doesn't support caching yet
+            cache_write_tokens=None,
+        )
+
     def to_dict(self) -> dict:
         """Convert to dictionary for serialization."""
         return {
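
A small sketch (not from the diff) of the mapping from_gemini_usage performs; the token counts are made up.

    from lm_deluge.usage import Usage

    # Illustrative usageMetadata block from a generateContent response.
    usage_data = {"promptTokenCount": 128, "candidatesTokenCount": 56}
    usage = Usage.from_gemini_usage(usage_data)
    # -> input_tokens=128, output_tokens=56, cache_read_tokens=None, cache_write_tokens=None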
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.14
+Version: 0.0.15
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -6,20 +6,21 @@ lm_deluge/client.py,sha256=kMHA3VlCRk_Ly1CiJ6rRz2GxttxhVuw6WEQtdMVrK-4,19806
 lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
-lm_deluge/file.py,sha256=9l-zWKoHPnPhTL_CZNbxyoKwbLxlXHkRU2bz43qxaV4,5311
+lm_deluge/file.py,sha256=zQH1STMjCG9pczO7Fk9Jw0_0Pj_8CogcdIxTe4J4AJw,5414
 lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
 lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
-lm_deluge/models.py,sha256=gW9ZhKYjwC-ZF-SzWqagFUE_7Mqerdtt_T5NxGo040E,46583
+lm_deluge/models.py,sha256=Xad2Ya2U4nk0z6m0l8iba8EE34-mI2HbRqdXrM6Fqc0,48641
 lm_deluge/prompt.py,sha256=KOuJFwpRKuz2F5WLniZzjOTW05I--mzYyMglr-s47F8,34601
 lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=C2zwU9-7fldfYT0TZDoVVGGSC6dN_It9GSxnfkN6Z_w,9822
 lm_deluge/tracker.py,sha256=4QQ0-H01KQp8x8KccidBIJWA5zfSQyA0kgTynvSG0gk,9202
-lm_deluge/usage.py,sha256=oS-rmF3ZJ1RMtR7WI6BB2uVOAjJg0scvGF3zZRahWVg,4449
+lm_deluge/usage.py,sha256=VMEKghePFIID5JFBObqYxFpgYxnbYm_dnHy7V1-_T6M,4866
 lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
 lm_deluge/api_requests/anthropic.py,sha256=itKPu1cqCYcrr4fkLarlvSYr6tqLEAGVLGXEG05QXWM,8345
 lm_deluge/api_requests/base.py,sha256=THgCceZ_z9YjA_E9WWME5f2tIRSOOI2OAQCAWVlV-Xg,12448
 lm_deluge/api_requests/bedrock.py,sha256=yh4-zMrjlQfmxoBbrc2WYJ8gEqVkTP_-tMR7-XbTAtQ,11753
-lm_deluge/api_requests/common.py,sha256=pcOpODL4heoaNLjbA6_ogkrOAbUSKY3F37D2EyMLW10,359
+lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
+lm_deluge/api_requests/gemini.py,sha256=8qWdHFsse3gYU2MiJRI_FAmM-Ez8YCGmHtHGI6_z-ww,8203
 lm_deluge/api_requests/mistral.py,sha256=PkuoKbOJAB6DOK_NvzbxpWPAktfvonf69QjC0tVCYuE,5366
 lm_deluge/api_requests/openai.py,sha256=HUn83Y_Roo3pCUTBnrQhL9skW_PJ4OvS5gr5rIg58dU,19366
 lm_deluge/api_requests/response.py,sha256=X6AHXv-4dWHLKkPv7J0MSesweunqxIqJED6UY6ypdzE,5770
@@ -37,8 +38,8 @@ lm_deluge/util/json.py,sha256=_4Oar2Cmz2L1DK3EtPLPDxD6rsYHxjROmV8ZpmMjQ-4,5822
 lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.14.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.14.dist-info/METADATA,sha256=iK9UuTpf235TbQQ6CkrLX725loOMSdwTscZJQgEHeoo,11942
-lm_deluge-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.14.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.14.dist-info/RECORD,,
+lm_deluge-0.0.15.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.15.dist-info/METADATA,sha256=Xahpew4j6u9EgJGJf6l_wvnKpq1c2I1hoQQh3RIhUes,11942
+lm_deluge-0.0.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.15.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.15.dist-info/RECORD,,