lm-deluge 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lm-deluge might be problematic.
- lm_deluge/api_requests/anthropic.py +16 -13
- lm_deluge/api_requests/base.py +10 -2
- lm_deluge/api_requests/bedrock.py +18 -20
- lm_deluge/api_requests/gemini.py +4 -3
- lm_deluge/api_requests/mistral.py +2 -0
- lm_deluge/api_requests/openai.py +96 -83
- lm_deluge/api_requests/response.py +4 -2
- lm_deluge/batches.py +86 -77
- lm_deluge/client.py +19 -10
- lm_deluge/image.py +6 -2
- lm_deluge/models.py +61 -59
- lm_deluge/prompt.py +104 -56
- lm_deluge/request_context.py +2 -0
- lm_deluge/tool.py +115 -26
- {lm_deluge-0.0.20.dist-info → lm_deluge-0.0.22.dist-info}/METADATA +1 -1
- {lm_deluge-0.0.20.dist-info → lm_deluge-0.0.22.dist-info}/RECORD +19 -19
- {lm_deluge-0.0.20.dist-info → lm_deluge-0.0.22.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.20.dist-info → lm_deluge-0.0.22.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.20.dist-info → lm_deluge-0.0.22.dist-info}/top_level.txt +0 -0
lm_deluge/batches.py
CHANGED
```diff
@@ -3,7 +3,7 @@ import json
 import time
 import asyncio
 import aiohttp
-import
+import tempfile
 from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel
@@ -79,11 +79,8 @@ def _create_batch_status_display(
     return grid


-async def submit_batch_oa(batch_requests: list[dict]):
-    """
-    pd.DataFrame(batch_requests).to_json(
-        "requests_temp.jsonl", orient="records", lines=True
-    )
+async def submit_batch_oa(file_path: str):
+    """Upload a JSONL file and create one OpenAI batch."""

     # upload the file
     api_key = os.environ.get("OPENAI_API_KEY", None)
@@ -99,21 +96,22 @@ async def submit_batch_oa(batch_requests: list[dict]):
         url = "https://api.openai.com/v1/files"
         data = aiohttp.FormData()
         data.add_field("purpose", "batch")
-
-
-
-
-
-
-
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error uploading file: {text}")
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )

-
-
-
+        async with session.post(url, data=data, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error uploading file: {text}")
+
+            print("File uploaded successfully")
+            response_data = await response.json()
+            file_id = response_data["id"]

         # Create batch
         url = "https://api.openai.com/v1/batches"
@@ -131,7 +129,36 @@ async def submit_batch_oa(batch_requests: list[dict]):
             response_data = await response.json()
             batch_id = response_data["id"]
             print("Batch job started successfully: id = ", batch_id)
-
+
+            os.remove(file_path)
+            return batch_id
+
+
+async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
+    """Upload a JSONL file and create one Anthropic batch."""
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{registry[model].api_base}/messages/batches"
+        data = aiohttp.FormData()
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )
+
+        async with session.post(url, data=data, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")
+
+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+
+            os.remove(file_path)
+            return batch_id


 async def submit_batches_oa(
@@ -139,38 +166,38 @@ async def submit_batches_oa(
     sampling_params: SamplingParams,
     prompts: Sequence[str | list[dict] | Conversation],
 ):
-
+    """Write OpenAI batch requests to a file and submit."""
+
     prompts = prompts_to_conversations(prompts)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
-    ids = [i for i, _ in enumerate(prompts)]

-    # create file with requests to send to batch api
-    batch_requests = []
     model_obj = APIModel.from_registry(model)
-
-        assert isinstance(prompt, Conversation)
-        batch_requests.append(
-            {
-                "custom_id": str(id),
-                "method": "POST",
-                "url": "/v1/chat/completions",
-                "body": _build_oa_chat_request(model_obj, prompt, [], sampling_params),
-            }
-        )
-
-    # since the api only accepts up to 50,000 requests per batch job, we chunk into 50k chunks
+
     BATCH_SIZE = 50_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
     tasks = []
-
-
+
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                request = {
+                    "custom_id": str(idx),
+                    "method": "POST",
+                    "url": "/v1/chat/completions",
+                    "body": _build_oa_chat_request(model_obj, prompt, [], sampling_params),
+                }
+                json.dump(request, f)
+                f.write("\n")
+
+            file_path = f.name
+
+        tasks.append(asyncio.create_task(submit_batch_oa(file_path)))
+
     batch_ids = await asyncio.gather(*tasks)

-    print(f"Submitted {len(
+    print(f"Submitted {len(tasks)} batch jobs.")

     return batch_ids

@@ -196,47 +223,29 @@ async def submit_batches_anthropic(

     # Convert prompts to Conversations
     prompts = prompts_to_conversations(prompts)
-    # Create batch requests
-    request_headers = None
-    batch_requests = []
-    for i, prompt in enumerate(prompts):
-        assert isinstance(prompt, Conversation)
-        # Build request body
-        request_body, request_headers = _build_anthropic_request(
-            APIModel.from_registry(model), prompt, [], sampling_params, cache
-        )

-
-
-    # Chunk into batches of 100k requests (Anthropic's limit)
+    request_headers = None
     BATCH_SIZE = 100_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
-    batch_ids = []
     batch_tasks = []
-    async with aiohttp.ClientSession() as session:
-        for batch in batches:
-            url = f"{registry[model].api_base}/messages/batches"
-            data = {"requests": batch}

-
-
-
-
-
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                request_body, request_headers = _build_anthropic_request(
+                    APIModel.from_registry(model), prompt, [], sampling_params, cache
+                )
+                json.dump({"custom_id": str(idx), "params": request_body}, f)
+                f.write("\n")

-
-            batch_id = batch_data["id"]
-            print(f"Anthropic batch job started successfully: id = {batch_id}")
-            return batch_id
+            file_path = f.name

-
+        batch_tasks.append(asyncio.create_task(_submit_anthropic_batch(file_path, request_headers, model)))

-
+    batch_ids = await asyncio.gather(*batch_tasks)

-    print(f"Submitted {len(
+    print(f"Submitted {len(batch_tasks)} batch jobs.")
     return batch_ids

```
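Both submit paths now write each chunk of requests to a temporary JSONL file and hand the file path to an uploader coroutine, instead of serializing an in-memory request list through pandas to `requests_temp.jsonl` as 0.0.20 did. Below is a self-contained sketch of that chunk-and-upload pattern; `build_request` and `upload` are hypothetical stand-ins for `_build_oa_chat_request` and `submit_batch_oa`, and the chunk size of 3 exists only to keep the demo small.

```python
import asyncio
import json
import tempfile


def build_request(idx: int, prompt: str) -> dict:
    # Hypothetical stand-in for _build_oa_chat_request / _build_anthropic_request.
    return {
        "custom_id": str(idx),
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {"messages": [{"role": "user", "content": prompt}]},
    }


async def upload(file_path: str) -> str:
    # Hypothetical stand-in for submit_batch_oa(file_path), which uploads the
    # JSONL file, starts the batch, deletes the file, and returns the batch id.
    return f"batch-for-{file_path}"


async def submit_in_chunks(prompts: list[str], chunk_size: int = 3) -> list[str]:
    tasks = []
    for start in range(0, len(prompts), chunk_size):
        chunk = prompts[start : start + chunk_size]
        # delete=False so the uploader can reopen the file after this block closes it.
        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
            for idx, prompt in enumerate(chunk, start=start):
                json.dump(build_request(idx, prompt), f)
                f.write("\n")
            file_path = f.name
        tasks.append(asyncio.create_task(upload(file_path)))
    return list(await asyncio.gather(*tasks))


if __name__ == "__main__":
    batch_ids = asyncio.run(submit_in_chunks([f"prompt {i}" for i in range(7)]))
    print(batch_ids)  # three ids: chunks of 3, 3, and 1
```

Keeping the custom_id numbering global (`enumerate(..., start=start)`) preserves the mapping back to the original prompt order even though requests are split across several batch jobs.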
lm_deluge/client.py
CHANGED
```diff
@@ -22,11 +22,8 @@ from .models import APIModel, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker

-# from .cache import LevelDBCache, SqliteCache
-

 # TODO: get completions as they finish, not all at once at the end.
-# relatedly, would be nice to cache them as they finish too.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class LLMClient(BaseModel):
     """
@@ -60,6 +57,7 @@ class LLMClient(BaseModel):
     reasoning_effort: Literal["low", "medium", "high", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    force_local_mcp: bool = False

     # NEW! Builder methods
     def with_model(self, model: str):
@@ -113,6 +111,7 @@ class LLMClient(BaseModel):
         if isinstance(self.model_names, str):
             self.model_names = [self.model_names]
         if any(m not in registry for m in self.model_names):
+            print("got model names:", self.model_names)
             raise ValueError("all model_names must be in registry")
         if isinstance(self.sampling_params, SamplingParams):
             self.sampling_params = [self.sampling_params for _ in self.model_names]
@@ -368,6 +367,7 @@ class LLMClient(BaseModel):
                    cache=cache,
                    use_responses_api=use_responses_api,
                    extra_headers=self.extra_headers,
+                   force_local_mcp=self.force_local_mcp,
                )
            except StopIteration:
                prompts_not_finished = False
@@ -389,8 +389,6 @@ class LLMClient(BaseModel):
                    results[ctx.task_id] = response
                except Exception as e:
                    # Create an error response for validation errors and other exceptions
-                   from .api_requests.response import APIResponse
-
                    error_response = APIResponse(
                        id=ctx.task_id,
                        model_internal=ctx.model_name,
@@ -421,7 +419,8 @@ class LLMClient(BaseModel):

            # Sleep - original logic
            await asyncio.sleep(seconds_to_sleep_each_loop + tracker.seconds_to_pause)
-
+
+        tracker.log_final_status()

        if return_completions_only:
            return [r.completion if r is not None else None for r in results]
@@ -468,7 +467,7 @@ class LLMClient(BaseModel):
        self,
        conversation: str | Conversation,
        *,
-       tools: list[Tool | dict] | None = None,
+       tools: list[Tool | dict | MCPServer] | None = None,
        max_rounds: int = 5,
        show_progress: bool = False,
    ) -> tuple[Conversation, APIResponse]:
@@ -482,6 +481,16 @@ class LLMClient(BaseModel):
        if isinstance(conversation, str):
            conversation = Conversation.user(conversation)

+       # Expand MCPServer objects to their constituent tools for tool execution
+       expanded_tools: list[Tool] = []
+       if tools:
+           for tool in tools:
+               if isinstance(tool, Tool):
+                   expanded_tools.append(tool)
+               elif isinstance(tool, MCPServer):
+                   mcp_tools = await tool.to_tools()
+                   expanded_tools.extend(mcp_tools)
+
        last_response: APIResponse | None = None

        for _ in range(max_rounds):
@@ -504,9 +513,9 @@ class LLMClient(BaseModel):

            for call in tool_calls:
                tool_obj = None
-               if
-               for t in
-                   if
+               if expanded_tools:
+                   for t in expanded_tools:
+                       if t.name == call.name:
                            tool_obj = t
                            break

```
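The agent loop now accepts `MCPServer` objects alongside `Tool` instances in `tools` and flattens them (via `await server.to_tools()`) before matching tool calls by name. The sketch below shows that expansion pattern in isolation; `FakeTool` and `FakeMCPServer` are hypothetical stand-ins for lm_deluge's `Tool` and `MCPServer`, used only so the example runs without the library.

```python
import asyncio
from dataclasses import dataclass
from typing import Callable


@dataclass
class FakeTool:
    # Hypothetical stand-in for lm_deluge.tool.Tool: a name plus a callable.
    name: str
    run: Callable


class FakeMCPServer:
    # Hypothetical stand-in for an MCP server wrapper exposing to_tools().
    def __init__(self, tools: list[FakeTool]):
        self._tools = tools

    async def to_tools(self) -> list[FakeTool]:
        return self._tools


async def expand_tools(tools: list) -> list[FakeTool]:
    # Same shape as the expansion added in client.py: plain tools pass through,
    # MCP servers are asked for their constituent tools.
    expanded: list[FakeTool] = []
    for tool in tools:
        if isinstance(tool, FakeTool):
            expanded.append(tool)
        elif isinstance(tool, FakeMCPServer):
            expanded.extend(await tool.to_tools())
    return expanded


async def main():
    adder = FakeTool("add", lambda a, b: a + b)
    server = FakeMCPServer([FakeTool("echo", lambda s: s)])
    expanded = await expand_tools([adder, server])
    # The agent loop looks tools up by name for each tool call it receives.
    by_name = {t.name: t for t in expanded}
    print(sorted(by_name))  # ['add', 'echo']


asyncio.run(main())
```

The new `force_local_mcp` field is likewise threaded into the per-request construction alongside `cache` and `extra_headers`; how the request layer consumes it is not visible in this diff.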
lm_deluge/image.py
CHANGED
```diff
@@ -10,7 +10,7 @@ from typing import Literal
 import requests
 from PIL import Image as PILImage  # type: ignore

-MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
+MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"] | str


 @dataclass(slots=True)
@@ -23,6 +23,9 @@ class Image:
     _fingerprint_cache: str | None = field(init=False, default=None)
     _size_cache: tuple[int, int] | None = field(init=False, default=None)

+    def __repr__(self):
+        return f"Image(data=[{type(self.data)}], media_type={self.media_type}, detail={self.detail})"
+
     @classmethod
     def from_pdf(
         cls,
@@ -69,10 +72,11 @@ class Image:
        elif isinstance(self.data, Path) and self.data.exists():
            return Path(self.data).read_bytes()
        elif isinstance(self.data, str) and self.data.startswith("data:"):
+           # print("base64 path selected")
            header, encoded = self.data.split(",", 1)
            return base64.b64decode(encoded)
        else:
-           raise ValueError("unreadable image format")
+           raise ValueError(f"unreadable image format. type: {type(self.data)}")

    def _mime(self) -> str:
        if self.media_type:
```
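The new `__repr__` prints the type of `data` rather than the payload itself, which keeps large base64 strings out of logs and tracebacks. A minimal sketch of the same idea on a hypothetical container class (not lm_deluge's actual `Image`, just the repr pattern):

```python
from dataclasses import dataclass
from pathlib import Path


@dataclass(slots=True)
class Blob:
    # Hypothetical stand-in for the data/media_type/detail trio on lm_deluge's Image.
    data: bytes | str | Path
    media_type: str | None = None
    detail: str = "auto"

    def __repr__(self) -> str:
        # Show only the type of `data`, never the (possibly huge) payload itself.
        return f"Blob(data=[{type(self.data)}], media_type={self.media_type}, detail={self.detail})"


print(Blob(b"\x89PNG...", media_type="image/png"))
# Blob(data=[<class 'bytes'>], media_type=image/png, detail=auto)
```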
lm_deluge/models.py
CHANGED
```diff
@@ -42,7 +42,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
-        "id": "llama-3.3-
+        "id": "llama-3.3-70b",
         "name": "Llama-3.3-70B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -56,7 +56,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
-        "id": "llama-3.3-
+        "id": "llama-3.3-8b",
         "name": "Llama-3.3-8B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -670,62 +670,62 @@ BUILTIN_MODELS = {
     # "requests_per_minute": 120,
     # "tokens_per_minute": None,
     # },
-    "gemini-2.5-pro-vertex": {
-        "id": "gemini-2.5-pro",
-        "name": "gemini-2.5-pro-preview-05-06",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 1.25,
-        "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.5-flash-vertex": {
-        "id": "gemini-2.5-flash",
-        "name": "gemini-2.5-flash-preview-05-20",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.15,
-        "output_cost": 0.6,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.0-flash-vertex": {
-        "id": "gemini-2.0-flash",
-        "name": "gemini-2.0-flash",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.10,
-        "output_cost": 0.40,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
-    "gemini-2.0-flash-lite-vertex": {
-        "id": "gemini-2.0-flash-lite",
-        "name": "gemini-2.0-flash-lite",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.075,
-        "output_cost": 0.30,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
+    # "gemini-2.5-pro-vertex": {
+    #     "id": "gemini-2.5-pro",
+    #     "name": "gemini-2.5-pro-preview-05-06",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 1.25,
+    #     "output_cost": 10.0,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.5-flash-vertex": {
+    #     "id": "gemini-2.5-flash",
+    #     "name": "gemini-2.5-flash-preview-05-20",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.15,
+    #     "output_cost": 0.6,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.0-flash-vertex": {
+    #     "id": "gemini-2.0-flash",
+    #     "name": "gemini-2.0-flash",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.10,
+    #     "output_cost": 0.40,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
+    # "gemini-2.0-flash-lite-vertex": {
+    #     "id": "gemini-2.0-flash-lite",
+    #     "name": "gemini-2.0-flash-lite",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.075,
+    #     "output_cost": 0.30,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
     # ███████████ █████ █████
     # ░░███░░░░░███ ░░███ ░░███
     # ░███ ░███ ██████ ███████ ████████ ██████ ██████ ░███ █████
@@ -1138,7 +1138,7 @@ BUILTIN_MODELS = {
         "output_cost": 0.7,
     },
     "mixtral-8x22b": {
-        "id": "
+        "id": "mixtral-8x22b",
         "name": "open-mixtral-8x22b",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
@@ -1243,3 +1243,5 @@ def register_model(**kwargs) -> APIModel:
 # Populate registry with builtin models
 for cfg in BUILTIN_MODELS.values():
     register_model(**cfg)
+
+# print("Valid models:", registry.keys())
```