lm-deluge 0.0.56__py3-none-any.whl → 0.0.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lm_deluge/__init__.py +12 -1
  2. lm_deluge/api_requests/anthropic.py +12 -1
  3. lm_deluge/api_requests/base.py +87 -5
  4. lm_deluge/api_requests/bedrock.py +3 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +7 -6
  7. lm_deluge/api_requests/mistral.py +8 -9
  8. lm_deluge/api_requests/openai.py +179 -124
  9. lm_deluge/batches.py +25 -9
  10. lm_deluge/client.py +280 -67
  11. lm_deluge/config.py +1 -1
  12. lm_deluge/file.py +382 -13
  13. lm_deluge/mock_openai.py +482 -0
  14. lm_deluge/models/__init__.py +12 -8
  15. lm_deluge/models/anthropic.py +12 -20
  16. lm_deluge/models/bedrock.py +0 -14
  17. lm_deluge/models/cohere.py +0 -16
  18. lm_deluge/models/google.py +0 -20
  19. lm_deluge/models/grok.py +48 -4
  20. lm_deluge/models/groq.py +2 -2
  21. lm_deluge/models/kimi.py +34 -0
  22. lm_deluge/models/meta.py +0 -8
  23. lm_deluge/models/minimax.py +10 -0
  24. lm_deluge/models/openai.py +28 -34
  25. lm_deluge/models/openrouter.py +64 -1
  26. lm_deluge/models/together.py +0 -16
  27. lm_deluge/prompt.py +138 -29
  28. lm_deluge/request_context.py +9 -11
  29. lm_deluge/tool.py +395 -19
  30. lm_deluge/tracker.py +11 -5
  31. lm_deluge/warnings.py +46 -0
  32. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
  33. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +36 -33
  34. lm_deluge/agent.py +0 -0
  35. lm_deluge/gemini_limits.py +0 -65
  36. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
  37. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
  38. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
lm_deluge/mock_openai.py (new file)
@@ -0,0 +1,482 @@
+ """
+ Mock OpenAI client that implements the AsyncOpenAI interface but uses lm-deluge's
+ LLMClient internally. This allows using any lm-deluge-supported provider through
+ the standard OpenAI Python SDK interface.
+
+ Example usage:
+     from lm_deluge.mock_openai import MockAsyncOpenAI
+
+     # Use Claude through OpenAI interface
+     client = MockAsyncOpenAI(model="claude-sonnet-4")
+     response = await client.chat.completions.create(
+         model="claude-sonnet-4",  # Can override here
+         messages=[{"role": "user", "content": "Hello!"}],
+         temperature=0.7
+     )
+     print(response.choices[0].message.content)
+
+ Installation:
+     pip install lm-deluge[openai]
+ """
+
+ import json
+ import time
+ import uuid
+ from typing import Any, AsyncIterator, Literal, Union, overload
+
+ try:
+     from openai.types.chat import (
+         ChatCompletion,
+         ChatCompletionChunk,
+         ChatCompletionMessage,
+         ChatCompletionMessageToolCall,
+     )
+     from openai.types.chat.chat_completion import Choice as CompletionChoice
+     from openai.types.chat.chat_completion_chunk import (
+         Choice as ChunkChoice,
+         ChoiceDelta,
+         ChoiceDeltaToolCall,
+         ChoiceDeltaToolCallFunction,
+     )
+     from openai.types.chat.chat_completion_message_tool_call import Function
+     from openai.types.completion_usage import CompletionUsage
+ except ImportError:
+     raise ImportError(
+         "The openai package is required to use MockAsyncOpenAI. "
+         "Install it with: pip install lm-deluge[openai]"
+     )
+
+ from lm_deluge.client import LLMClient
+ from lm_deluge.prompt import Conversation, Message, Part, Text, ToolCall, ToolResult
+
+
+ def _messages_to_conversation(messages: list[dict[str, Any]]) -> Conversation:
+     """Convert OpenAI messages format to lm-deluge Conversation."""
+     conv_messages = []
+
+     for msg in messages:
+         role = msg["role"]
+         content = msg.get("content")
+         tool_calls = msg.get("tool_calls")
+         tool_call_id = msg.get("tool_call_id")
+
+         parts: list[Part] = []
+
+         # Handle regular content
+         if content:
+             if isinstance(content, str):
+                 parts.append(Text(content))
+             elif isinstance(content, list):
+                 # Multi-part content (text, images, etc.)
+                 for item in content:
+                     if item.get("type") == "text":
+                         parts.append(Text(item["text"]))
+                     # Could add image support here later
+
+         # Handle tool calls (from assistant)
+         if tool_calls:
+             for tc in tool_calls:
+                 # Parse arguments from JSON string to dict
+                 args_str = tc["function"]["arguments"]
+                 args_dict = (
+                     json.loads(args_str) if isinstance(args_str, str) else args_str
+                 )
+                 parts.append(
+                     ToolCall(
+                         id=tc["id"],
+                         name=tc["function"]["name"],
+                         arguments=args_dict,
+                     )
+                 )
+
+         # Handle tool results (from tool role)
+         if role == "tool" and tool_call_id:
+             parts.append(ToolResult(tool_call_id=tool_call_id, result=content or ""))
+
+         conv_messages.append(Message(role=role, parts=parts))
+
+     return Conversation(messages=conv_messages)
+
+
+ def _response_to_chat_completion(
+     response: Any,  # APIResponse
+     model: str,
+     request_id: str | None = None,
+ ) -> ChatCompletion:
+     """Convert lm-deluge APIResponse to OpenAI ChatCompletion."""
+     if request_id is None:
+         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+
+     # Handle error responses
+     if response.is_error:
+         # For errors, create an empty response with error finish reason
+         message = ChatCompletionMessage(
+             role="assistant",
+             content=response.error_message or "Error occurred",
+         )
+         choice = CompletionChoice(
+             index=0,
+             message=message,
+             finish_reason="stop",  # or could use "error" but that's not standard
+         )
+         return ChatCompletion(
+             id=request_id,
+             choices=[choice],
+             created=int(time.time()),
+             model=model,
+             object="chat.completion",
+             usage=None,
+         )
+
+     # Extract content from response
+     content_text = None
+     tool_calls = None
+
+     if response.content:
+         # Extract text parts
+         text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
+         if text_parts:
+             content_text = "".join(text_parts)
+
+         # Extract tool calls
+         tool_call_parts = [p for p in response.content.parts if isinstance(p, ToolCall)]
+         if tool_call_parts:
+             tool_calls = [
+                 ChatCompletionMessageToolCall(
+                     id=tc.id,
+                     type="function",
+                     function=Function(
+                         name=tc.name,
+                         # Convert dict arguments to JSON string for OpenAI format
+                         arguments=json.dumps(tc.arguments)
+                         if isinstance(tc.arguments, dict)
+                         else tc.arguments,
+                     ),
+                 )
+                 for tc in tool_call_parts
+             ]
+
+     # Create message
+     message = ChatCompletionMessage(
+         role="assistant",
+         content=content_text,
+         tool_calls=tool_calls,
+     )
+
+     # Create choice
+     choice = CompletionChoice(
+         index=0,
+         message=message,
+         finish_reason=response.finish_reason or "stop",
+     )
+
+     # Create usage
+     usage = None
+     if response.usage:
+         usage = CompletionUsage(
+             prompt_tokens=response.usage.input_tokens,
+             completion_tokens=response.usage.output_tokens,
+             total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+         )
+
+     return ChatCompletion(
+         id=request_id,
+         choices=[choice],
+         created=int(time.time()),
+         model=model,
+         object="chat.completion",
+         usage=usage,
+     )
+
+
+ class _AsyncStreamWrapper:
+     """Wrapper to convert lm-deluge streaming to OpenAI ChatCompletionChunk format."""
+
+     def __init__(self, stream: AsyncIterator, model: str, request_id: str):
+         self._stream = stream
+         self._model = model
+         self._request_id = request_id
+         self._first_chunk = True
+
+     def __aiter__(self):
+         return self
+
+     async def __anext__(self) -> ChatCompletionChunk:
+         chunk = await self._stream.__anext__()
+
+         # Create delta based on chunk content
+         delta = ChoiceDelta()
+
+         if self._first_chunk:
+             delta.role = "assistant"
+             self._first_chunk = False
+
+         # Extract content from chunk
+         if hasattr(chunk, "content") and chunk.content:
+             if isinstance(chunk.content, str):
+                 delta.content = chunk.content
+             elif hasattr(chunk.content, "parts"):
+                 # Extract text from parts
+                 text_parts = [
+                     p.text for p in chunk.content.parts if isinstance(p, Text)
+                 ]
+                 if text_parts:
+                     delta.content = "".join(text_parts)
+
+                 # Extract tool calls from parts
+                 tool_call_parts = [
+                     p for p in chunk.content.parts if isinstance(p, ToolCall)
+                 ]
+                 if tool_call_parts:
+                     delta.tool_calls = [
+                         ChoiceDeltaToolCall(
+                             index=i,
+                             id=tc.id,
+                             type="function",
+                             function=ChoiceDeltaToolCallFunction(
+                                 name=tc.name,
+                                 # Convert dict arguments to JSON string for OpenAI format
+                                 arguments=json.dumps(tc.arguments)
+                                 if isinstance(tc.arguments, dict)
+                                 else tc.arguments,
+                             ),
+                         )
+                         for i, tc in enumerate(tool_call_parts)
+                     ]
+
+         # Create choice
+         choice = ChunkChoice(
+             index=0,
+             delta=delta,
+             finish_reason=getattr(chunk, "finish_reason", None),
+         )
+
+         return ChatCompletionChunk(
+             id=self._request_id,
+             choices=[choice],
+             created=int(time.time()),
+             model=self._model,
+             object="chat.completion.chunk",
+         )
+
+
+ class MockCompletions:
+     """Mock completions resource that implements OpenAI's completions.create interface."""
+
+     def __init__(self, parent: "MockAsyncOpenAI"):
+         self._parent = parent
+
+     @overload
+     async def create(
+         self,
+         *,
+         messages: list[dict[str, Any]],
+         model: str,
+         stream: Literal[False] = False,
+         **kwargs: Any,
+     ) -> ChatCompletion: ...
+
+     @overload
+     async def create(
+         self,
+         *,
+         messages: list[dict[str, Any]],
+         model: str,
+         stream: Literal[True],
+         **kwargs: Any,
+     ) -> AsyncIterator[ChatCompletionChunk]: ...
+
+     async def create(
+         self,
+         *,
+         messages: list[dict[str, Any]],
+         model: str,
+         stream: bool = False,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         max_completion_tokens: int | None = None,
+         top_p: float | None = None,
+         seed: int | None = None,
+         tools: list[dict[str, Any]] | None = None,
+         tool_choice: Any | None = None,
+         reasoning_effort: str | None = None,
+         response_format: dict[str, Any] | None = None,
+         n: int | None = None,
+         stop: str | list[str] | None = None,
+         presence_penalty: float | None = None,
+         frequency_penalty: float | None = None,
+         **kwargs: Any,
+     ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+         """
+         Create a chat completion using lm-deluge's LLMClient.
+
+         Args:
+             messages: List of message dictionaries with 'role' and 'content'
+             model: Model identifier (can override client's default model)
+             stream: Whether to stream the response
+             temperature: Sampling temperature (0-2)
+             max_tokens: Max tokens (deprecated, use max_completion_tokens)
+             max_completion_tokens: Max completion tokens
+             top_p: Nucleus sampling parameter
+             seed: Random seed for deterministic sampling
+             tools: List of tool definitions
+             tool_choice: Tool choice strategy
+             reasoning_effort: Reasoning effort for reasoning models
+             response_format: Response format (e.g., {"type": "json_object"})
+             **kwargs: Other parameters (mostly ignored for compatibility)
+
+         Returns:
+             ChatCompletion (non-streaming) or AsyncIterator[ChatCompletionChunk] (streaming)
+         """
+         # Get or create client for this model
+         client = self._parent._get_or_create_client(model)
+
+         # Convert messages to Conversation
+         conversation = _messages_to_conversation(messages)
+
+         # Build sampling params
+         sampling_kwargs = {}
+         if temperature is not None:
+             sampling_kwargs["temperature"] = temperature
+         if max_completion_tokens is not None:
+             sampling_kwargs["max_new_tokens"] = max_completion_tokens
+         elif max_tokens is not None:
+             sampling_kwargs["max_new_tokens"] = max_tokens
+         if top_p is not None:
+             sampling_kwargs["top_p"] = top_p
+         if seed is not None:
+             sampling_kwargs["seed"] = seed
+         if reasoning_effort is not None:
+             sampling_kwargs["reasoning_effort"] = reasoning_effort
+         if response_format and response_format.get("type") == "json_object":
+             sampling_kwargs["json_mode"] = True
+
+         # If sampling params are provided, create a new client with merged params
+         if sampling_kwargs:
+             # Merge with default params
+             merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
+             client = self._parent._create_client_with_params(model, merged_params)
+
+         # Convert tools if provided
+         lm_tools = None
+         if tools:
+             # For now, just pass through - lm-deluge will handle the format
+             lm_tools = tools
+
+         # Execute request
+         if stream:
+             # Streaming mode
+             request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+             # Note: client.stream() is an async generator, not a coroutine
+             # We can directly wrap it
+             stream_iter = client.stream(conversation, tools=lm_tools)
+             # Verify it's a generator, not a coroutine
+             if hasattr(stream_iter, "__anext__"):
+                 return _AsyncStreamWrapper(stream_iter, model, request_id)
+             else:
+                 # If it's a coroutine, we need to await it first
+                 # But this shouldn't happen with the current implementation
+                 raise TypeError(f"Expected async generator, got {type(stream_iter)}")
+         else:
+             # Non-streaming mode
+             response = await client.start(conversation, tools=lm_tools)
+             return _response_to_chat_completion(response, model)
+
+
+ class MockChat:
+     """Mock chat resource that provides access to completions."""
+
+     def __init__(self, parent: "MockAsyncOpenAI"):
+         self._parent = parent
+         self._completions = MockCompletions(parent)
+
+     @property
+     def completions(self) -> MockCompletions:
+         """Access the completions resource."""
+         return self._completions
+
+
+ class MockAsyncOpenAI:
+     """
+     Mock AsyncOpenAI client that uses lm-deluge's LLMClient internally.
+
+     This allows using any lm-deluge-supported provider (Anthropic, Google, etc.)
+     through the standard OpenAI Python SDK interface.
+
+     Example:
+         # Use Claude through OpenAI interface
+         client = MockAsyncOpenAI(model="claude-sonnet-4")
+         response = await client.chat.completions.create(
+             model="claude-sonnet-4",
+             messages=[{"role": "user", "content": "Hello!"}],
+             temperature=0.7
+         )
+
+     Args:
+         model: Default model to use (can be overridden in create())
+         temperature: Default temperature
+         max_completion_tokens: Default max completion tokens
+         top_p: Default top_p
+         **kwargs: Additional parameters passed to LLMClient
+     """
+
+     def __init__(
+         self,
+         *,
+         model: str,
+         temperature: float | None = None,
+         max_completion_tokens: int | None = None,
+         top_p: float | None = None,
+         seed: int | None = None,
+         **kwargs: Any,
+     ):
+         self._default_model = model
+         self._default_sampling_params = {}
+
+         if temperature is not None:
+             self._default_sampling_params["temperature"] = temperature
+         if max_completion_tokens is not None:
+             self._default_sampling_params["max_new_tokens"] = max_completion_tokens
+         if top_p is not None:
+             self._default_sampling_params["top_p"] = top_p
+         if seed is not None:
+             self._default_sampling_params["seed"] = seed
+
+         # Additional kwargs for LLMClient
+         self._client_kwargs = kwargs
+
+         # Cache of LLMClient instances by model
+         self._clients: dict[str, Any] = {}
+
+         # Create the default client
+         self._clients[model] = self._create_client(model)
+
+         # Create nested resources
+         self._chat = MockChat(self)
+
+     def _create_client(self, model: str) -> Any:
+         """Create a new LLMClient for the given model."""
+         return LLMClient(
+             model,
+             **self._default_sampling_params,
+             **self._client_kwargs,
+         )
+
+     def _create_client_with_params(self, model: str, params: dict[str, Any]) -> Any:
+         """Create a new LLMClient with specific sampling parameters."""
+         return LLMClient(
+             model,
+             **params,
+             **self._client_kwargs,
+         )
+
+     def _get_or_create_client(self, model: str) -> Any:
+         """Get existing client or create new one for the model."""
+         if model not in self._clients:
+             self._clients[model] = self._create_client(model)
+         return self._clients[model]
+
+     @property
+     def chat(self) -> MockChat:
+         """Access the chat resource."""
+         return self._chat
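
The module docstring above only shows the non-streaming path. For orientation, here is a minimal usage sketch covering both the non-streaming and streaming paths; it assumes lm-deluge 0.0.69 with the openai extra installed, and the model name and prompt strings are illustrative, not part of the diff:

import asyncio

from lm_deluge.mock_openai import MockAsyncOpenAI


async def main():
    # Default model and sampling params are set on the client; create() can override them.
    client = MockAsyncOpenAI(model="claude-4.5-sonnet", temperature=0.7)

    # Non-streaming: returns an openai.types.chat.ChatCompletion
    response = await client.chat.completions.create(
        model="claude-4.5-sonnet",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)

    # Streaming: create() returns an async iterator of ChatCompletionChunk objects
    stream = await client.chat.completions.create(
        model="claude-4.5-sonnet",
        messages=[{"role": "user", "content": "Hello again!"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())
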
lm_deluge/models/__init__.py
@@ -15,7 +15,9 @@ from .fireworks import FIREWORKS_MODELS
  from .google import GOOGLE_MODELS
  from .grok import XAI_MODELS
  from .groq import GROQ_MODELS
+ from .kimi import KIMI_MODELS
  from .meta import META_MODELS
+ from .minimax import MINIMAX_MODELS
  from .mistral import MISTRAL_MODELS
  from .openai import OPENAI_MODELS
  from .openrouter import OPENROUTER_MODELS
@@ -38,9 +40,9 @@ class APIModel:
      supports_responses: bool = False
      reasoning_model: bool = False
      regions: list[str] | dict[str, int] = field(default_factory=list)
-     tokens_per_minute: int | None = None
-     requests_per_minute: int | None = None
-     gpus: list[str] | None = None
+     # tokens_per_minute: int | None = None
+     # requests_per_minute: int | None = None
+     # gpus: list[str] | None = None

      @classmethod
      def from_registry(cls, name: str):
@@ -62,7 +64,7 @@ class APIModel:
              raise ValueError("no regions to sample")
          random.sample(regions, 1, counts=weights)[0]

-     def make_request(self, context: RequestContext):  # -> "APIRequestBase"
+     def make_request(self, context: RequestContext):
          from ..api_requests.common import CLASSES

          api_spec = self.api_spec
@@ -97,8 +99,8 @@ def register_model(
      supports_responses: bool = False,
      reasoning_model: bool = False,
      regions: list[str] | dict[str, int] = field(default_factory=list),
-     tokens_per_minute: int | None = None,
-     requests_per_minute: int | None = None,
+     # tokens_per_minute: int | None = None,
+     # requests_per_minute: int | None = None,
  ) -> APIModel:
      """Register a model configuration and return the created APIModel."""
      model = APIModel(
@@ -116,8 +118,8 @@ def register_model(
          supports_responses=supports_responses,
          reasoning_model=reasoning_model,
          regions=regions,
-         tokens_per_minute=tokens_per_minute,
-         requests_per_minute=requests_per_minute,
+         # tokens_per_minute=tokens_per_minute,
+         # requests_per_minute=requests_per_minute,
      )
      registry[model.id] = model
      return model
@@ -132,7 +134,9 @@ for model_dict in [
      FIREWORKS_MODELS,
      GOOGLE_MODELS,
      XAI_MODELS,
+     KIMI_MODELS,
      META_MODELS,
+     MINIMAX_MODELS,
      MISTRAL_MODELS,
      OPENAI_MODELS,
      OPENROUTER_MODELS,
lm_deluge/models/anthropic.py
@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
      # ░███
      # █████
      #
+     "claude-4.5-haiku": {
+         "id": "claude-4.5-haiku",
+         "name": "claude-haiku-4-5-20251001",
+         "api_base": "https://api.anthropic.com/v1",
+         "api_key_env_var": "ANTHROPIC_API_KEY",
+         "supports_json": False,
+         "api_spec": "anthropic",
+         "input_cost": 1.0,
+         "cached_input_cost": 0.10,
+         "cache_write_cost": 1.25,
+         "output_cost": 3.0,
+     },
      "claude-4.5-sonnet": {
          "id": "claude-4.5-sonnet",
          "name": "claude-sonnet-4-5-20250929",
@@ -21,8 +33,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.30,
          "cache_write_cost": 3.75,
          "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
      },
      "claude-4.1-opus": {
          "id": "claude-4.1-opus",
@@ -35,8 +45,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 1.50,
          "cache_write_cost": 18.75,
          "output_cost": 75.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
          "reasoning_model": True,
      },
      "claude-4-opus": {
@@ -50,8 +58,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 1.50,
          "cache_write_cost": 18.75,
          "output_cost": 75.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
          "reasoning_model": True,
      },
      "claude-4-sonnet": {
@@ -65,8 +71,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.30,
          "cache_write_cost": 3.75,
          "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
      },
      "claude-3.7-sonnet": {
          "id": "claude-3.7-sonnet",
@@ -79,8 +83,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.30,
          "cache_write_cost": 3.75,
          "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
          "reasoning_model": True,
      },
      "claude-3.6-sonnet": {
@@ -94,8 +96,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.30,
          "cache_write_cost": 3.75,
          "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
      },
      "claude-3.5-sonnet": {
          "id": "claude-3.5-sonnet",
@@ -108,8 +108,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.30,
          "cache_write_cost": 3.75,
          "output_cost": 15.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
      },
      "claude-3-opus": {
          "id": "claude-3-opus",
@@ -120,8 +118,6 @@ ANTHROPIC_MODELS = {
          "api_spec": "anthropic",
          "input_cost": 15.0,
          "output_cost": 75.0,
-         "requests_per_minute": 4_000,
-         "tokens_per_minute": 400_000,
      },
      "claude-3.5-haiku": {
          "id": "claude-3.5-haiku",
@@ -134,8 +130,6 @@ ANTHROPIC_MODELS = {
          "cached_input_cost": 0.08,
          "cache_write_cost": 1.00,
          "output_cost": 4.00,
-         "requests_per_minute": 20_000,
-         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
      },
      "claude-3-haiku": {
          "id": "claude-3-haiku",
@@ -148,7 +142,5 @@ ANTHROPIC_MODELS = {
          "cache_write_cost": 0.30,
          "cached_input_cost": 0.03,
          "output_cost": 1.25,
-         "requests_per_minute": 10_000,
-         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
      },
  }