lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of lm-deluge might be problematic.

Files changed (41)
  1. lm_deluge/__init__.py +0 -24
  2. lm_deluge/api_requests/anthropic.py +25 -5
  3. lm_deluge/api_requests/base.py +37 -0
  4. lm_deluge/api_requests/bedrock.py +23 -2
  5. lm_deluge/api_requests/gemini.py +36 -10
  6. lm_deluge/api_requests/openai.py +31 -4
  7. lm_deluge/batches.py +15 -45
  8. lm_deluge/client.py +27 -1
  9. lm_deluge/models/__init__.py +2 -0
  10. lm_deluge/models/anthropic.py +12 -12
  11. lm_deluge/models/google.py +13 -0
  12. lm_deluge/models/minimax.py +9 -1
  13. lm_deluge/models/openrouter.py +48 -0
  14. lm_deluge/models/zai.py +50 -1
  15. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  16. lm_deluge/prompt.py +333 -68
  17. lm_deluge/server/__init__.py +24 -0
  18. lm_deluge/server/__main__.py +144 -0
  19. lm_deluge/server/adapters.py +369 -0
  20. lm_deluge/server/app.py +388 -0
  21. lm_deluge/server/auth.py +71 -0
  22. lm_deluge/server/model_policy.py +215 -0
  23. lm_deluge/server/models_anthropic.py +172 -0
  24. lm_deluge/server/models_openai.py +175 -0
  25. lm_deluge/skills/anthropic.py +0 -0
  26. lm_deluge/skills/compat.py +0 -0
  27. lm_deluge/tool/__init__.py +13 -1
  28. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  29. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  30. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  31. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  32. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  33. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  34. lm_deluge/tool/prefab/skills.py +0 -0
  35. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
  36. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
  37. lm_deluge/mock_openai.py +0 -643
  38. lm_deluge/tool/prefab/sandbox.py +0 -1621
  39. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  40. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  41. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -3,19 +3,6 @@ from .file import File
 from .prompt import Conversation, Message
 from .tool import Tool
 
-try:
-    from .mock_openai import (  # noqa
-        APIError,
-        APITimeoutError,
-        BadRequestError,
-        MockAsyncOpenAI,
-        RateLimitError,
-    )
-
-    _has_openai = True
-except ImportError:
-    _has_openai = False
-
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
 __all__ = [
@@ -27,14 +14,3 @@ __all__ = [
     "Tool",
     "File",
 ]
-
-if _has_openai:
-    __all__.extend(
-        [
-            "MockAsyncOpenAI",
-            "APIError",
-            "APITimeoutError",
-            "BadRequestError",
-            "RateLimitError",
-        ]
-    )
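The mock OpenAI client is no longer re-exported from the package root (mock_openai.py itself is deleted; see file 37 above). A minimal sketch of what still imports cleanly from the top level after this change, based on the imports and __all__ entries visible in the hunk:

from lm_deluge import Conversation, File, Message, Tool  # still exported in 0.0.90
# from lm_deluge import MockAsyncOpenAI  # removed; this now raises ImportError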
lm_deluge/api_requests/anthropic.py CHANGED
@@ -6,6 +6,7 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -250,8 +251,28 @@ class AnthropicRequest(APIRequestBase):
             if item["type"] == "text":
                 parts.append(Text(item["text"]))
             elif item["type"] == "thinking":
-                thinking = item["thinking"]
-                parts.append(Thinking(item["thinking"]))
+                thinking_content = item.get("thinking", "")
+                thinking = thinking_content
+                signature = item.get("signature")
+                parts.append(
+                    Thinking(
+                        thinking_content,
+                        raw_payload=item,
+                        thought_signature=ThoughtSignature(
+                            signature,
+                            provider="anthropic",
+                        )
+                        if signature is not None
+                        else None,
+                    )
+                )
+            elif item["type"] == "redacted_thinking":
+                parts.append(
+                    Thinking(
+                        item.get("data", ""),
+                        raw_payload=item,
+                    )
+                )
             elif item["type"] == "tool_use":
                 parts.append(
                     ToolCall(
@@ -265,9 +286,8 @@ class AnthropicRequest(APIRequestBase):
                 usage = Usage.from_anthropic_usage(data["usage"])
             except Exception as e:
                 is_error = True
-                error_message = (
-                    f"Error calling .json() on response w/ status {status_code}: {e}"
-                )
+                response_text = await http_response.text()
+                error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -73,6 +75,24 @@ class APIRequestBase(ABC):
 
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
+        if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+            combined = []
+            seen = set()
+            for (
+                raw
+            ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+                ","
+            ):
+                token = raw.strip()
+                if token and token not in seen:
+                    seen.add(token)
+                    combined.append(token)
+            merged["anthropic-beta"] = ",".join(combined)
+            filtered_extra = {
+                key: value
+                for key, value in filtered_extra.items()
+                if key != "anthropic-beta"
+            }
         merged.update(filtered_extra)
 
         # Filter out None values from final merged headers
@@ -189,6 +209,23 @@ class APIRequestBase(ABC):
         await self.build_request()
         assert self.context.status_tracker
 
+        if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+            "1",
+            "true",
+            "yes",
+            "on",
+        }:
+            print("DELUGE_PROXY_PROVIDER_REQUEST")
+            print(f"URL: {self.url}")
+            print("Headers:")
+            print(self.request_header)
+            if self.request_json is not None:
+                print("JSON:")
+                try:
+                    print(json.dumps(self.request_json, indent=2))
+                except Exception:
+                    print(self.request_json)
+
         if (
             self.context.background
             and self.context.use_responses_api
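The new logging block above is gated on the DELUGE_PROXY_LOG_PROVIDER_REQUESTS environment variable; any of "1", "true", "yes", or "on" (case-insensitive, whitespace-stripped) enables it. A minimal sketch of switching it on from Python before sending requests ("1" is just one of the accepted values):

import os

# Enable the provider-request dump added in 0.0.90: the URL, headers, and JSON
# body of every outgoing request are printed to stdout.
os.environ["DELUGE_PROXY_LOG_PROVIDER_REQUESTS"] = "1"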
lm_deluge/api_requests/bedrock.py CHANGED
@@ -16,6 +16,7 @@ except ImportError:
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -363,8 +364,28 @@ class BedrockRequest(APIRequestBase):
             if item["type"] == "text":
                 parts.append(Text(item["text"]))
             elif item["type"] == "thinking":
-                thinking = item["thinking"]
-                parts.append(Thinking(item["thinking"]))
+                thinking_content = item.get("thinking", "")
+                thinking = thinking_content
+                signature = item.get("signature")
+                parts.append(
+                    Thinking(
+                        thinking_content,
+                        raw_payload=item,
+                        thought_signature=ThoughtSignature(
+                            signature,
+                            provider="anthropic",
+                        )
+                        if signature is not None
+                        else None,
+                    )
+                )
+            elif item["type"] == "redacted_thinking":
+                parts.append(
+                    Thinking(
+                        item.get("data", ""),
+                        raw_payload=item,
+                    )
+                )
             elif item["type"] == "tool_use":
                 parts.append(
                     ToolCall(
lm_deluge/api_requests/gemini.py CHANGED
@@ -9,7 +9,7 @@ from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
@@ -52,6 +52,7 @@ async def _build_gemini_request(
 
     # Handle reasoning models (thinking)
     is_gemini_3 = "gemini-3" in model.name.lower()
+    is_gemini_3_flash = "gemini-3-flash" in model.name.lower()
    if is_gemini_3:
        # gemini3 MUST think
        if not sampling_params.reasoning_effort:
@@ -62,13 +63,24 @@ async def _build_gemini_request(
        if effort_key == "xhigh":
            maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
            effort_key = "high"
-        level_map = {
-            "none": "low",
-            "minimal": "low",
-            "low": "low",
-            "medium": "high",  # change when supported
-            "high": "high",
-        }
+        if is_gemini_3_flash:
+            # Flash supports minimal, low, medium, high
+            level_map = {
+                "none": "low",
+                "minimal": "minimal",
+                "low": "low",
+                "medium": "medium",
+                "high": "high",
+            }
+        else:
+            # Pro only supports low, high
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",
+                "high": "high",
+            }
        effort = level_map[effort_key]
        thinking_config = {"thinkingLevel": effort}
        request_json["generationConfig"]["thinkingConfig"] = thinking_config
@@ -248,10 +260,20 @@ class GeminiRequest(APIRequestBase):
            if "content" in candidate and "parts" in candidate["content"]:
                for part in candidate["content"]["parts"]:
                    # Extract thought signature if present
-                    thought_sig = part.get("thoughtSignature")
+                    raw_sig = part.get("thoughtSignature")
+                    thought_sig = (
+                        ThoughtSignature(raw_sig, provider="gemini")
+                        if raw_sig is not None
+                        else None
+                    )
 
                    if "text" in part:
-                        parts.append(Text(part["text"]))
+                        parts.append(
+                            Text(
+                                part["text"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                    elif "thought" in part:
                        # Thought with optional signature
                        parts.append(
@@ -274,6 +296,10 @@ class GeminiRequest(APIRequestBase):
                                thought_signature=thought_sig,
                            )
                        )
+                    elif thought_sig:
+                        parts.append(
+                            Text("", thought_signature=thought_sig)
+                        )
 
            content = Message("assistant", parts)
 
lm_deluge/api_requests/openai.py CHANGED
@@ -22,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
+def _message_contents_to_string(messages: list[dict]):
+    messages = messages.copy()
+
+    for msg in messages:
+        content = msg.get("content")
+        assert content
+        if isinstance(content, list):
+            new_content = ""
+            for part in content:
+                assert "text" in part, "Invalid text part: " + str(part)
+                new_content += part["text"]
+                new_content += "\n"
+
+            msg["content"] = new_content.strip()
+
+    return messages
+
+
 async def _build_oa_chat_request(
     model: APIModel,
     context: RequestContext,
@@ -55,6 +73,10 @@ async def _build_oa_chat_request(
            request_json["service_tier"] = context.service_tier
        else:
            request_json["service_tier"] = context.service_tier
+    # if tinker, for now hack to mush into 1 string
+    if "tinker" in model.name:
+        request_json["messages"] = _message_contents_to_string(request_json["messages"])
+
    # set max_tokens or max_completion_tokens dep. on provider
    if "cohere" in model.api_base:
        request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -217,7 +239,7 @@ class OpenAIRequest(APIRequestBase):
                parts.append(Text(message["content"]))
 
            # Add tool calls if present
-            if "tool_calls" in message:
+            if "tool_calls" in message and message["tool_calls"] is not None:
                for tool_call in message["tool_calls"]:
                    parts.append(
                        ToolCall(
@@ -238,9 +260,9 @@ class OpenAIRequest(APIRequestBase):
                    and "logprobs" in data["choices"][0]
                ):
                    logprobs = data["choices"][0]["logprobs"]["content"]
-            except Exception:
+            except Exception as e:
                is_error = True
-                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
        elif mimetype and "json" in mimetype.lower():
            is_error = True  # expected status is 200, otherwise it's an error
            data = await http_response.json()
@@ -655,7 +677,12 @@ async def stream_chat(
    request_header.update(filtered_extra)
 
    context = SimpleNamespace(
-        prompt=prompt, tools=tools, sampling_params=sampling_params
+        prompt=prompt,
+        tools=tools,
+        sampling_params=sampling_params,
+        service_tier=None,
+        output_schema=None,
+        model_name=model_name,
    )
 
    request_json = await _build_oa_chat_request(model, context)  # type: ignore
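For reference, a rough illustration of what the new _message_contents_to_string helper does to list-style content when a "tinker" model is used (the message values below are made up; the behavior follows the function added in this file):

messages = [
    {"role": "user", "content": [{"text": "Summarize this."}, {"text": "Thanks!"}]},
]
# _message_contents_to_string(messages) joins the text parts with newlines and
# strips the result, yielding:
# [{"role": "user", "content": "Summarize this.\nThanks!"}]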
lm_deluge/batches.py CHANGED
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
    return batch_id
 
 
-async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
-    """Upload a JSONL file and create one Anthropic batch."""
+async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+    """Submit batch requests to Anthropic's Message Batches API."""
 
    async with aiohttp.ClientSession() as session:
        url = f"{registry[model].api_base}/messages/batches"
-        data = aiohttp.FormData()
-        with open(file_path, "rb") as f:
-            data.add_field(
-                "file",
-                f,
-                filename=os.path.basename(file_path),
-                content_type="application/json",
-            )
-
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error creating batch: {text}")
+        payload = {"requests": requests}
 
-            batch_data = await response.json()
-            batch_id = batch_data["id"]
-            print(f"Anthropic batch job started successfully: id = {batch_id}")
+        async with session.post(url, json=payload, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")
 
-            os.remove(file_path)
-            return batch_id
+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+            return batch_id
 
 
 async def create_batch_files_oa(
@@ -409,20 +400,10 @@ async def submit_batches_anthropic(
 
        if current_batch and (would_exceed_size or would_exceed_items):
            # Submit current batch
-            def write_batch_file():
-                with tempfile.NamedTemporaryFile(
-                    mode="w+", suffix=".jsonl", delete=False
-                ) as f:
-                    for batch_request in current_batch:
-                        json.dump(batch_request, f)
-                        f.write("\n")
-                    print("wrote", len(current_batch), "items")
-                    return f.name
-
-            file_path = await asyncio.to_thread(write_batch_file)
+            print("wrote", len(current_batch), "items")
            batch_tasks.append(
                asyncio.create_task(
-                    _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                    _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                )
            )
 
@@ -436,21 +417,10 @@ async def submit_batches_anthropic(
 
    # Submit final batch if it has items
    if current_batch:
-
-        def write_final_batch_file():
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-        file_path = await asyncio.to_thread(write_final_batch_file)
+        print("wrote", len(current_batch), "items")
        batch_tasks.append(
            asyncio.create_task(
-                _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
            )
        )
 
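With this change the Anthropic batch path no longer writes a temporary JSONL file and uploads it; each chunk of requests is posted directly as JSON to the messages/batches endpoint. A rough sketch of the payload shape now sent, assuming the per-request dicts follow Anthropic's Message Batches format with custom_id and params (all concrete values here are hypothetical):

payload = {
    "requests": [
        {
            "custom_id": "req-0",  # hypothetical id
            "params": {
                "model": "claude-3-opus-20240229",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello"}],
            },
        },
    ]
}
# submitted via: session.post(url, json=payload, headers=headers)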
lm_deluge/client.py CHANGED
@@ -289,6 +289,28 @@ class _LLMClient(BaseModel):
    def models(self):
        return self.model_names  # why? idk
 
+    @staticmethod
+    def _preprocess_tinker_model(model_name: str) -> str:
+        if model_name.startswith("tinker://"):
+            model_id = model_name
+            if model_id not in registry:
+                register_model(
+                    id=model_name,
+                    name=model_name,
+                    api_base="https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1",
+                    api_key_env_var="TINKER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for arbitrary tinker models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+        return model_name
+
    @staticmethod
    def _preprocess_openrouter_model(model_name: str) -> str:
        """Process openrouter: prefix and register model if needed."""
@@ -315,7 +337,8 @@ class _LLMClient(BaseModel):
            )
 
            return model_id
-        return model_name
+        else:
+            return model_name
 
    @model_validator(mode="before")
    @classmethod
@@ -328,6 +351,9 @@ class _LLMClient(BaseModel):
        # First, handle OpenRouter prefix
        model_name = cls._preprocess_openrouter_model(model_names)
 
+        # next handle tinker prefix
+        model_name = cls._preprocess_tinker_model(model_name)
+
        # Then handle reasoning effort suffix (e.g., "gpt-5-high")
        model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
        if effort and data.get("reasoning_effort") is None:
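Model names beginning with tinker:// are now registered on the fly against the Tinker OpenAI-compatible endpoint (TINKER_API_KEY, api_spec "openai", zero placeholder costs) before the rest of model-name preprocessing runs. A rough sketch of exercising the new hook directly; the model path below is made up, and _LLMClient is the internal class the hunk modifies:

from lm_deluge.client import _LLMClient

# Passing a tinker:// name through the new static method registers it in the
# model registry with the endpoint and placeholder costs shown above, then
# returns the name unchanged.
name = _LLMClient._preprocess_tinker_model("tinker://my-org/my-finetune")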
lm_deluge/models/__init__.py CHANGED
@@ -23,6 +23,7 @@ from .mistral import MISTRAL_MODELS
 from .openai import OPENAI_MODELS
 from .openrouter import OPENROUTER_MODELS
 from .together import TOGETHER_MODELS
+from .zai import ZAI_MODELS
 
 
 @dataclass
@@ -134,6 +135,7 @@ def register_model(
 # Register all models from all providers
 for model_dict in [
     ANTHROPIC_MODELS,
+    ZAI_MODELS,
     ARCEE_MODELS,
     BEDROCK_MODELS,
     COHERE_MODELS,
lm_deluge/models/anthropic.py CHANGED
@@ -112,18 +112,18 @@ ANTHROPIC_MODELS = {
        "cache_write_cost": 3.75,
        "output_cost": 15.0,
    },
-    "claude-3.5-sonnet": {
-        "id": "claude-3.5-sonnet",
-        "name": "claude-3-5-sonnet-20240620",
-        "api_base": "https://api.anthropic.com/v1",
-        "api_key_env_var": "ANTHROPIC_API_KEY",
-        "supports_json": False,
-        "api_spec": "anthropic",
-        "input_cost": 3.0,
-        "cached_input_cost": 0.30,
-        "cache_write_cost": 3.75,
-        "output_cost": 15.0,
-    },
+    # "claude-3.5-sonnet": {
+    #     "id": "claude-3.5-sonnet",
+    #     "name": "claude-3-5-sonnet-20240620",
+    #     "api_base": "https://api.anthropic.com/v1",
+    #     "api_key_env_var": "ANTHROPIC_API_KEY",
+    #     "supports_json": False,
+    #     "api_spec": "anthropic",
+    #     "input_cost": 3.0,
+    #     "cached_input_cost": 0.30,
+    #     "cache_write_cost": 3.75,
+    #     "output_cost": 15.0,
+    # },
    "claude-3-opus": {
        "id": "claude-3-opus",
        "name": "claude-3-opus-20240229",
lm_deluge/models/google.py CHANGED
@@ -153,6 +153,19 @@ GOOGLE_MODELS = {
        # Note: >200k tokens pricing is $4/$18 per million
        "reasoning_model": True,
    },
+    "gemini-3-flash-preview": {
+        "id": "gemini-3-flash-preview",
+        "name": "gemini-3-flash-preview",
+        "api_base": "https://generativelanguage.googleapis.com/v1alpha",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 0.5,
+        "cached_input_cost": 0.125,  # estimated
+        "output_cost": 3.0,
+        "reasoning_model": True,
+    },
    # Gemini 2.5 Computer Use model
    "gemini-2.5-computer-use": {
        "id": "gemini-2.5-computer-use",
lm_deluge/models/minimax.py CHANGED
@@ -1,4 +1,12 @@
 MINIMAX_MODELS = {
+    "minimax-m2.1": {
+        "id": "minimax-m2.1",
+        "name": "MiniMax-M2.1",
+        "api_base": "https://api.minimax.io/anthropic/v1",
+        "api_key_env_var": "MINIMAX_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+    },
    "minimax-m2": {
        "id": "minimax-m2",
        "name": "MiniMax-M2",
@@ -6,5 +14,5 @@ MINIMAX_MODELS = {
        "api_key_env_var": "MINIMAX_API_KEY",
        "supports_json": False,
        "api_spec": "anthropic",
-    }
+    },
 }
lm_deluge/models/openrouter.py CHANGED
@@ -83,6 +83,18 @@ OPENROUTER_MODELS = {
        "cache_write_cost": 0.04,
        "output_cost": 0.18,
    },
+    "gpt-oss-20b-free-openrouter": {
+        "id": "gpt-oss-20b-openrouter",
+        "name": "openai/gpt-oss-20b:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.0,
+        "cached_input_cost": 0.0,
+        "cache_write_cost": 0.0,
+        "output_cost": 0.0,
+    },
    "gpt-oss-120b-openrouter": {
        "id": "gpt-oss-120b-openrouter",
        "name": "openai/gpt-oss-120b",
@@ -95,6 +107,18 @@ OPENROUTER_MODELS = {
        "cache_write_cost": 0.05,
        "output_cost": 0.45,
    },
+    "gpt-oss-120b-free-openrouter": {
+        "id": "gpt-oss-120b-free-openrouter",
+        "name": "openai/gpt-oss-120b:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.00,
+        "cached_input_cost": 0.00,
+        "cache_write_cost": 0.00,
+        "output_cost": 0.0,
+    },
    "kimi-k2-openrouter": {
        "id": "kimi-k2-openrouter",
        "name": "moonshotai/kimi-k2-0905:exacto",
@@ -139,4 +163,28 @@ OPENROUTER_MODELS = {
        "input_cost": 0.045,
        "output_cost": 0.15,
    },
+    "glm-4.7-openrouter": {
+        "id": "glm-4.7-openrouter",
+        "name": "z-ai/glm-4.7",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.6,
+        "cached_input_cost": 0.6,
+        "cache_write_cost": 0.6,
+        "output_cost": 2.20,
+    },
+    "minimax-m2.1-openrouter": {
+        "id": "minimax-m2.1-openrouter",
+        "name": "minimax/minimax-m2.1",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.3,
+        "cached_input_cost": 0.3,
+        "cache_write_cost": 0.3,
+        "output_cost": 1.20,
+    },
 }