lm-deluge 0.0.59__tar.gz → 0.0.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (82)
  1. {lm_deluge-0.0.59/src/lm_deluge.egg-info → lm_deluge-0.0.61}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/anthropic.py +8 -0
  4. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/bedrock.py +3 -4
  5. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/gemini.py +7 -6
  6. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/mistral.py +8 -9
  7. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/openai.py +16 -13
  8. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/client.py +176 -5
  9. lm_deluge-0.0.61/src/lm_deluge/file.py +527 -0
  10. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openai.py +28 -0
  11. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/prompt.py +70 -14
  12. lm_deluge-0.0.61/src/lm_deluge/warnings.py +46 -0
  13. {lm_deluge-0.0.59 → lm_deluge-0.0.61/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  14. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/SOURCES.txt +4 -1
  15. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_builtin_tools.py +2 -2
  16. lm_deluge-0.0.61/tests/test_file_upload.py +627 -0
  17. lm_deluge-0.0.61/tests/test_openrouter_generic.py +238 -0
  18. lm_deluge-0.0.59/src/lm_deluge/file.py +0 -158
  19. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/LICENSE +0 -0
  20. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/README.md +0 -0
  21. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/setup.cfg +0 -0
  22. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/__init__.py +0 -0
  23. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/__init__.py +0 -0
  24. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/base.py +0 -0
  25. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/common.py +0 -0
  26. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  27. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  28. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  29. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  30. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  31. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/response.py +0 -0
  32. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/batches.py +0 -0
  33. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  34. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  35. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  36. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  37. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/base.py +0 -0
  38. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/openai.py +0 -0
  39. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cache.py +0 -0
  40. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cli.py +0 -0
  41. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/config.py +0 -0
  42. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/embed.py +0 -0
  43. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/errors.py +0 -0
  44. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/image.py +0 -0
  45. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/__init__.py +0 -0
  46. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/classify.py +0 -0
  47. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/extract.py +0 -0
  48. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/locate.py +0 -0
  49. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/ocr.py +0 -0
  50. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/score.py +0 -0
  51. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/translate.py +0 -0
  52. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/__init__.py +0 -0
  53. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/anthropic.py +0 -0
  54. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/bedrock.py +0 -0
  55. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cerebras.py +0 -0
  56. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cohere.py +0 -0
  57. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/deepseek.py +0 -0
  58. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/fireworks.py +0 -0
  59. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/google.py +0 -0
  60. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/grok.py +0 -0
  61. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/groq.py +0 -0
  62. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/meta.py +0 -0
  63. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/mistral.py +0 -0
  64. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openrouter.py +0 -0
  65. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/together.py +0 -0
  66. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/cerebras.py +0 -0
  67. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/meta.py +0 -0
  68. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/request_context.py +0 -0
  69. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/rerank.py +0 -0
  70. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tool.py +0 -0
  71. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tracker.py +0 -0
  72. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/usage.py +0 -0
  73. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/harmony.py +0 -0
  74. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/json.py +0 -0
  75. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/logprobs.py +0 -0
  76. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/spatial.py +0 -0
  77. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/validation.py +0 -0
  78. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/xml.py +0 -0
  79. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  80. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/requires.txt +0 -0
  81. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/top_level.txt +0 -0
  82. {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_native_mcp_server.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.59
+Version: 0.0.61
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.59"
+version = "0.0.61"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/anthropic.py
@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,

src/lm_deluge/api_requests/bedrock.py
@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)
 
     if tools:
         request_tools = []
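
The warnings.warn(...) call sites here and in the other request builders are replaced by maybe_warn(code, **kwargs) from the new src/lm_deluge/warnings.py (+46 lines, not included in this diff). The sketch below only illustrates a helper with that call shape, assuming it maps codes to message templates and emits each distinct warning once; the message texts and the environment switch are assumptions, not the package's actual implementation.

    import os
    import warnings

    # Hypothetical code-keyed warning helper matching the maybe_warn(...) calls
    # in this diff; not the contents of src/lm_deluge/warnings.py.
    _MESSAGES = {
        "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format is not supported",
        "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort for non-reasoning model: {model_name}",
        "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs for model without logprob support: {model_name}",
        "WARN_CACHING_UNSUPPORTED": "Cache parameter {cache_param!r} is not supported, ignoring for {model_name}",
    }
    _emitted: set[tuple] = set()

    def maybe_warn(code: str, **kwargs) -> None:
        if os.environ.get("LM_DELUGE_SUPPRESS_WARNINGS"):  # assumed opt-out switch
            return
        key = (code, tuple(sorted(kwargs.items())))
        if key in _emitted:  # fire each distinct warning at most once
            return
        _emitted.add(key)
        warnings.warn(_MESSAGES.get(code, code).format(**kwargs))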

src/lm_deluge/api_requests/gemini.py
@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(
 
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)
 
     # Add tools if provided
     if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):
 
         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
 
         self.model = APIModel.from_registry(self.context.model_name)

src/lm_deluge/api_requests/mistral.py
@@ -1,9 +1,10 @@
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -38,13 +41,9 @@
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}
 

src/lm_deluge/api_requests/openai.py
@@ -1,7 +1,6 @@
 import json
 import os
 import traceback as tb
-import warnings
 from types import SimpleNamespace
 
 import aiohttp
@@ -9,6 +8,7 @@ from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -75,9 +75,8 @@
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
     if sampling_params.logprobs:
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:
@@ -105,8 +104,10 @@ class OpenAIRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -283,9 +284,7 @@
         }
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
 
     if sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}
@@ -322,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
         super().__init__(context)
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
            )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -526,8 +527,10 @@ async def stream_chat(
     extra_headers: dict[str, str] | None = None,
 ):
     if cache is not None:
-        warnings.warn(
-            f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+        maybe_warn(
+            "WARN_CACHING_UNSUPPORTED",
+            model_name=model_name,
+            cache_param=cache,
         )
 
     model = APIModel.from_registry(model_name)

src/lm_deluge/client.py
@@ -3,6 +3,7 @@ from typing import (
     Any,
     AsyncGenerator,
     Callable,
+    ClassVar,
     Literal,
     Self,
     Sequence,
@@ -31,7 +32,7 @@ from lm_deluge.tool import MCPServer, Tool
 
 from .api_requests.base import APIResponse
 from .config import SamplingParams
-from .models import APIModel, registry
+from .models import APIModel, register_model, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker
 
@@ -43,6 +44,12 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """
 
+    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+        "-low": "low",
+        "-medium": "medium",
+        "-high": "high",
+    }
+
     model_names: str | list[str] = ["gpt-4.1-mini"]
     name: str | None = None
     max_requests_per_minute: int = 1_000
@@ -117,13 +124,112 @@ class _LLMClient(BaseModel):
 
     # NEW! Builder methods
     def with_model(self, model: str):
-        self.model_names = [model]
+        self._update_models([model])
         return self
 
     def with_models(self, models: list[str]):
-        self.model_names = models
+        self._update_models(models)
         return self
 
+    def _update_models(self, models: list[str]) -> None:
+        normalized, per_model_efforts = self._normalize_model_names(models)
+        if self.reasoning_effort is None:
+            unique_efforts = {eff for eff in per_model_efforts if eff is not None}
+            if len(normalized) == 1 and per_model_efforts[0] is not None:
+                self.reasoning_effort = per_model_efforts[0]
+            elif (
+                len(unique_efforts) == 1
+                and len(unique_efforts) != 0
+                and None not in per_model_efforts
+            ):
+                self.reasoning_effort = next(iter(unique_efforts))  # type: ignore
+        self.model_names = normalized
+        self._align_sampling_params(per_model_efforts)
+        self._reset_model_weights()
+
+    def _normalize_model_names(
+        self, models: list[str]
+    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+        normalized: list[str] = []
+        efforts: list[Literal["low", "medium", "high"] | None] = []
+
+        for name in models:
+            base_name = self._preprocess_openrouter_model(name)
+            trimmed_name, effort = self.__class__._strip_reasoning_suffix_if_registered(
+                base_name
+            )
+            normalized.append(trimmed_name)
+            efforts.append(effort)
+
+        return normalized, efforts
+
+    def _align_sampling_params(
+        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+    ) -> None:
+        if len(per_model_efforts) < len(self.model_names):
+            per_model_efforts = per_model_efforts + [None] * (
+                len(self.model_names) - len(per_model_efforts)
+            )
+
+        if not self.model_names:
+            self.sampling_params = []
+            return
+
+        if not self.sampling_params:
+            self.sampling_params = []
+
+        if len(self.sampling_params) == 0:
+            for _ in self.model_names:
+                self.sampling_params.append(
+                    SamplingParams(
+                        temperature=self.temperature,
+                        top_p=self.top_p,
+                        json_mode=self.json_mode,
+                        max_new_tokens=self.max_new_tokens,
+                        reasoning_effort=self.reasoning_effort,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                    )
+                )
+        elif len(self.sampling_params) == 1 and len(self.model_names) > 1:
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+        elif len(self.sampling_params) != len(self.model_names):
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+
+        if self.reasoning_effort is not None:
+            for sp in self.sampling_params:
+                sp.reasoning_effort = self.reasoning_effort
+        else:
+            for sp, effort in zip(self.sampling_params, per_model_efforts):
+                if effort is not None:
+                    sp.reasoning_effort = effort
+
+    def _reset_model_weights(self) -> None:
+        if not self.model_names:
+            self.model_weights = []
+            return
+
+        if isinstance(self.model_weights, list):
+            if len(self.model_weights) == len(self.model_names) and any(
+                self.model_weights
+            ):
+                total = sum(self.model_weights)
+                if total == 0:
+                    self.model_weights = [
+                        1 / len(self.model_names) for _ in self.model_names
+                    ]
+                else:
+                    self.model_weights = [w / total for w in self.model_weights]
+                return
+        # Fallback to uniform distribution
+        self.model_weights = [1 / len(self.model_names) for _ in self.model_names]
+
     def with_limits(
         self,
         max_requests_per_minute: int | None = None,
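
The builder methods now route through _update_models, which re-normalizes the model names, aligns sampling_params to the model list, and re-derives model_weights. An illustrative sketch of the intended effect, assuming the package's public LLMClient exposes the same fields as _LLMClient and that both example model ids below are in the registry:

    from lm_deluge import LLMClient

    # Example model ids; any two registered models behave the same way.
    client = LLMClient(model_names="gpt-4.1-mini")
    client.with_models(["gpt-4.1-mini", "gpt-4.1"])

    # _align_sampling_params copies the single SamplingParams entry per model,
    # and _reset_model_weights falls back to a uniform distribution when the
    # existing weights no longer match the model list.
    assert client.model_names == ["gpt-4.1-mini", "gpt-4.1"]
    assert len(client.sampling_params) == 2
    assert client.model_weights == [0.5, 0.5]
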
@@ -147,11 +253,64 @@
     def models(self):
         return self.model_names  # why? idk
 
+    @staticmethod
+    def _preprocess_openrouter_model(model_name: str) -> str:
+        """Process openrouter: prefix and register model if needed."""
+        if model_name.startswith("openrouter:"):
+            slug = model_name.split(":", 1)[1]  # Everything after "openrouter:"
+            # Create a unique id by replacing slashes with hyphens
+            model_id = f"openrouter-{slug.replace('/', '-')}"
+
+            # Register the model if not already in registry
+            if model_id not in registry:
+                register_model(
+                    id=model_id,
+                    name=slug,  # The full slug sent to OpenRouter API (e.g., "openrouter/andromeda-alpha")
+                    api_base="https://openrouter.ai/api/v1",
+                    api_key_env_var="OPENROUTER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for generic models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+            return model_id
+        return model_name
+
     @model_validator(mode="before")
     @classmethod
     def fix_lists(cls, data) -> "_LLMClient":
-        if isinstance(data.get("model_names"), str):
-            data["model_names"] = [data["model_names"]]
+        # Process model_names - handle both strings and lists
+        model_names = data.get("model_names")
+
+        if isinstance(model_names, str):
+            # Single model as string
+            # First, handle OpenRouter prefix
+            model_name = cls._preprocess_openrouter_model(model_names)
+
+            # Then handle reasoning effort suffix (e.g., "gpt-5-high")
+            model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
+            if effort and data.get("reasoning_effort") is None:
+                data["reasoning_effort"] = effort
+
+            data["model_names"] = [model_name]
+
+        elif isinstance(model_names, list):
+            # List of models - process each one
+            processed_models = []
+            for model_name in model_names:
+                # Handle OpenRouter prefix for each model
+                processed_model = cls._preprocess_openrouter_model(model_name)
+                processed_model, _ = cls._strip_reasoning_suffix_if_registered(
+                    processed_model
+                )
+                processed_models.append(processed_model)
+            data["model_names"] = processed_models
+
         if not isinstance(data.get("sampling_params", []), list):
             data["sampling_params"] = [data["sampling_params"]]
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
@@ -170,6 +329,18 @@
         data["sampling_params"] = data["sampling_params"] * len(data["model_names"])
         return data
 
+    @classmethod
+    def _strip_reasoning_suffix_if_registered(
+        cls, model_name: str
+    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+        """Remove reasoning suffix only when the trimmed model already exists."""
+        for suffix, effort in cls._REASONING_SUFFIXES.items():
+            if model_name.endswith(suffix) and len(model_name) > len(suffix):
+                candidate = model_name[: -len(suffix)]
+                if candidate in registry:
+                    return candidate, effort
+        return model_name, None
+
     @model_validator(mode="after")
     def validate_client(self) -> Self:
         if isinstance(self.model_names, str):
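
_strip_reasoning_suffix_if_registered only strips a -low/-medium/-high suffix when the trimmed name is itself a registered model, so ids that genuinely end in one of those strings are left untouched. A sketch of the intended behavior, assuming "gpt-5" is a registered model (plausible given the models/openai.py additions in this release, which are not shown here) and that the public LLMClient shares _LLMClient's fields:

    from lm_deluge import LLMClient

    # "gpt-5-high" resolves to the registered "gpt-5" and, because no explicit
    # reasoning_effort was passed, sets reasoning_effort from the suffix.
    client = LLMClient(model_names="gpt-5-high")
    assert client.model_names == ["gpt-5"]
    assert client.reasoning_effort == "high"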