lm-deluge 0.0.59__tar.gz → 0.0.61__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {lm_deluge-0.0.59/src/lm_deluge.egg-info → lm_deluge-0.0.61}/PKG-INFO +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/pyproject.toml +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/anthropic.py +8 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/bedrock.py +3 -4
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/gemini.py +7 -6
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/mistral.py +8 -9
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/openai.py +16 -13
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/client.py +176 -5
- lm_deluge-0.0.61/src/lm_deluge/file.py +527 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openai.py +28 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/prompt.py +70 -14
- lm_deluge-0.0.61/src/lm_deluge/warnings.py +46 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/SOURCES.txt +4 -1
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_builtin_tools.py +2 -2
- lm_deluge-0.0.61/tests/test_file_upload.py +627 -0
- lm_deluge-0.0.61/tests/test_openrouter_generic.py +238 -0
- lm_deluge-0.0.59/src/lm_deluge/file.py +0 -158
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/LICENSE +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/README.md +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/setup.cfg +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.59 → lm_deluge-0.0.61}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }

+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
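
The hunk above calls an _add_beta helper that is not part of this diff; it already exists elsewhere in anthropic.py. As a rough sketch only (the real helper may differ), it presumably merges a flag into the comma-separated anthropic-beta header without duplicating it:

def _add_beta(headers: dict[str, str], beta: str) -> None:
    # Sketch: append a beta flag to the "anthropic-beta" header exactly once.
    existing = headers.get("anthropic-beta")
    if not existing:
        headers["anthropic-beta"] = beta
    elif beta not in [b.strip() for b in existing.split(",")]:
        headers["anthropic-beta"] = f"{existing},{beta}"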
src/lm_deluge/api_requests/bedrock.py

@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings

 from aiohttp import ClientResponse

+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:

@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)

     if tools:
         request_tools = []
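
The new src/lm_deluge/warnings.py (+46 lines) is not included in this diff. Judging only from the call sites, maybe_warn takes a warning code plus keyword arguments. A minimal sketch of such a helper follows; the template wording and the emit-once policy are assumptions, not the package's actual implementation:

import warnings

# Assumed message templates keyed by warning code; wording is illustrative only.
_TEMPLATES: dict[str, str] = {
    "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort for non-reasoning model: {model_name}",
    "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs for model without logprob support: {model_name}",
    "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format is not supported",
    "WARN_CACHING_UNSUPPORTED": "cache={cache_param!r} ignored for {model_name}; prompt caching is not supported here",
}

_emitted: set[str] = set()

def maybe_warn(code: str, **kwargs) -> None:
    # Emit each warning code at most once so large batches don't flood stderr.
    if code in _emitted:
        return
    _emitted.add(code)
    warnings.warn(_TEMPLATES.get(code, code).format(**kwargs))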
src/lm_deluge/api_requests/gemini.py

@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel

@@ -54,9 +55,7 @@ async def _build_gemini_request(

     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)

     # Add tools if provided
     if tools:

@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):

         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )

         self.model = APIModel.from_registry(self.context.model_name)
src/lm_deluge/api_requests/mistral.py

@@ -1,9 +1,10 @@
 import json
 import os
-import warnings

 from aiohttp import ClientResponse

+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext

@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):

         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}
src/lm_deluge/api_requests/openai.py

@@ -1,7 +1,6 @@
 import json
 import os
 import traceback as tb
-import warnings
 from types import SimpleNamespace

 import aiohttp

@@ -9,6 +8,7 @@ from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel

@@ -75,9 +75,8 @@ async def _build_oa_chat_request(
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
     if sampling_params.logprobs:
         request_json["logprobs"] = True
         if sampling_params.top_logprobs is not None:

@@ -105,8 +104,10 @@ class OpenAIRequest(APIRequestBase):

         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -283,9 +284,7 @@ async def _build_oa_responses_request(
         }
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

     if sampling_params.json_mode and model.supports_json:
         request_json["text"] = {"format": {"type": "json_object"}}

@@ -322,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
         super().__init__(context)
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-            warnings.warn(
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)

@@ -526,8 +527,10 @@ async def stream_chat(
     extra_headers: dict[str, str] | None = None,
 ):
     if cache is not None:
-        warnings.warn(
-
+        maybe_warn(
+            "WARN_CACHING_UNSUPPORTED",
+            model_name=model_name,
+            cache_param=cache,
         )

     model = APIModel.from_registry(model_name)
src/lm_deluge/client.py

@@ -3,6 +3,7 @@ from typing import (
     Any,
     AsyncGenerator,
     Callable,
+    ClassVar,
     Literal,
     Self,
     Sequence,

@@ -31,7 +32,7 @@ from lm_deluge.tool import MCPServer, Tool

 from .api_requests.base import APIResponse
 from .config import SamplingParams
-from .models import APIModel, registry
+from .models import APIModel, register_model, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker

@@ -43,6 +44,12 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """

+    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+        "-low": "low",
+        "-medium": "medium",
+        "-high": "high",
+    }
+
     model_names: str | list[str] = ["gpt-4.1-mini"]
     name: str | None = None
     max_requests_per_minute: int = 1_000

@@ -117,13 +124,112 @@ class _LLMClient(BaseModel):

     # NEW! Builder methods
     def with_model(self, model: str):
-        self.
+        self._update_models([model])
         return self

     def with_models(self, models: list[str]):
-        self.
+        self._update_models(models)
         return self

+    def _update_models(self, models: list[str]) -> None:
+        normalized, per_model_efforts = self._normalize_model_names(models)
+        if self.reasoning_effort is None:
+            unique_efforts = {eff for eff in per_model_efforts if eff is not None}
+            if len(normalized) == 1 and per_model_efforts[0] is not None:
+                self.reasoning_effort = per_model_efforts[0]
+            elif (
+                len(unique_efforts) == 1
+                and len(unique_efforts) != 0
+                and None not in per_model_efforts
+            ):
+                self.reasoning_effort = next(iter(unique_efforts))  # type: ignore
+        self.model_names = normalized
+        self._align_sampling_params(per_model_efforts)
+        self._reset_model_weights()
+
+    def _normalize_model_names(
+        self, models: list[str]
+    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+        normalized: list[str] = []
+        efforts: list[Literal["low", "medium", "high"] | None] = []
+
+        for name in models:
+            base_name = self._preprocess_openrouter_model(name)
+            trimmed_name, effort = self.__class__._strip_reasoning_suffix_if_registered(
+                base_name
+            )
+            normalized.append(trimmed_name)
+            efforts.append(effort)
+
+        return normalized, efforts
+
+    def _align_sampling_params(
+        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+    ) -> None:
+        if len(per_model_efforts) < len(self.model_names):
+            per_model_efforts = per_model_efforts + [None] * (
+                len(self.model_names) - len(per_model_efforts)
+            )
+
+        if not self.model_names:
+            self.sampling_params = []
+            return
+
+        if not self.sampling_params:
+            self.sampling_params = []
+
+        if len(self.sampling_params) == 0:
+            for _ in self.model_names:
+                self.sampling_params.append(
+                    SamplingParams(
+                        temperature=self.temperature,
+                        top_p=self.top_p,
+                        json_mode=self.json_mode,
+                        max_new_tokens=self.max_new_tokens,
+                        reasoning_effort=self.reasoning_effort,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                    )
+                )
+        elif len(self.sampling_params) == 1 and len(self.model_names) > 1:
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+        elif len(self.sampling_params) != len(self.model_names):
+            base_param = self.sampling_params[0]
+            self.sampling_params = [
+                base_param.model_copy(deep=True) for _ in self.model_names
+            ]
+
+        if self.reasoning_effort is not None:
+            for sp in self.sampling_params:
+                sp.reasoning_effort = self.reasoning_effort
+        else:
+            for sp, effort in zip(self.sampling_params, per_model_efforts):
+                if effort is not None:
+                    sp.reasoning_effort = effort
+
+    def _reset_model_weights(self) -> None:
+        if not self.model_names:
+            self.model_weights = []
+            return
+
+        if isinstance(self.model_weights, list):
+            if len(self.model_weights) == len(self.model_names) and any(
+                self.model_weights
+            ):
+                total = sum(self.model_weights)
+                if total == 0:
+                    self.model_weights = [
+                        1 / len(self.model_names) for _ in self.model_names
+                    ]
+                else:
+                    self.model_weights = [w / total for w in self.model_weights]
+                return
+        # Fallback to uniform distribution
+        self.model_weights = [1 / len(self.model_names) for _ in self.model_names]
+
     def with_limits(
         self,
         max_requests_per_minute: int | None = None,
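
With the builder methods now delegating to _update_models, swapping models re-normalizes the names, re-aligns sampling_params to one entry per model, and resets model_weights. A hedged usage sketch, assuming the public class is LLMClient and that the model ids used are registered:

from lm_deluge import LLMClient  # public entry point assumed

client = LLMClient(model_names="gpt-4.1-mini")
# Replacing the model list keeps sampling params and weights consistent:
client = client.with_models(["gpt-4.1-mini", "gpt-4.1"])  # ids assumed to be registered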
@@ -147,11 +253,64 @@ class _LLMClient(BaseModel):
     def models(self):
         return self.model_names  # why? idk

+    @staticmethod
+    def _preprocess_openrouter_model(model_name: str) -> str:
+        """Process openrouter: prefix and register model if needed."""
+        if model_name.startswith("openrouter:"):
+            slug = model_name.split(":", 1)[1]  # Everything after "openrouter:"
+            # Create a unique id by replacing slashes with hyphens
+            model_id = f"openrouter-{slug.replace('/', '-')}"
+
+            # Register the model if not already in registry
+            if model_id not in registry:
+                register_model(
+                    id=model_id,
+                    name=slug,  # The full slug sent to OpenRouter API (e.g., "openrouter/andromeda-alpha")
+                    api_base="https://openrouter.ai/api/v1",
+                    api_key_env_var="OPENROUTER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for generic models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+            return model_id
+        return model_name
+
     @model_validator(mode="before")
     @classmethod
     def fix_lists(cls, data) -> "_LLMClient":
-
-
+        # Process model_names - handle both strings and lists
+        model_names = data.get("model_names")
+
+        if isinstance(model_names, str):
+            # Single model as string
+            # First, handle OpenRouter prefix
+            model_name = cls._preprocess_openrouter_model(model_names)
+
+            # Then handle reasoning effort suffix (e.g., "gpt-5-high")
+            model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
+            if effort and data.get("reasoning_effort") is None:
+                data["reasoning_effort"] = effort
+
+            data["model_names"] = [model_name]
+
+        elif isinstance(model_names, list):
+            # List of models - process each one
+            processed_models = []
+            for model_name in model_names:
+                # Handle OpenRouter prefix for each model
+                processed_model = cls._preprocess_openrouter_model(model_name)
+                processed_model, _ = cls._strip_reasoning_suffix_if_registered(
+                    processed_model
+                )
+                processed_models.append(processed_model)
+            data["model_names"] = processed_models
+
         if not isinstance(data.get("sampling_params", []), list):
             data["sampling_params"] = [data["sampling_params"]]
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
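
A hedged example of the new openrouter: prefix handling, continuing the LLMClient assumption above and reusing the slug mentioned in the code comment; it assumes OPENROUTER_API_KEY is set in the environment:

client = LLMClient(model_names="openrouter:openrouter/andromeda-alpha")
# The model is registered on the fly under the id "openrouter-openrouter-andromeda-alpha",
# pointed at https://openrouter.ai/api/v1 with the OpenAI-compatible spec,
# and the original slug "openrouter/andromeda-alpha" is sent as the model name.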
@@ -170,6 +329,18 @@ class _LLMClient(BaseModel):
         data["sampling_params"] = data["sampling_params"] * len(data["model_names"])
         return data

+    @classmethod
+    def _strip_reasoning_suffix_if_registered(
+        cls, model_name: str
+    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+        """Remove reasoning suffix only when the trimmed model already exists."""
+        for suffix, effort in cls._REASONING_SUFFIXES.items():
+            if model_name.endswith(suffix) and len(model_name) > len(suffix):
+                candidate = model_name[: -len(suffix)]
+                if candidate in registry:
+                    return candidate, effort
+        return model_name, None
+
     @model_validator(mode="after")
     def validate_client(self) -> Self:
         if isinstance(self.model_names, str):