lm-deluge 0.0.52.tar.gz → 0.0.70.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.52/src/lm_deluge.egg-info → lm_deluge-0.0.70}/PKG-INFO +3 -1
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/pyproject.toml +4 -1
- lm_deluge-0.0.70/src/lm_deluge/__init__.py +41 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/anthropic.py +21 -1
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/base.py +93 -5
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/bedrock.py +3 -4
- lm_deluge-0.0.70/src/lm_deluge/api_requests/chat_reasoning.py +4 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/gemini.py +7 -6
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/mistral.py +8 -9
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/openai.py +179 -124
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/response.py +28 -1
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/batches.py +25 -9
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/client.py +348 -188
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/config.py +1 -1
- lm_deluge-0.0.70/src/lm_deluge/file.py +527 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/extract.py +7 -5
- lm_deluge-0.0.70/src/lm_deluge/mock_openai.py +641 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/__init__.py +16 -9
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/anthropic.py +42 -20
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/bedrock.py +0 -14
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/cohere.py +0 -16
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/google.py +20 -32
- lm_deluge-0.0.70/src/lm_deluge/models/grok.py +82 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/groq.py +2 -2
- lm_deluge-0.0.70/src/lm_deluge/models/kimi.py +34 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/meta.py +0 -8
- lm_deluge-0.0.70/src/lm_deluge/models/minimax.py +10 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/openai.py +46 -42
- lm_deluge-0.0.70/src/lm_deluge/models/openrouter.py +64 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/together.py +0 -16
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/prompt.py +138 -29
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/request_context.py +9 -11
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/tool.py +395 -19
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/tracker.py +84 -10
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/usage.py +30 -21
- lm_deluge-0.0.70/src/lm_deluge/warnings.py +46 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70/src/lm_deluge.egg-info}/PKG-INFO +3 -1
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/SOURCES.txt +9 -3
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/requires.txt +3 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/tests/test_builtin_tools.py +2 -2
- lm_deluge-0.0.70/tests/test_file_upload.py +627 -0
- lm_deluge-0.0.70/tests/test_mock_openai.py +479 -0
- lm_deluge-0.0.70/tests/test_openrouter_generic.py +238 -0
- lm_deluge-0.0.52/src/lm_deluge/__init__.py +0 -17
- lm_deluge-0.0.52/src/lm_deluge/agent.py +0 -0
- lm_deluge-0.0.52/src/lm_deluge/file.py +0 -158
- lm_deluge-0.0.52/src/lm_deluge/gemini_limits.py +0 -65
- lm_deluge-0.0.52/src/lm_deluge/models/grok.py +0 -38
- lm_deluge-0.0.52/src/lm_deluge/models/openrouter.py +0 -1
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/LICENSE +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/README.md +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/setup.cfg +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.52 → lm_deluge-0.0.70}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.52/src/lm_deluge.egg-info → lm_deluge-0.0.70}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.52
+Version: 0.0.70
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -23,6 +23,8 @@ Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
 Requires-Dist: rich
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
 Dynamic: license-file
 
 # lm-deluge
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.52"
+version = "0.0.70"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -32,5 +32,8 @@ dependencies = [
     # "textual>=0.58.0"
 ]
 
+[project.optional-dependencies]
+openai = ["openai>=1.0.0"]
+
 # [project.scripts]
 # deluge = "lm_deluge.cli:main"
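The new `[project.optional-dependencies]` group means the `openai` package is only pulled in when the extra is requested (`pip install "lm_deluge[openai]"` installs `openai>=1.0.0`). A quick standard-library check for whether the extra is present at runtime (illustrative snippet, not part of the package):

```python
import importlib.util

# True only if the optional `openai` extra (openai>=1.0.0) is installed
has_openai = importlib.util.find_spec("openai") is not None
print("openai extra available:", has_openai)
```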
lm_deluge-0.0.70/src/lm_deluge/__init__.py (new file)

@@ -0,0 +1,41 @@
+from .client import APIResponse, LLMClient, SamplingParams
+from .file import File
+from .prompt import Conversation, Message
+from .tool import Tool, ToolParams
+
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
+
+# dotenv.load_dotenv() - don't do this, fucks with other packages
+
+__all__ = [
+    "LLMClient",
+    "SamplingParams",
+    "APIResponse",
+    "Conversation",
+    "Message",
+    "Tool",
+    "ToolParams",
+    "File",
+]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
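The rewritten `__init__.py` guards the `mock_openai` exports behind the optional dependency: `MockAsyncOpenAI` and the re-exported error classes only land in `__all__` when `openai` imports successfully. A minimal usage sketch based only on what the diff shows (the constructor arguments of `MockAsyncOpenAI` are not visible here):

```python
import lm_deluge

if "MockAsyncOpenAI" in lm_deluge.__all__:
    # openai extra installed: the OpenAI-compatible mock client is exported
    mock_client_cls = lm_deluge.MockAsyncOpenAI
else:
    # extra not installed: only the core exports (LLMClient, etc.) are available
    mock_client_cls = None
```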
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/anthropic.py

@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -60,7 +68,8 @@ def _build_anthropic_request(
             "type": "enabled",
             "budget_tokens": budget,
         }
-
+        if "top_p" in request_json:
+            request_json["top_p"] = max(request_json["top_p"], 0.95)
         request_json["temperature"] = 1.0
         request_json["max_tokens"] += budget
     else:
@@ -70,12 +79,20 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message
 
+    # handle temp + top_p for opus 4.1/sonnet 4.5
+    if "4-1" in model.name or "4-5" in model.name:
+        if "temperature" in request_json and "top_p" in request_json:
+            request_json.pop("top_p")
+
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
                 tool_definitions.append(tool.dump_for("anthropic"))
+            elif isinstance(tool, dict) and "url" in tool:
+                _add_beta(base_headers, "mcp-client-2025-04-04")
+                mcp_servers.append(tool)
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
                 # add betas if needed
@@ -89,6 +106,9 @@ def _build_anthropic_request(
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
                     _add_beta(base_headers, "code-execution-2025-05-22")
+                elif tool["type"] in ["memory_20250818", "clear_tool_uses_20250919"]:
+                    _add_beta(base_headers, "context-management-2025-06-27")
+
             elif isinstance(tool, MCPServer):
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool.for_anthropic())
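The anthropic.py changes gate Anthropic beta features on the request contents: a remote file with `remote_provider == "anthropic"` adds the `files-api-2025-04-14` beta, a plain dict tool containing a `"url"` key adds `mcp-client-2025-04-04`, and the new memory/clear-tool-uses tool types add `context-management-2025-06-27`. They also adjust sampling for extended thinking (clamp `top_p` to at least 0.95) and drop `top_p` when both it and `temperature` are set for Opus 4.1 / Sonnet 4.5 model names. The `_add_beta` helper itself is not shown in this diff; the snippet below is an illustrative stand-in that accumulates comma-separated values in the standard `anthropic-beta` header, which is how Anthropic beta flags are conventionally sent:

```python
def add_beta(headers: dict[str, str], beta: str) -> None:
    """Append a beta flag to the anthropic-beta header without duplicating it."""
    existing = [b for b in headers.get("anthropic-beta", "").split(",") if b]
    if beta not in existing:
        existing.append(beta)
    headers["anthropic-beta"] = ",".join(existing)

headers = {"content-type": "application/json"}
add_beta(headers, "files-api-2025-04-14")   # prompt contains a remote Anthropic file
add_beta(headers, "mcp-client-2025-04-04")  # a {"url": ...} tool dict or MCPServer was passed
print(headers["anthropic-beta"])            # files-api-2025-04-14,mcp-client-2025-04-04
```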
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/base.py

@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -52,6 +54,9 @@ class APIRequestBase(ABC):
         self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
     ) -> dict[str, str]:
         """Merge extra_headers with base headers, giving priority to extra_headers."""
+        # Filter out None values from base headers (e.g., missing API keys)
+        base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
         if not self.context.extra_headers:
             return base_headers
 
@@ -69,6 +74,9 @@ class APIRequestBase(ABC):
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
         merged.update(filtered_extra)
+
+        # Filter out None values from final merged headers
+        merged = {k: v for k, v in merged.items() if v is not None}
         return merged
 
     def handle_success(self, data):
@@ -76,15 +84,95 @@
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                            last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-
-
-
-
-
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
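Two themes in the base.py changes: headers whose values are `None` (for example, an unset API key) are now stripped both before and after `extra_headers` are merged in, and OpenAI Responses-API requests with `background=True` are executed via a start, poll every 5 seconds, cancel-on-timeout loop rather than a single call. The header filtering is easy to illustrate in isolation (standalone snippet, not the library's method):

```python
def merge_headers(base: dict, extra: dict) -> dict:
    """Drop None values, then let extra headers override base ones."""
    merged = {k: v for k, v in base.items() if v is not None}
    merged.update(extra)
    return {k: v for k, v in merged.items() if v is not None}

base = {"Authorization": None, "content-type": "application/json"}  # unset API key -> None
extra = {"x-source": "lm-deluge"}
assert merge_headers(base, extra) == {
    "content-type": "application/json",
    "x-source": "lm-deluge",
}
```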
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/bedrock.py

@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)
 
     if tools:
         request_tools = []
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/gemini.py

@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(
 
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)
 
     # Add tools if provided
     if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):
 
         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
 
         self.model = APIModel.from_registry(self.context.model_name)
{lm_deluge-0.0.52 → lm_deluge-0.0.70}/src/lm_deluge/api_requests/mistral.py

@@ -1,9 +1,10 @@
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
         if self.context.cache is not None:
-
-
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
         self.model = APIModel.from_registry(self.context.model_name)
 
@@ -38,13 +41,9 @@
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}
 
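bedrock.py, gemini.py, and mistral.py all swap ad-hoc `warnings.warn(...)` calls for `maybe_warn(CODE, **context)` from the new `lm_deluge/warnings.py` module (added in 0.0.70 but not shown in this section of the diff). Its actual implementation is therefore unknown here; one plausible shape for such a gate, purely as a hypothetical sketch, is a per-code deduplicated warning:

```python
# Hypothetical sketch only: the real lm_deluge/warnings.py is not visible in this diff.
import warnings

_seen: set[str] = set()
_messages = {
    "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}",
    "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
    "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format is not supported",
    "WARN_CACHING_UNSUPPORTED": "Ignoring cache param ({cache_param}) for model: {model_name}",
}

def maybe_warn(code: str, **context) -> None:
    """Warn once per code; later calls with the same code are silent."""
    if code in _seen:
        return
    _seen.add(code)
    warnings.warn(_messages.get(code, code).format(**context))
```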