lm-deluge 0.0.20__tar.gz → 0.0.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.20/src/lm_deluge.egg-info → lm_deluge-0.0.22}/PKG-INFO +1 -1
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/pyproject.toml +1 -1
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/anthropic.py +16 -13
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/base.py +10 -2
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/bedrock.py +18 -20
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/gemini.py +4 -3
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/mistral.py +2 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/openai.py +96 -83
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/response.py +4 -2
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/batches.py +86 -77
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/client.py +19 -10
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/image.py +6 -2
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/models.py +61 -59
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/prompt.py +104 -56
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/request_context.py +2 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/tool.py +115 -26
- {lm_deluge-0.0.20 → lm_deluge-0.0.22/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/LICENSE +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/README.md +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/setup.cfg +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.20 → lm_deluge-0.0.22}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/anthropic.py

@@ -4,8 +4,6 @@ import os
 from aiohttp import ClientResponse
 
 from lm_deluge.prompt import (
-    CachePattern,
-    Conversation,
     Message,
     Text,
     Thinking,
@@ -15,7 +13,6 @@ from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 
-from ..config import SamplingParams
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
 
@@ -30,12 +27,19 @@ def _add_beta(headers: dict, beta: str):
 
 def _build_anthropic_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool | dict | MCPServer] | None,
-    sampling_params: SamplingParams,
-    cache_pattern: CachePattern | None = None,
+    context: RequestContext,
+    # prompt: Conversation,
+    # tools: list[Tool | dict | MCPServer] | None,
+    # sampling_params: SamplingParams,
+    # cache_pattern: CachePattern | None = None,
 ):
+    prompt = context.prompt
+    cache_pattern = context.cache
+    tools = context.tools
+    sampling_params = context.sampling_params
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
+    if not system_message:
+        print("WARNING: system_message is None")
     base_headers = {
         "x-api-key": os.getenv(model.api_key_env_var),
         "anthropic-version": "2023-06-01",
@@ -69,6 +73,7 @@ def _build_anthropic_request(
             print("ignoring reasoning_effort for non-reasoning model")
     if system_message is not None:
         request_json["system"] = system_message
+
     if tools:
         mcp_servers = []
         tool_definitions = []
@@ -100,6 +105,7 @@ def _build_anthropic_request(
     if len(mcp_servers) > 0:
         request_json["mcp_servers"] = mcp_servers
 
+    # print("request json:", request_json)
     return request_json, base_headers
 
 
@@ -108,18 +114,15 @@ class AnthropicRequest(APIRequestBase):
         super().__init__(context=context)
 
         self.model = APIModel.from_registry(self.context.model_name)
-        self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
         if self.context.cache is not None:
            self.context.prompt.lock_images_as_bytes()
 
+    async def build_request(self):
+        self.url = f"{self.model.api_base}/messages"
         self.request_json, base_headers = _build_anthropic_request(
-            self.model,
-            self.context.prompt,
-            self.context.tools,
-            self.context.sampling_params,
-            self.context.cache,
+            self.model, self.context
         )
         self.request_header = self.merge_headers(
             base_headers, exclude_patterns=["openai", "gemini", "mistral"]
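
The hunks above change the builder's calling convention: `_build_anthropic_request` no longer takes `prompt`, `tools`, `sampling_params`, and `cache_pattern` as separate arguments; it receives the whole `RequestContext` and reads those four attributes off it. A rough, self-contained sketch of what that buys (toy names only, not lm_deluge's real classes):

# --- illustrative sketch, not part of the package diff ---
from dataclasses import dataclass


@dataclass
class ToyContext:
    # the four fields the new Anthropic builder reads off the context
    prompt: list
    cache: str | None
    tools: list | None
    sampling_params: dict


# 0.0.20 shape: every knob is a separate positional argument
def build_old(model, prompt, tools, sampling_params, cache_pattern=None) -> dict:
    return {"model": model, "messages": prompt, "cache": cache_pattern}


# 0.0.22 shape: one context object; new knobs don't change the signature
def build_new(model, context: ToyContext) -> dict:
    return {"model": model, "messages": context.prompt, "cache": context.cache}


ctx = ToyContext(
    prompt=[{"role": "user", "content": "hi"}],
    cache=None,
    tools=None,
    sampling_params={"max_new_tokens": 64},
)
assert build_old("claude", ctx.prompt, ctx.tools, ctx.sampling_params, ctx.cache) == build_new("claude", ctx)

Adding a new request knob now means touching `RequestContext` and the builders that care about it, rather than every provider's call site.
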
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/base.py

@@ -24,19 +24,21 @@ class APIRequestBase(ABC):
         self,
         context: RequestContext,
     ):
-        # If context is provided, use it; otherwise construct one from individual parameters
         self.context = context
 
         # Everything is now accessed through self.context - no copying!
         self.system_prompt = None
         self.result = []  # list of APIResponse objects from each attempt
 
-        # these should be set in
+        # these should be set in build_request of the subclass
         self.url = None
         self.request_header = None
         self.request_json = None
         self.region = None
 
+    async def build_request(self):
+        raise NotImplementedError("must be implemented in subclass")
+
     def increment_pbar(self):
         if self.context.status_tracker:
             self.context.status_tracker.increment_pbar()
@@ -76,7 +78,13 @@
 
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
+        await self.build_request()
         assert self.context.status_tracker
+        # try:
+        # dumped = json.dumps(self.request_json)
+        # except Exception:
+        # print("couldn't serialize request json")
+        # print(self.request_json)
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
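
These base-class hunks define the contract the provider classes elsewhere in this diff follow: `build_request()` is an async hook that raises by default, and `execute_once()` awaits it before opening the HTTP session, so nothing is built until the request is actually sent. A minimal sketch of that control flow (stand-in classes, no real HTTP):

# --- illustrative sketch, not part of the package diff ---
import asyncio


class RequestBase:
    def __init__(self, context):
        self.context = context
        self.url = None
        self.request_json = None

    async def build_request(self):
        raise NotImplementedError("must be implemented in subclass")

    async def execute_once(self):
        # request construction is deferred until the request is actually sent
        await self.build_request()
        print("POST", self.url, self.request_json)


class ToyOpenAIRequest(RequestBase):
    async def build_request(self):
        self.url = "https://api.example.com/chat/completions"
        self.request_json = {"messages": self.context["prompt"]}


asyncio.run(ToyOpenAIRequest({"prompt": "hello"}).execute_once())

Because `__init__` no longer builds the payload, constructing a request object stays cheap, and anything that needs `await` (such as listing tools from an MCP server) can happen inside `build_request()`.
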
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/bedrock.py

@@ -12,8 +12,6 @@ except ImportError:
     )
 
 from lm_deluge.prompt import (
-    CachePattern,
-    Conversation,
     Message,
     Text,
     Thinking,
@@ -23,7 +21,6 @@ from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 
-from ..config import SamplingParams
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
 
@@ -38,13 +35,14 @@ def _add_beta(headers: dict, beta: str):
         headers["anthropic_beta"] = beta
 
 
-def _build_anthropic_bedrock_request(
+async def _build_anthropic_bedrock_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool | dict | MCPServer] | None,
-    sampling_params: SamplingParams,
-    cache_pattern: CachePattern | None = None,
+    context: RequestContext,
 ):
+    prompt = context.prompt
+    cache_pattern = context.cache
+    tools = context.tools
+    sampling_params = context.sampling_params
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
 
     # handle AWS auth
@@ -121,9 +119,10 @@ def _build_anthropic_bedrock_request(
             elif tool["type"] == "code_execution_20250522":
                 _add_beta(base_headers, "code-execution-2025-05-22")
             elif isinstance(tool, MCPServer):
-
-
-
+                # Convert to individual tools locally (like OpenAI does)
+                individual_tools = await tool.to_tools()
+                for individual_tool in individual_tools:
+                    tool_definitions.append(individual_tool.dump_for("anthropic"))
 
     # Add cache control to last tool if tools_only caching is specified
     if cache_pattern == "tools_only" and tool_definitions:
@@ -141,21 +140,20 @@ class BedrockRequest(APIRequestBase):
         super().__init__(context=context)
 
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
         if self.context.cache is not None:
             self.context.prompt.lock_images_as_bytes()
 
-
-
-
-
-
-
-            context.cache,
-        )
-        )
+        (
+            self.request_json,
+            base_headers,
+            self.auth,
+            self.url,
+        ) = await _build_anthropic_bedrock_request(self.model, self.context)
         self.request_header = self.merge_headers(
             base_headers, exclude_patterns=["anthropic", "openai", "gemini", "mistral"]
         )
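
In the Bedrock builder, an `MCPServer` in the tools list is now expanded locally: the server is asked for its tools with `await tool.to_tools()` and each one is serialized with `dump_for("anthropic")` (the lines this replaces are not recoverable from this diff view). A hedged sketch of that expansion with stand-in classes:

# --- illustrative sketch, not part of the package diff ---
import asyncio
from dataclasses import dataclass


@dataclass
class StubTool:
    name: str

    def dump_for(self, provider: str) -> dict:
        # stand-in for lm_deluge's Tool.dump_for
        return {"name": self.name, "provider_format": provider}


class StubMCPServer:
    async def to_tools(self) -> list[StubTool]:
        # the real MCPServer.to_tools() would connect to the server and list its tools
        return [StubTool("search"), StubTool("fetch")]


async def expand(tools):
    tool_definitions = []
    for tool in tools:
        if isinstance(tool, StubMCPServer):
            # convert to individual tools locally, mirroring the hunk above
            for individual_tool in await tool.to_tools():
                tool_definitions.append(individual_tool.dump_for("anthropic"))
    return tool_definitions


print(asyncio.run(expand([StubMCPServer()])))

Awaiting `to_tools()` is also why `_build_anthropic_bedrock_request`, and the other builders in this release, become `async`.
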
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/gemini.py

@@ -14,7 +14,7 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
-def _build_gemini_request(
+async def _build_gemini_request(
     model: APIModel,
     prompt: Conversation,
     tools: list[Tool] | None,
@@ -75,7 +75,8 @@ class GeminiRequest(APIRequestBase):
         )
 
         self.model = APIModel.from_registry(self.context.model_name)
-
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/models/{self.model.name}:generateContent"
         base_headers = {
             "Content-Type": "application/json",
@@ -92,7 +93,7 @@ class GeminiRequest(APIRequestBase):
         )
         self.url += f"?key={api_key}"
 
-        self.request_json = _build_gemini_request(
+        self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
             self.context.tools,
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/mistral.py

@@ -21,6 +21,8 @@ class MistralRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/chat/completions"
         base_headers = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/openai.py

@@ -1,6 +1,7 @@
 import json
 import os
 import warnings
+from types import SimpleNamespace
 
 import aiohttp
 from aiohttp import ClientResponse
@@ -15,12 +16,13 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
-def _build_oa_chat_request(
+async def _build_oa_chat_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool] | None,
-    sampling_params: SamplingParams,
+    context: RequestContext,
 ) -> dict:
+    prompt = context.prompt
+    sampling_params = context.sampling_params
+    tools = context.tools
     request_json = {
         "model": model.name,
         "messages": prompt.to_openai(),
@@ -48,19 +50,19 @@
     if sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
     if tools:
-
+        request_tools = []
+        for tool in tools:
+            if isinstance(tool, Tool):
+                request_tools.append(tool.dump_for("openai-completions"))
+            elif isinstance(tool, MCPServer):
+                as_tools = await tool.to_tools()
+                request_tools.extend(
+                    [t.dump_for("openai-completions") for t in as_tools]
+                )
+        request_json["tools"] = request_tools
     return request_json
 
 
-def _build_oa_responses_request(
-    model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool] | None,
-    sampling_params: SamplingParams,
-):
-    pass  # TODO: implement
-
-
 class OpenAIRequest(APIRequestBase):
     def __init__(self, context: RequestContext):
         # Pass context to parent, which will handle backwards compatibility
@@ -72,6 +74,8 @@ class OpenAIRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/chat/completions"
         base_headers = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
@@ -80,12 +84,7 @@ class OpenAIRequest(APIRequestBase):
             base_headers, exclude_patterns=["anthropic"]
         )
 
-        self.request_json = _build_oa_chat_request(
-            self.model,
-            self.context.prompt,
-            self.context.tools,
-            self.context.sampling_params,
-        )
+        self.request_json = await _build_oa_chat_request(self.model, self.context)
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -187,6 +186,75 @@ class OpenAIRequest(APIRequestBase):
         )
 
 
+async def _build_oa_responses_request(
+    model: APIModel,
+    context: RequestContext,
+    # prompt: Conversation,
+    # tools: list[Tool] | None,
+    # sampling_params: SamplingParams,
+):
+    prompt = context.prompt
+    sampling_params = context.sampling_params
+    tools = context.tools
+    openai_responses_format = prompt.to_openai_responses()
+    request_json = {
+        "model": model.name,
+        "input": openai_responses_format["input"],
+        "temperature": sampling_params.temperature,
+        "top_p": sampling_params.top_p,
+    }
+    if sampling_params.max_new_tokens:
+        request_json["max_output_tokens"] = sampling_params.max_new_tokens
+
+    if model.reasoning_model:
+        if sampling_params.reasoning_effort in [None, "none"]:
+            # gemini models can switch reasoning off
+            if "gemini" in model.id:
+                sampling_params.reasoning_effort = "none"
+            else:
+                sampling_params.reasoning_effort = "low"
+        request_json["temperature"] = 1.0
+        request_json["top_p"] = 1.0
+        request_json["reasoning"] = {
+            "effort": sampling_params.reasoning_effort,
+            "summary": "auto",
+        }
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
+            )
+
+    if sampling_params.json_mode and model.supports_json:
+        request_json["text"] = {"format": {"type": "json_object"}}
+
+    # Handle tools
+    request_tools = []
+    # Add regular function tools
+    for tool in tools or []:
+        if isinstance(tool, Tool):
+            request_tools.append(tool.dump_for("openai-responses"))
+        elif isinstance(tool, dict):
+            # if computer use, make sure model supports it
+            if tool["type"] == "computer_use_preview":
+                if model.name != "openai-computer-use-preview":
+                    raise ValueError(f"model {model.id} does not support computer use")
+                # have to use truncation
+                request_json["truncation"] = "auto"
+            request_tools.append(tool)  # allow passing dict
+        elif isinstance(tool, MCPServer):
+            if context.force_local_mcp:
+                as_tools = await tool.to_tools()
+                request_tools.extend([t.dump_for("openai-responses") for t in as_tools])
+            else:
+                request_tools.append(tool.for_openai_responses())
+
+    if request_tools:
+        request_json["tools"] = request_tools
+
+    return request_json
+
+
 class OpenAIResponsesRequest(APIRequestBase):
     def __init__(self, context: RequestContext):
         super().__init__(context)
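
The new `_build_oa_responses_request` gives an `MCPServer` two paths: when `context.force_local_mcp` is set, its tools are fetched and flattened into ordinary function tools; otherwise the server is passed through as a native Responses-API tool entry via `for_openai_responses()`. A sketch of that branch with stand-ins (toy classes, not lm_deluge's):

# --- illustrative sketch, not part of the package diff ---
import asyncio


class StubMCPServer:
    async def to_tools(self):
        # the real MCPServer.to_tools() would list the server's tools
        return [{"type": "function", "name": "search"}]

    def for_openai_responses(self):
        # stand-in for the native MCP tool entry the Responses API accepts
        return {"type": "mcp", "server_label": "stub"}


async def tools_for_request(server: StubMCPServer, force_local_mcp: bool) -> list[dict]:
    if force_local_mcp:
        # expand server-side tools into ordinary function tools locally
        return list(await server.to_tools())
    # otherwise let the API talk to the MCP server itself
    return [server.for_openai_responses()]


print(asyncio.run(tools_for_request(StubMCPServer(), force_local_mcp=True)))
print(asyncio.run(tools_for_request(StubMCPServer(), force_local_mcp=False)))
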
@@ -196,73 +264,14 @@ class OpenAIResponsesRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/responses"
         self.request_header = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
         }
 
-
-        openai_responses_format = self.context.prompt.to_openai_responses()
-
-        self.request_json = {
-            "model": self.model.name,
-            "input": openai_responses_format["input"],
-            "temperature": self.context.sampling_params.temperature,
-            "top_p": self.context.sampling_params.top_p,
-        }
-
-        # Add max_output_tokens for responses API
-        if self.context.sampling_params.max_new_tokens:
-            self.request_json["max_output_tokens"] = (
-                self.context.sampling_params.max_new_tokens
-            )
-
-        if self.model.reasoning_model:
-            if self.context.sampling_params.reasoning_effort in [None, "none"]:
-                # gemini models can switch reasoning off
-                if "gemini" in self.model.id:
-                    self.context.sampling_params.reasoning_effort = (
-                        "none"  # expects string
-                    )
-                # openai models can only go down to "low"
-                else:
-                    self.context.sampling_params.reasoning_effort = "low"
-            self.request_json["temperature"] = 1.0
-            self.request_json["top_p"] = 1.0
-            self.request_json["reasoning"] = {
-                "effort": self.context.sampling_params.reasoning_effort
-            }
-        else:
-            if self.context.sampling_params.reasoning_effort:
-                warnings.warn(
-                    f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-                )
-
-        if self.context.sampling_params.json_mode and self.model.supports_json:
-            self.request_json["text"] = {"format": {"type": "json_object"}}
-
-        # Handle tools
-        request_tools = []
-        if self.context.tools:
-            # Add regular function tools
-            for tool in self.context.tools:
-                if isinstance(tool, Tool):
-                    request_tools.append(tool.dump_for("openai-responses"))
-                elif isinstance(tool, dict):
-                    # if computer use, make sure model supports it
-                    if tool["type"] == "computer_use_preview":
-                        if self.context.model_name != "openai-computer-use-preview":
-                            raise ValueError(
-                                f"model {self.context.model_name} does not support computer use"
-                            )
-                        # have to use truncation
-                        self.request_json["truncation"] = "auto"
-                    request_tools.append(tool)  # allow passing dict
-                elif isinstance(tool, MCPServer):
-                    request_tools.append(tool.for_openai_responses())
-
-        if request_tools:
-            self.request_json["tools"] = request_tools
+        self.request_json = await _build_oa_responses_request(self.model, self.context)
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -459,7 +468,11 @@ async def stream_chat(
     }
     request_header.update(filtered_extra)
 
-
+    context = SimpleNamespace(
+        prompt=prompt, tools=tools, sampling_params=sampling_params
+    )
+
+    request_json = await _build_oa_chat_request(model, context)  # type: ignore
     request_json["stream"] = True
 
     async with aiohttp.ClientSession() as s:
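
`stream_chat` has no `RequestContext` of its own, so it wraps its loose arguments in a `types.SimpleNamespace` before calling the now-async `_build_oa_chat_request`; the `# type: ignore` reflects that the namespace merely quacks like a context, exposing only `prompt`, `tools`, and `sampling_params`. A tiny sketch of why that is enough (toy builder, not the real one):

# --- illustrative sketch, not part of the package diff ---
import asyncio
from types import SimpleNamespace


async def build_chat_request(model: str, context) -> dict:
    # touches only the three attributes stream_chat provides
    return {
        "model": model,
        "messages": context.prompt,
        "temperature": context.sampling_params["temperature"],
        "tools": context.tools or [],
    }


context = SimpleNamespace(
    prompt=[{"role": "user", "content": "stream this"}],
    tools=None,
    sampling_params={"temperature": 0.2},
)
request_json = asyncio.run(build_chat_request("toy-model", context))
request_json["stream"] = True
print(request_json)
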
{lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/response.py

@@ -14,7 +14,7 @@ class APIResponse:
     # request information
     id: int  # should be unique to the request within a given prompt-processing call
     model_internal: str  # our internal model tag
-    prompt: Conversation
+    prompt: Conversation | dict
     sampling_params: SamplingParams
 
     # http response information
@@ -92,6 +92,8 @@ class APIResponse:
             print(
                 f"Warning: Completion provided without token counts for model {self.model_internal}."
             )
+        if isinstance(self.prompt, Conversation):
+            self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
     def to_dict(self):
         return {
@@ -99,7 +101,7 @@ class APIResponse:
             "model_internal": self.model_internal,
             "model_external": self.model_external,
             "region": self.region,
-            "prompt": self.prompt
+            "prompt": self.prompt,
             "sampling_params": self.sampling_params.__dict__,
             "status_code": self.status_code,
             "is_error": self.is_error,