lm-deluge 0.0.20__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {lm_deluge-0.0.20/src/lm_deluge.egg-info → lm_deluge-0.0.22}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/anthropic.py +16 -13
  4. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/base.py +10 -2
  5. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/bedrock.py +18 -20
  6. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/gemini.py +4 -3
  7. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/mistral.py +2 -0
  8. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/openai.py +96 -83
  9. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/response.py +4 -2
  10. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/batches.py +86 -77
  11. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/client.py +19 -10
  12. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/image.py +6 -2
  13. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/models.py +61 -59
  14. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/prompt.py +104 -56
  15. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/request_context.py +2 -0
  16. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/tool.py +115 -26
  17. {lm_deluge-0.0.20 → lm_deluge-0.0.22/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  18. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/LICENSE +0 -0
  19. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/README.md +0 -0
  20. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/setup.cfg +0 -0
  21. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/__init__.py +0 -0
  22. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/agent.py +0 -0
  23. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/__init__.py +0 -0
  24. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/common.py +0 -0
  25. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  26. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  27. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  28. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  29. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  30. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  31. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  32. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  33. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  34. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/base.py +0 -0
  35. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/built_in_tools/openai.py +0 -0
  36. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/cache.py +0 -0
  37. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/config.py +0 -0
  38. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/embed.py +0 -0
  39. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/errors.py +0 -0
  40. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/file.py +0 -0
  41. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/gemini_limits.py +0 -0
  42. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/__init__.py +0 -0
  43. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/classify.py +0 -0
  44. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/extract.py +0 -0
  45. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/locate.py +0 -0
  46. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/ocr.py +0 -0
  47. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/score.py +0 -0
  48. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/llm_tools/translate.py +0 -0
  49. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/rerank.py +0 -0
  50. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/tracker.py +0 -0
  51. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/usage.py +0 -0
  52. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/json.py +0 -0
  53. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/logprobs.py +0 -0
  54. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/spatial.py +0 -0
  55. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/validation.py +0 -0
  56. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge/util/xml.py +0 -0
  57. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
  58. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  59. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/requires.txt +0 -0
  60. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/src/lm_deluge.egg-info/top_level.txt +0 -0
  61. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/tests/test_builtin_tools.py +0 -0
  62. {lm_deluge-0.0.20 → lm_deluge-0.0.22}/tests/test_native_mcp_server.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.20
+Version: 0.0.22
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.20"
+version = "0.0.22"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/anthropic.py
@@ -4,8 +4,6 @@ import os
 from aiohttp import ClientResponse
 
 from lm_deluge.prompt import (
-    CachePattern,
-    Conversation,
     Message,
     Text,
     Thinking,
@@ -15,7 +13,6 @@ from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 
-from ..config import SamplingParams
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
 
@@ -30,12 +27,19 @@ def _add_beta(headers: dict, beta: str):
 
 def _build_anthropic_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool | dict | MCPServer] | None,
-    sampling_params: SamplingParams,
-    cache_pattern: CachePattern | None = None,
+    context: RequestContext,
+    # prompt: Conversation,
+    # tools: list[Tool | dict | MCPServer] | None,
+    # sampling_params: SamplingParams,
+    # cache_pattern: CachePattern | None = None,
 ):
+    prompt = context.prompt
+    cache_pattern = context.cache
+    tools = context.tools
+    sampling_params = context.sampling_params
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
+    if not system_message:
+        print("WARNING: system_message is None")
     base_headers = {
         "x-api-key": os.getenv(model.api_key_env_var),
         "anthropic-version": "2023-06-01",
@@ -69,6 +73,7 @@ def _build_anthropic_request(
         print("ignoring reasoning_effort for non-reasoning model")
     if system_message is not None:
         request_json["system"] = system_message
+
     if tools:
         mcp_servers = []
         tool_definitions = []
@@ -100,6 +105,7 @@ def _build_anthropic_request(
         if len(mcp_servers) > 0:
             request_json["mcp_servers"] = mcp_servers
 
+    # print("request json:", request_json)
     return request_json, base_headers
 
 
@@ -108,18 +114,15 @@ class AnthropicRequest(APIRequestBase):
         super().__init__(context=context)
 
         self.model = APIModel.from_registry(self.context.model_name)
-        self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
        if self.context.cache is not None:
            self.context.prompt.lock_images_as_bytes()
 
+    async def build_request(self):
+        self.url = f"{self.model.api_base}/messages"
         self.request_json, base_headers = _build_anthropic_request(
-            self.model,
-            self.context.prompt,
-            self.context.tools,
-            self.context.sampling_params,
-            self.context.cache,
+            self.model, self.context
         )
         self.request_header = self.merge_headers(
             base_headers, exclude_patterns=["openai", "gemini", "mistral"]

src/lm_deluge/api_requests/base.py
@@ -24,19 +24,21 @@ class APIRequestBase(ABC):
         self,
         context: RequestContext,
     ):
-        # If context is provided, use it; otherwise construct one from individual parameters
         self.context = context
 
         # Everything is now accessed through self.context - no copying!
         self.system_prompt = None
         self.result = []  # list of APIResponse objects from each attempt
 
-        # these should be set in the __init__ of the subclass
+        # these should be set in build_request of the subclass
         self.url = None
         self.request_header = None
         self.request_json = None
         self.region = None
 
+    async def build_request(self):
+        raise NotImplementedError("must be implemented in subclass")
+
     def increment_pbar(self):
         if self.context.status_tracker:
             self.context.status_tracker.increment_pbar()
@@ -76,7 +78,13 @@ class APIRequestBase(ABC):
 
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
+        await self.build_request()
         assert self.context.status_tracker
+        # try:
+        #     dumped = json.dumps(self.request_json)
+        # except Exception:
+        #     print("couldn't serialize request json")
+        #     print(self.request_json)
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)

src/lm_deluge/api_requests/bedrock.py
@@ -12,8 +12,6 @@ except ImportError:
     )
 
 from lm_deluge.prompt import (
-    CachePattern,
-    Conversation,
     Message,
     Text,
     Thinking,
@@ -23,7 +21,6 @@ from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 
-from ..config import SamplingParams
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
 
@@ -38,13 +35,14 @@ def _add_beta(headers: dict, beta: str):
         headers["anthropic_beta"] = beta
 
 
-def _build_anthropic_bedrock_request(
+async def _build_anthropic_bedrock_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool | dict | MCPServer] | None,
-    sampling_params: SamplingParams,
-    cache_pattern: CachePattern | None = None,
+    context: RequestContext,
 ):
+    prompt = context.prompt
+    cache_pattern = context.cache
+    tools = context.tools
+    sampling_params = context.sampling_params
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
 
     # handle AWS auth
@@ -121,9 +119,10 @@ def _build_anthropic_bedrock_request(
             elif tool["type"] == "code_execution_20250522":
                 _add_beta(base_headers, "code-execution-2025-05-22")
             elif isinstance(tool, MCPServer):
-                raise ValueError("bedrock doesn't support MCP connector right now")
-                # _add_beta(request_header, "mcp-client-2025-04-04")
-                # mcp_servers.append(tool.for_anthropic())
+                # Convert to individual tools locally (like OpenAI does)
+                individual_tools = await tool.to_tools()
+                for individual_tool in individual_tools:
+                    tool_definitions.append(individual_tool.dump_for("anthropic"))
 
     # Add cache control to last tool if tools_only caching is specified
     if cache_pattern == "tools_only" and tool_definitions:
@@ -141,21 +140,20 @@ class BedrockRequest(APIRequestBase):
         super().__init__(context=context)
 
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
         if self.context.cache is not None:
             self.context.prompt.lock_images_as_bytes()
 
-        self.request_json, base_headers, self.auth, self.url = (
-            _build_anthropic_bedrock_request(
-                self.model,
-                context.prompt,
-                context.tools,
-                context.sampling_params,
-                context.cache,
-            )
-        )
+        (
+            self.request_json,
+            base_headers,
+            self.auth,
+            self.url,
+        ) = await _build_anthropic_bedrock_request(self.model, self.context)
         self.request_header = self.merge_headers(
             base_headers, exclude_patterns=["anthropic", "openai", "gemini", "mistral"]
         )

src/lm_deluge/api_requests/gemini.py
@@ -14,7 +14,7 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
-def _build_gemini_request(
+async def _build_gemini_request(
     model: APIModel,
     prompt: Conversation,
     tools: list[Tool] | None,
@@ -75,7 +75,8 @@ class GeminiRequest(APIRequestBase):
             )
 
         self.model = APIModel.from_registry(self.context.model_name)
-        # Gemini API endpoint format: https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/models/{self.model.name}:generateContent"
         base_headers = {
             "Content-Type": "application/json",
@@ -92,7 +93,7 @@ class GeminiRequest(APIRequestBase):
             )
         self.url += f"?key={api_key}"
 
-        self.request_json = _build_gemini_request(
+        self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
             self.context.tools,

src/lm_deluge/api_requests/mistral.py
@@ -21,6 +21,8 @@ class MistralRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/chat/completions"
         base_headers = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"

src/lm_deluge/api_requests/openai.py
@@ -1,6 +1,7 @@
 import json
 import os
 import warnings
+from types import SimpleNamespace
 
 import aiohttp
 from aiohttp import ClientResponse
@@ -15,12 +16,13 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
-def _build_oa_chat_request(
+async def _build_oa_chat_request(
     model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool] | None,
-    sampling_params: SamplingParams,
+    context: RequestContext,
 ) -> dict:
+    prompt = context.prompt
+    sampling_params = context.sampling_params
+    tools = context.tools
     request_json = {
         "model": model.name,
         "messages": prompt.to_openai(),
@@ -48,19 +50,19 @@ def _build_oa_chat_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["response_format"] = {"type": "json_object"}
     if tools:
-        request_json["tools"] = [tool.dump_for("openai-completions") for tool in tools]
+        request_tools = []
+        for tool in tools:
+            if isinstance(tool, Tool):
+                request_tools.append(tool.dump_for("openai-completions"))
+            elif isinstance(tool, MCPServer):
+                as_tools = await tool.to_tools()
+                request_tools.extend(
+                    [t.dump_for("openai-completions") for t in as_tools]
+                )
+        request_json["tools"] = request_tools
     return request_json
 
 
-def _build_oa_responses_request(
-    model: APIModel,
-    prompt: Conversation,
-    tools: list[Tool] | None,
-    sampling_params: SamplingParams,
-):
-    pass  # TODO: implement
-
-
 class OpenAIRequest(APIRequestBase):
     def __init__(self, context: RequestContext):
         # Pass context to parent, which will handle backwards compatibility
@@ -72,6 +74,8 @@ class OpenAIRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/chat/completions"
         base_headers = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
@@ -80,12 +84,7 @@ class OpenAIRequest(APIRequestBase):
             base_headers, exclude_patterns=["anthropic"]
         )
 
-        self.request_json = _build_oa_chat_request(
-            self.model,
-            self.context.prompt,
-            self.context.tools,
-            self.context.sampling_params,
-        )
+        self.request_json = await _build_oa_chat_request(self.model, self.context)
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -187,6 +186,75 @@ class OpenAIRequest(APIRequestBase):
         )
 
 
+async def _build_oa_responses_request(
+    model: APIModel,
+    context: RequestContext,
+    # prompt: Conversation,
+    # tools: list[Tool] | None,
+    # sampling_params: SamplingParams,
+):
+    prompt = context.prompt
+    sampling_params = context.sampling_params
+    tools = context.tools
+    openai_responses_format = prompt.to_openai_responses()
+    request_json = {
+        "model": model.name,
+        "input": openai_responses_format["input"],
+        "temperature": sampling_params.temperature,
+        "top_p": sampling_params.top_p,
+    }
+    if sampling_params.max_new_tokens:
+        request_json["max_output_tokens"] = sampling_params.max_new_tokens
+
+    if model.reasoning_model:
+        if sampling_params.reasoning_effort in [None, "none"]:
+            # gemini models can switch reasoning off
+            if "gemini" in model.id:
+                sampling_params.reasoning_effort = "none"
+            else:
+                sampling_params.reasoning_effort = "low"
+        request_json["temperature"] = 1.0
+        request_json["top_p"] = 1.0
+        request_json["reasoning"] = {
+            "effort": sampling_params.reasoning_effort,
+            "summary": "auto",
+        }
+    else:
+        if sampling_params.reasoning_effort:
+            warnings.warn(
+                f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
+            )
+
+    if sampling_params.json_mode and model.supports_json:
+        request_json["text"] = {"format": {"type": "json_object"}}
+
+    # Handle tools
+    request_tools = []
+    # Add regular function tools
+    for tool in tools or []:
+        if isinstance(tool, Tool):
+            request_tools.append(tool.dump_for("openai-responses"))
+        elif isinstance(tool, dict):
+            # if computer use, make sure model supports it
+            if tool["type"] == "computer_use_preview":
+                if model.name != "openai-computer-use-preview":
+                    raise ValueError(f"model {model.id} does not support computer use")
+                # have to use truncation
+                request_json["truncation"] = "auto"
+            request_tools.append(tool)  # allow passing dict
+        elif isinstance(tool, MCPServer):
+            if context.force_local_mcp:
+                as_tools = await tool.to_tools()
+                request_tools.extend([t.dump_for("openai-responses") for t in as_tools])
+            else:
+                request_tools.append(tool.for_openai_responses())
+
+    if request_tools:
+        request_json["tools"] = request_tools
+
+    return request_json
+
+
 class OpenAIResponsesRequest(APIRequestBase):
     def __init__(self, context: RequestContext):
         super().__init__(context)
@@ -196,73 +264,14 @@ class OpenAIResponsesRequest(APIRequestBase):
                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
             )
         self.model = APIModel.from_registry(self.context.model_name)
+
+    async def build_request(self):
         self.url = f"{self.model.api_base}/responses"
         self.request_header = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
         }
 
-        # Convert conversation to input format for Responses API
-        openai_responses_format = self.context.prompt.to_openai_responses()
-
-        self.request_json = {
-            "model": self.model.name,
-            "input": openai_responses_format["input"],
-            "temperature": self.context.sampling_params.temperature,
-            "top_p": self.context.sampling_params.top_p,
-        }
-
-        # Add max_output_tokens for responses API
-        if self.context.sampling_params.max_new_tokens:
-            self.request_json["max_output_tokens"] = (
-                self.context.sampling_params.max_new_tokens
-            )
-
-        if self.model.reasoning_model:
-            if self.context.sampling_params.reasoning_effort in [None, "none"]:
-                # gemini models can switch reasoning off
-                if "gemini" in self.model.id:
-                    self.context.sampling_params.reasoning_effort = (
-                        "none"  # expects string
-                    )
-                # openai models can only go down to "low"
-                else:
-                    self.context.sampling_params.reasoning_effort = "low"
-            self.request_json["temperature"] = 1.0
-            self.request_json["top_p"] = 1.0
-            self.request_json["reasoning"] = {
-                "effort": self.context.sampling_params.reasoning_effort
-            }
-        else:
-            if self.context.sampling_params.reasoning_effort:
-                warnings.warn(
-                    f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-                )
-
-        if self.context.sampling_params.json_mode and self.model.supports_json:
-            self.request_json["text"] = {"format": {"type": "json_object"}}
-
-        # Handle tools
-        request_tools = []
-        if self.context.tools:
-            # Add regular function tools
-            for tool in self.context.tools:
-                if isinstance(tool, Tool):
-                    request_tools.append(tool.dump_for("openai-responses"))
-                elif isinstance(tool, dict):
-                    # if computer use, make sure model supports it
-                    if tool["type"] == "computer_use_preview":
-                        if self.context.model_name != "openai-computer-use-preview":
-                            raise ValueError(
-                                f"model {self.context.model_name} does not support computer use"
-                            )
-                        # have to use truncation
-                        self.request_json["truncation"] = "auto"
-                    request_tools.append(tool)  # allow passing dict
-                elif isinstance(tool, MCPServer):
-                    request_tools.append(tool.for_openai_responses())
-
-        if request_tools:
-            self.request_json["tools"] = request_tools
+        self.request_json = await _build_oa_responses_request(self.model, self.context)
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
@@ -459,7 +468,11 @@ async def stream_chat(
     }
     request_header.update(filtered_extra)
 
-    request_json = _build_oa_chat_request(model, prompt, tools, sampling_params)
+    context = SimpleNamespace(
+        prompt=prompt, tools=tools, sampling_params=sampling_params
+    )
+
+    request_json = await _build_oa_chat_request(model, context)  # type: ignore
     request_json["stream"] = True
 
     async with aiohttp.ClientSession() as s:

src/lm_deluge/api_requests/response.py
@@ -14,7 +14,7 @@ class APIResponse:
     # request information
     id: int  # should be unique to the request within a given prompt-processing call
     model_internal: str  # our internal model tag
-    prompt: Conversation
+    prompt: Conversation | dict
    sampling_params: SamplingParams
 
    # http response information
@@ -92,6 +92,8 @@ class APIResponse:
             print(
                 f"Warning: Completion provided without token counts for model {self.model_internal}."
             )
+        if isinstance(self.prompt, Conversation):
+            self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
     def to_dict(self):
         return {
@@ -99,7 +101,7 @@ class APIResponse:
             "model_internal": self.model_internal,
             "model_external": self.model_external,
             "region": self.region,
-            "prompt": self.prompt.to_log(),  # destroys image if present
+            "prompt": self.prompt,
             "sampling_params": self.sampling_params.__dict__,
             "status_code": self.status_code,
             "is_error": self.is_error,