lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool

 # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,6 +12,5 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -6,12 +6,18 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_anthropic,
+)
+from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -58,38 +64,102 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }

+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-
-
-        sampling_params.reasoning_effort
-    )
-
-
-
-
-
-
-
-
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            effort = sampling_params.reasoning_effort
+            if effort == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+                effort = "high"
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message

-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-
-
+        request_json.pop("top_p")
+
+    # print(request_json)
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply Anthropic-specific transformations (move unsupported constraints to description)
+            transformed_schema = transform_schema_for_anthropic(base_schema)
+
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": transformed_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")

     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
@@ -102,6 +172,9 @@ def _build_anthropic_request(
                     "bash_20241022",
                 ]:
                     _add_beta(base_headers, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20251124":
+                    # Claude Opus 4.5 - newest computer use with zoom support
+                    _add_beta(base_headers, "computer-use-2025-11-24")
                 elif tool["type"] == "computer_20250124":
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
@@ -169,14 +242,37 @@ class AnthropicRequest(APIRequestBase):
                data = await http_response.json()
                response_content = data["content"]

+                # print("=== CONTENT ===")
+                # print(response_content)
+
                # Parse response into Message with parts
                parts = []
                for item in response_content:
                    if item["type"] == "text":
                        parts.append(Text(item["text"]))
                    elif item["type"] == "thinking":
-
-
+                        thinking_content = item.get("thinking", "")
+                        thinking = thinking_content
+                        signature = item.get("signature")
+                        parts.append(
+                            Thinking(
+                                thinking_content,
+                                raw_payload=item,
+                                thought_signature=ThoughtSignature(
+                                    signature,
+                                    provider="anthropic",
+                                )
+                                if signature is not None
+                                else None,
+                            )
+                        )
+                    elif item["type"] == "redacted_thinking":
+                        parts.append(
+                            Thinking(
+                                item.get("data", ""),
+                                raw_payload=item,
+                            )
+                        )
                    elif item["type"] == "tool_use":
                        parts.append(
                            ToolCall(
@@ -190,9 +286,8 @@ class AnthropicRequest(APIRequestBase):
                usage = Usage.from_anthropic_usage(data["usage"])
            except Exception as e:
                is_error = True
-
-
-                )
+                response_text = await http_response.text()
+                error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
        elif mimetype and "json" in mimetype.lower():
            is_error = True  # expected status is 200, otherwise it's an error
            data = await http_response.json()
lm_deluge/api_requests/base.py
CHANGED
@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -73,6 +75,24 @@ class APIRequestBase(ABC):

         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
+        if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+            combined = []
+            seen = set()
+            for (
+                raw
+            ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+                ","
+            ):
+                token = raw.strip()
+                if token and token not in seen:
+                    seen.add(token)
+                    combined.append(token)
+            merged["anthropic-beta"] = ",".join(combined)
+            filtered_extra = {
+                key: value
+                for key, value in filtered_extra.items()
+                if key != "anthropic-beta"
+            }
         merged.update(filtered_extra)

         # Filter out None values from final merged headers
@@ -90,9 +110,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                        print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )

             try:
                 self.context.status_tracker.total_requests += 1
@@ -109,14 +152,11 @@ class APIRequestBase(ABC):
                     last_status = data["status"]

                 while True:
-
-
-
-
-
-                    ) as http_response:
-                        http_response.raise_for_status()
-
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +168,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
@@ -146,6 +187,8 @@ class APIRequestBase(ABC):
                        return await self.handle_response(http_response)

            except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                raise_if_modal_exception(e)
                tb = traceback.format_exc()
                print(tb)
@@ -166,6 +209,23 @@ class APIRequestBase(ABC):
        await self.build_request()
        assert self.context.status_tracker

+        if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+            "1",
+            "true",
+            "yes",
+            "on",
+        }:
+            print("DELUGE_PROXY_PROVIDER_REQUEST")
+            print(f"URL: {self.url}")
+            print("Headers:")
+            print(self.request_header)
+            if self.request_json is not None:
+                print("JSON:")
+                try:
+                    print(json.dumps(self.request_json, indent=2))
+                except Exception:
+                    print(self.request_json)
+
        if (
            self.context.background
            and self.context.use_responses_api
@@ -199,6 +259,19 @@ class APIRequestBase(ABC):
                usage=None,
            )

+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
        except Exception as e:
            raise_if_modal_exception(e)
            tb = traceback.format_exc()
lm_deluge/api_requests/bedrock.py
CHANGED
@@ -16,6 +16,7 @@ except ImportError:
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
@@ -106,7 +107,8 @@ async def _build_anthropic_bedrock_request(
        tool_definitions = []
        for tool in tools:
            if isinstance(tool, Tool):
-
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
            elif isinstance(tool, dict):
                tool_definitions.append(tool)
        # add betas if needed
@@ -124,7 +126,9 @@ async def _build_anthropic_bedrock_request(
                # Convert to individual tools locally (like OpenAI does)
                individual_tools = await tool.to_tools()
                for individual_tool in individual_tools:
-                    tool_definitions.append(
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )

    # Add cache control to last tool if tools_only caching is specified
    if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +198,11 @@ async def _build_openai_bedrock_request(
        request_tools = []
        for tool in tools:
            if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(tool.dump_for("openai-completions", strict=False))
            elif isinstance(tool, MCPServer):
                as_tools = await tool.to_tools()
                request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                )
        request_json["tools"] = request_tools

@@ -360,8 +364,28 @@ class BedrockRequest(APIRequestBase):
                if item["type"] == "text":
                    parts.append(Text(item["text"]))
                elif item["type"] == "thinking":
-
-
+                    thinking_content = item.get("thinking", "")
+                    thinking = thinking_content
+                    signature = item.get("signature")
+                    parts.append(
+                        Thinking(
+                            thinking_content,
+                            raw_payload=item,
+                            thought_signature=ThoughtSignature(
+                                signature,
+                                provider="anthropic",
+                            )
+                            if signature is not None
+                            else None,
+                        )
+                    )
+                elif item["type"] == "redacted_thinking":
+                    parts.append(
+                        Thinking(
+                            item.get("data", ""),
+                            raw_payload=item,
+                        )
+                    )
                elif item["type"] == "tool_use":
                    parts.append(
                        ToolCall(
|