lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,20 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool
+
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,6 +25,16 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -12,6 +12,11 @@ from lm_deluge.prompt import (
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_anthropic,
+)
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -58,38 +63,102 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-        …
-        sampling_params.reasoning_effort
-    )
-        …
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            effort = sampling_params.reasoning_effort
+            if effort == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+                effort = "high"
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        …
+        request_json.pop("top_p")
+
+    # print(request_json)
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply Anthropic-specific transformations (move unsupported constraints to description)
+            transformed_schema = transform_schema_for_anthropic(base_schema)
+
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": transformed_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")
 
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                …
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
@@ -102,6 +171,9 @@ def _build_anthropic_request(
                     "bash_20241022",
                 ]:
                     _add_beta(base_headers, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20251124":
+                    # Claude Opus 4.5 - newest computer use with zoom support
+                    _add_beta(base_headers, "computer-use-2025-11-24")
                 elif tool["type"] == "computer_20250124":
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
@@ -169,6 +241,9 @@ class AnthropicRequest(APIRequestBase):
             data = await http_response.json()
             response_content = data["content"]
 
+            # print("=== CONTENT ===")
+            # print(response_content)
+
             # Parse response into Message with parts
             parts = []
             for item in response_content:
lm_deluge/api_requests/base.py
CHANGED
@@ -90,9 +90,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                        print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )
 
             try:
                 self.context.status_tracker.total_requests += 1
@@ -109,14 +132,11 @@ class APIRequestBase(ABC):
                 last_status = data["status"]
 
                 while True:
-                    …
-                    ) as http_response:
-                        http_response.raise_for_status()
-                    …
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +148,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
@@ -146,6 +167,8 @@ class APIRequestBase(ABC):
                             return await self.handle_response(http_response)
 
             except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                 raise_if_modal_exception(e)
                 tb = traceback.format_exc()
                 print(tb)
@@ -199,6 +222,19 @@ class APIRequestBase(ABC):
                 usage=None,
             )
 
+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
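The background-request handler above now bounds the whole start -> poll -> result cycle by a monotonic deadline and cancels the server-side request when it trips. A minimal, self-contained sketch of that pattern (the `check_status` and `cancel` callables and the status name stand in for the real GET/POST calls, which are not shown here):

import asyncio
import time


async def poll_until_done(check_status, cancel, request_timeout: float, poll_interval: float = 5.0):
    """Poll until a terminal status or the deadline, whichever comes first."""
    attempt_start = time.monotonic()
    deadline = attempt_start + request_timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # Best-effort cancel so the provider stops working on the request.
            await cancel(f"timed out after {time.monotonic() - attempt_start:.1f}s")
            return None
        # Never sleep past the deadline.
        await asyncio.sleep(min(poll_interval, max(remaining, 0)))
        status = await check_status()
        if status != "in_progress":  # status name is illustrative
            return status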
lm_deluge/api_requests/bedrock.py
CHANGED
@@ -106,7 +106,8 @@ async def _build_anthropic_bedrock_request(
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                …
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
         # add betas if needed
@@ -124,7 +125,9 @@ async def _build_anthropic_bedrock_request(
                 # Convert to individual tools locally (like OpenAI does)
                 individual_tools = await tool.to_tools()
                 for individual_tool in individual_tools:
-                    tool_definitions.append(
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +197,11 @@ async def _build_openai_bedrock_request(
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(tool.dump_for("openai-completions", strict=False))
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                )
        request_json["tools"] = request_tools
 
lm_deluge/api_requests/gemini.py
CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,69 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    …
-    if …
-    …
-        thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
         else:
-            …
-            if …
-            …
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[effort_key]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert effort_key in level_map
+            budget = level_map[effort_key]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
+        else:
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
@@ -59,13 +122,60 @@ async def _build_gemini_request(
 
     # Add tools if provided
     if tools:
-        …
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
@@ -103,7 +213,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )
 
@@ -137,10 +247,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-                        …
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +271,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
 
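To make the Gemini 3 branch above concrete: a sketch of the `generationConfig` it produces when `reasoning_effort="medium"` and a `media_resolution` is set. The level map comes straight from the diff; the `mediaResolution` value string is illustrative, since the allowed enum values are not shown here:

# "medium" currently maps to thinkingLevel "high" (see level_map in the diff above).
level_map = {"none": "low", "minimal": "low", "low": "low", "medium": "high", "high": "high"}

request_json = {
    "contents": [],
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": level_map["medium"]},
        # Only attached for gemini-3 models; the value shown here is a placeholder.
        "mediaResolution": {"level": "MEDIA_RESOLUTION_HIGH"},
    },
}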