lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (92)
  1. lm_deluge/__init__.py +25 -2
  2. lm_deluge/api_requests/anthropic.py +92 -17
  3. lm_deluge/api_requests/base.py +47 -11
  4. lm_deluge/api_requests/bedrock.py +7 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +138 -18
  7. lm_deluge/api_requests/openai.py +114 -21
  8. lm_deluge/client.py +282 -49
  9. lm_deluge/config.py +15 -3
  10. lm_deluge/mock_openai.py +643 -0
  11. lm_deluge/models/__init__.py +12 -1
  12. lm_deluge/models/anthropic.py +17 -2
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +29 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +10 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +86 -8
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +1 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +224 -40
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/tool/__init__.py +1118 -0
  39. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  40. lm_deluge/tool/builtin/gemini.py +59 -0
  41. lm_deluge/tool/builtin/openai.py +74 -0
  42. lm_deluge/tool/cua/__init__.py +173 -0
  43. lm_deluge/tool/cua/actions.py +148 -0
  44. lm_deluge/tool/cua/base.py +27 -0
  45. lm_deluge/tool/cua/batch.py +215 -0
  46. lm_deluge/tool/cua/converters.py +466 -0
  47. lm_deluge/tool/cua/kernel.py +702 -0
  48. lm_deluge/tool/cua/trycua.py +989 -0
  49. lm_deluge/tool/prefab/__init__.py +45 -0
  50. lm_deluge/tool/prefab/batch_tool.py +156 -0
  51. lm_deluge/tool/prefab/docs.py +1119 -0
  52. lm_deluge/tool/prefab/email.py +294 -0
  53. lm_deluge/tool/prefab/filesystem.py +1711 -0
  54. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  55. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  56. lm_deluge/tool/prefab/memory.py +458 -0
  57. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  58. lm_deluge/tool/prefab/otc/executor.py +281 -0
  59. lm_deluge/tool/prefab/otc/parse.py +188 -0
  60. lm_deluge/tool/prefab/random.py +212 -0
  61. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  62. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  63. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  64. lm_deluge/tool/prefab/sandbox.py +1621 -0
  65. lm_deluge/tool/prefab/sheets.py +385 -0
  66. lm_deluge/tool/prefab/subagents.py +233 -0
  67. lm_deluge/tool/prefab/todos.py +342 -0
  68. lm_deluge/tool/prefab/tool_search.py +169 -0
  69. lm_deluge/tool/prefab/web_search.py +199 -0
  70. lm_deluge/tracker.py +16 -13
  71. lm_deluge/util/schema.py +412 -0
  72. lm_deluge/warnings.py +8 -0
  73. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
  74. lm_deluge-0.0.88.dist-info/RECORD +117 -0
  75. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  76. lm_deluge/built_in_tools/openai.py +0 -28
  77. lm_deluge/presets/cerebras.py +0 -17
  78. lm_deluge/presets/meta.py +0 -13
  79. lm_deluge/tool.py +0 -849
  80. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  81. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  82. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  83. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  84. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  85. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  86. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
  87. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
  88. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  89. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  90. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
  91. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
  92. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,20 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool, ToolParams
+from .tool import Tool
+
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,6 +25,16 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
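
Note: the new top-level exports only exist when the optional openai dependency is installed; otherwise the guarded import fails and only the core names are exported. A minimal consumer-side sketch of the same pattern (the constructor call is an assumption, not a documented signature):

# Hypothetical usage sketch: MockAsyncOpenAI is only importable from lm_deluge
# when the openai extra is installed, mirroring the _has_openai guard above.
try:
    from lm_deluge import MockAsyncOpenAI
except ImportError:
    MockAsyncOpenAI = None  # openai not installed; mock client unavailable

if MockAsyncOpenAI is not None:
    mock_client = MockAsyncOpenAI()  # constructor arguments assumed; see mock_openai.py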
lm_deluge/api_requests/anthropic.py CHANGED
@@ -12,6 +12,11 @@ from lm_deluge.prompt import (
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_anthropic,
+)
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -58,38 +63,102 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model and sampling_params.reasoning_effort:
-        # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
-            sampling_params.reasoning_effort
-        )
-        request_json["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": budget,
-        }
-        if "top_p" in request_json:
-            request_json["top_p"] = max(request_json["top_p"], 0.95)
-        request_json["temperature"] = 1.0
-        request_json["max_tokens"] += budget
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            effort = sampling_params.reasoning_effort
+            if effort == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+                effort = "high"
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        if "temperature" in request_json and "top_p" in request_json:
-            request_json.pop("top_p")
+        request_json.pop("top_p")
+
+    # print(request_json)
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply Anthropic-specific transformations (move unsupported constraints to description)
+            transformed_schema = transform_schema_for_anthropic(base_schema)
+
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": transformed_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")
 
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                tool_definitions.append(tool.dump_for("anthropic"))
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
@@ -102,6 +171,9 @@ def _build_anthropic_request(
                 "bash_20241022",
             ]:
                 _add_beta(base_headers, "computer-use-2024-10-22")
+            elif tool["type"] == "computer_20251124":
+                # Claude Opus 4.5 - newest computer use with zoom support
+                _add_beta(base_headers, "computer-use-2025-11-24")
             elif tool["type"] == "computer_20250124":
                 _add_beta(base_headers, "computer-use-2025-01-24")
             elif tool["type"] == "code_execution_20250522":
@@ -169,6 +241,9 @@ class AnthropicRequest(APIRequestBase):
         data = await http_response.json()
         response_content = data["content"]
 
+        # print("=== CONTENT ===")
+        # print(response_content)
+
         # Parse response into Message with parts
         parts = []
         for item in response_content:
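
Note: the thinking changes above boil down to a precedence rule: an explicit thinking_budget wins over reasoning_effort, and effort strings are translated to token budgets with "xhigh" downgraded to "high". A standalone sketch of that mapping (the helper name is illustrative, not part of the library):

# Mirrors the budget resolution in _build_anthropic_request above.
EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

def resolve_thinking_budget(thinking_budget: int | None, reasoning_effort: str | None) -> int:
    if thinking_budget is not None:
        return thinking_budget  # explicit budget takes precedence
    if reasoning_effort is not None:
        effort = "high" if reasoning_effort == "xhigh" else reasoning_effort
        return EFFORT_TO_BUDGET[effort]
    return 0  # reasoning model with no effort requested -> thinking disabled

assert resolve_thinking_budget(None, "medium") == 4096
assert resolve_thinking_budget(2048, "high") == 2048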
lm_deluge/api_requests/base.py CHANGED
@@ -90,9 +90,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-        start_time = time.time()
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-            last_status: str | None = None
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                        print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )
 
             try:
                 self.context.status_tracker.total_requests += 1
@@ -109,14 +132,11 @@
                     last_status = data["status"]
 
                 while True:
-                    if time.time() - start_time > self.context.request_timeout:
-                        # cancel the response
-                        async with session.post(
-                            url=f"{self.url}/{response_id}/cancel",
-                            headers=self.request_header,
-                        ) as http_response:
-                            http_response.raise_for_status()
-
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +148,9 @@
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(5.0)
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
@@ -146,6 +167,8 @@
                         return await self.handle_response(http_response)
 
             except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                 raise_if_modal_exception(e)
                 tb = traceback.format_exc()
                 print(tb)
@@ -199,6 +222,19 @@
                 usage=None,
             )
 
+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
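
Note: the polling rewrite above replaces wall-clock arithmetic with a monotonic deadline and caps each sleep at the time remaining. A condensed sketch of the pattern (function and variable names are illustrative):

import asyncio
import time

async def poll_with_deadline(fetch_status, request_timeout: float, poll_interval: float = 5.0):
    # time.monotonic() is unaffected by system clock adjustments, so the timeout is reliable.
    deadline = time.monotonic() + request_timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None  # caller cancels the background request and reports a timeout
        # Never sleep past the deadline.
        await asyncio.sleep(min(poll_interval, max(remaining, 0)))
        status = await fetch_status()
        if status in ("completed", "failed", "cancelled"):
            return status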
lm_deluge/api_requests/bedrock.py CHANGED
@@ -106,7 +106,8 @@ async def _build_anthropic_bedrock_request(
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                tool_definitions.append(tool.dump_for("anthropic"))
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
                 # add betas if needed
@@ -124,7 +125,9 @@ async def _build_anthropic_bedrock_request(
                 # Convert to individual tools locally (like OpenAI does)
                 individual_tools = await tool.to_tools()
                 for individual_tool in individual_tools:
-                    tool_definitions.append(individual_tool.dump_for("anthropic"))
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +197,11 @@ async def _build_openai_bedrock_request(
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(tool.dump_for("openai-completions", strict=False))
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                 )
         request_json["tools"] = request_tools
 
lm_deluge/api_requests/chat_reasoning.py ADDED
@@ -0,0 +1,4 @@
+# this request type is for models that add "reasoning_content"
+# on top of the openai chat completions. it's important to be separate
+# for providers that expect you to provide back the reasoning content to
+# preserve best performance.
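
Note: "reasoning_content" is the extra field some OpenAI-compatible reasoning providers attach to assistant messages; per the comment above, those providers want it sent back on later turns. An illustrative message shape (field names follow that convention, not a confirmed lm-deluge schema):

# Hypothetical round trip: keep reasoning_content on the assistant turn when
# replaying history, so the provider sees its own reasoning again.
assistant_turn = {
    "role": "assistant",
    "content": "The answer is 42.",
    "reasoning_content": "...provider-generated chain of thought...",
}
followup_messages = [
    {"role": "user", "content": "What is the answer?"},
    assistant_turn,
    {"role": "user", "content": "How did you get that?"},
]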
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,69 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    if model.reasoning_model:
-        thinking_config: dict[str, Any] | None = None
-        effort = sampling_params.reasoning_effort
-        if effort is None or effort == "none":
-            budget = 128 if "2.5-pro" in model.id else 0
-            # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
         else:
-            thinking_config = {"includeThoughts": True}
-            if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-                budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
-                    effort
-                ]
-                thinking_config["thinkingBudget"] = budget
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[effort_key]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert effort_key in level_map
+            budget = level_map[effort_key]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
+        else:
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
@@ -59,13 +122,60 @@ async def _build_gemini_request(
 
     # Add tools if provided
     if tools:
-        tool_declarations = [tool.dump_for("google") for tool in tools]
-        request_json["tools"] = [{"functionDeclarations": tool_declarations}]
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
@@ -103,7 +213,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
            self.context.sampling_params,
         )
 
@@ -137,10 +247,19 @@ class GeminiRequest(APIRequestBase):
         candidate = data["candidates"][0]
         if "content" in candidate and "parts" in candidate["content"]:
             for part in candidate["content"]["parts"]:
+                # Extract thought signature if present
+                thought_sig = part.get("thoughtSignature")
+
                 if "text" in part:
                     parts.append(Text(part["text"]))
                 elif "thought" in part:
-                    parts.append(Thinking(part["thought"]))
+                    # Thought with optional signature
+                    parts.append(
+                        Thinking(
+                            content=part["thought"],
+                            thought_signature=thought_sig,
+                        )
+                    )
                 elif "functionCall" in part:
                     func_call = part["functionCall"]
                     # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +271,7 @@ class GeminiRequest(APIRequestBase):
                            id=tool_id,
                            name=func_call["name"],
                            arguments=func_call.get("args", {}),
+                            thought_signature=thought_sig,
                        )
                    )
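
Note: taken together, the Gemini hunks route the same reasoning knobs to two wire formats: a thinkingLevel string for Gemini 3, and an includeThoughts/thinkingBudget pair for older reasoning models. A condensed sketch of that decision (the helper is illustrative, not the library's API):

# Condensed from _build_gemini_request above; returns the thinkingConfig dict
# for reasoning-capable models.
def gemini_thinking_config(model_name, model_id, reasoning_effort, thinking_budget):
    if "gemini-3" in model_name.lower():
        effort = reasoning_effort or "low"
        effort = "high" if effort == "xhigh" else effort
        level = {"none": "low", "minimal": "low", "low": "low",
                 "medium": "high", "high": "high"}[effort]
        return {"thinkingLevel": level}
    if thinking_budget is not None and thinking_budget > 0:
        return {"includeThoughts": True, "thinkingBudget": thinking_budget}
    if thinking_budget == -1:
        return {"includeThoughts": True, "thinkingBudget": -1}  # dynamic thinking
    if reasoning_effort not in (None, "none"):
        effort = "high" if reasoning_effort == "xhigh" else reasoning_effort
        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
        if "flash-lite" in model_id:
            budget = max(budget, 512)
        return {"includeThoughts": True, "thinkingBudget": budget}
    if "2.5-pro" in model_id:
        return {"includeThoughts": True, "thinkingBudget": 128}  # 2.5 Pro must think
    return {"includeThoughts": False, "thinkingBudget": 0}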