lm-deluge 0.0.78__tar.gz → 0.0.80__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {lm_deluge-0.0.78/src/lm_deluge.egg-info → lm_deluge-0.0.80}/PKG-INFO +8 -8
  2. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/README.md +7 -7
  3. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/pyproject.toml +1 -1
  4. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/anthropic.py +43 -16
  5. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/gemini.py +95 -15
  6. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/client.py +6 -5
  7. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/config.py +10 -1
  8. lm_deluge-0.0.80/src/lm_deluge/llm_tools/sandbox.py +523 -0
  9. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/anthropic.py +15 -0
  10. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/google.py +15 -0
  11. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/openrouter.py +10 -0
  12. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/prompt.py +62 -24
  13. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/warnings.py +4 -0
  14. {lm_deluge-0.0.78 → lm_deluge-0.0.80/src/lm_deluge.egg-info}/PKG-INFO +8 -8
  15. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/SOURCES.txt +1 -0
  16. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_filesystem_live.py +1 -1
  17. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/LICENSE +0 -0
  18. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/setup.cfg +0 -0
  19. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/__init__.py +0 -0
  20. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/__init__.py +0 -0
  21. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/base.py +0 -0
  22. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/bedrock.py +0 -0
  23. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  24. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/common.py +0 -0
  25. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  26. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  27. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  28. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  29. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  30. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/mistral.py +0 -0
  31. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/openai.py +0 -0
  32. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/api_requests/response.py +0 -0
  33. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/batches.py +0 -0
  34. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  35. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  36. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  37. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  38. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/base.py +0 -0
  39. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/built_in_tools/openai.py +0 -0
  40. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/cache.py +0 -0
  41. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/cli.py +0 -0
  42. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/embed.py +0 -0
  43. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/errors.py +0 -0
  44. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/file.py +0 -0
  45. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/image.py +0 -0
  46. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/__init__.py +0 -0
  47. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/classify.py +0 -0
  48. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/extract.py +0 -0
  49. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/filesystem.py +0 -0
  50. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/locate.py +0 -0
  51. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/ocr.py +0 -0
  52. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/score.py +0 -0
  53. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/subagents.py +0 -0
  54. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/todos.py +0 -0
  55. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/llm_tools/translate.py +0 -0
  56. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/mock_openai.py +0 -0
  57. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/__init__.py +0 -0
  58. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/bedrock.py +0 -0
  59. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/cerebras.py +0 -0
  60. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/cohere.py +0 -0
  61. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/deepseek.py +0 -0
  62. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/fireworks.py +0 -0
  63. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/grok.py +0 -0
  64. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/groq.py +0 -0
  65. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/kimi.py +0 -0
  66. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/meta.py +0 -0
  67. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/minimax.py +0 -0
  68. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/mistral.py +0 -0
  69. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/openai.py +0 -0
  70. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/models/together.py +0 -0
  71. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/presets/cerebras.py +0 -0
  72. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/presets/meta.py +0 -0
  73. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/request_context.py +0 -0
  74. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/rerank.py +0 -0
  75. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/tool.py +0 -0
  76. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/tracker.py +0 -0
  77. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/usage.py +0 -0
  78. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/harmony.py +0 -0
  79. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/json.py +0 -0
  80. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/logprobs.py +0 -0
  81. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/schema.py +0 -0
  82. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/spatial.py +0 -0
  83. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/validation.py +0 -0
  84. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge/util/xml.py +0 -0
  85. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  86. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/requires.txt +0 -0
  87. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/src/lm_deluge.egg-info/top_level.txt +0 -0
  88. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_builtin_tools.py +0 -0
  89. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_file_upload.py +0 -0
  90. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_filesystem.py +0 -0
  91. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_mock_openai.py +0 -0
  92. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_native_mcp_server.py +0 -0
  93. {lm_deluge-0.0.78 → lm_deluge-0.0.80}/tests/test_openrouter_generic.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.78
+Version: 0.0.80
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -52,7 +52,7 @@ Dynamic: license-file
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
@@ -61,9 +61,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient("gpt-4o-mini")
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(resp[0].completion)
+print(resps[0].completion)
 ```
 
 ## Spraying Across Models
@@ -74,13 +74,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient
 
 client = LLMClient(
-    ["gpt-4o-mini", "claude-3-haiku"],
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(resp[0].completion)
+print(resps[0].completion)
 ```
 
 ## Configuration
@@ -181,7 +181,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-3-haiku")
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -255,7 +255,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-3-5-sonnet")
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools"  # Cache system message and any tools
README.md
@@ -23,7 +23,7 @@
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GOOGLE_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
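The hunk above (mirrored in the PKG-INFO copy) renames the documented Google key from `GOOGLE_API_KEY` to `GEMINI_API_KEY`. A minimal setup sketch with placeholder values; a `.env` file in the working directory is equivalent, since `LLMClient` loads it on import:

```python
# Sketch only: set keys in the process environment before importing lm_deluge.
import os

os.environ.setdefault("OPENAI_API_KEY", "sk-...")        # placeholders, not real keys
os.environ.setdefault("ANTHROPIC_API_KEY", "sk-ant-...")
os.environ.setdefault("GEMINI_API_KEY", "AIza...")

from lm_deluge import LLMClient  # noqa: E402  (import after the keys are set)

client = LLMClient("gpt-4.1-mini")
print(client.process_prompts_sync(["Hello, world!"])[0].completion)
```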
@@ -32,9 +32,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient("gpt-4o-mini")
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(resp[0].completion)
+print(resps[0].completion)
 ```
 
 ## Spraying Across Models
@@ -45,13 +45,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient
 
 client = LLMClient(
-    ["gpt-4o-mini", "claude-3-haiku"],
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(resp[0].completion)
+print(resps[0].completion)
 ```
 
 ## Configuration
@@ -152,7 +152,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-3-haiku")
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -226,7 +226,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-3-5-sonnet")
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools"  # Cache system message and any tools
pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.78"
+version = "0.0.80"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
src/lm_deluge/api_requests/anthropic.py
@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +62,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["effort"] = sampling_params.global_effort
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model and sampling_params.reasoning_effort:
-        # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
-            sampling_params.reasoning_effort
-        )
-        request_json["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": budget,
-        }
-        if "top_p" in request_json:
-            request_json["top_p"] = max(request_json["top_p"], 0.95)
-        request_json["temperature"] = 1.0
-        request_json["max_tokens"] += budget
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
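For Anthropic reasoning models, the builder now accepts an explicit token budget as well as a named effort level: `thinking_budget` wins when both are set (after emitting `WARN_THINKING_BUDGET_AND_REASONING_EFFORT`), a budget of 0 disables thinking, and any enabled budget is added on top of `max_tokens`. A usage sketch, assuming `SamplingParams` is importable from `lm_deluge.config` (the module this diff patches):

```python
from lm_deluge.config import SamplingParams

# Explicit budget: request 4,096 thinking tokens (added on top of max_new_tokens).
sp_budget = SamplingParams(thinking_budget=4_096)

# Named effort: "medium" maps to a 4,096-token budget per the table above.
sp_effort = SamplingParams(reasoning_effort="medium")

# Both set: warns, then the explicit budget takes precedence.
sp_both = SamplingParams(thinking_budget=2_048, reasoning_effort="high")

# Neither set (or thinking_budget=0): thinking is sent as {"type": "disabled"}.
sp_off = SamplingParams()
```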
@@ -83,10 +109,11 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        if "temperature" in request_json and "top_p" in request_json:
-            request_json.pop("top_p")
+        request_json.pop("top_p")
 
     # Handle structured outputs (output_format)
     if context.output_schema:
src/lm_deluge/api_requests/gemini.py
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -23,6 +22,21 @@
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    if model.reasoning_model:
-        thinking_config: dict[str, Any] | None = None
-        effort = sampling_params.reasoning_effort
-        if effort is None or effort == "none":
-            budget = 128 if "2.5-pro" in model.id else 0
-            # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
         else:
-            thinking_config = {"includeThoughts": True}
-            if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-                budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
-                    effort
-                ]
-                thinking_config["thinkingBudget"] = budget
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
+        else:
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
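Gemini 3 models always think, so the builder maps `reasoning_effort` onto the API's `thinkingLevel` (the mapping in this diff only emits "low" or "high", with "medium" folded into "high" until supported), defaulting to "low" with `WARN_GEMINI3_NO_REASONING` when no effort is given. Older reasoning models keep the token-budget path, where an explicit `thinking_budget` again takes precedence over `reasoning_effort`. A usage sketch with illustrative `SamplingParams` values:

```python
from lm_deluge.config import SamplingParams

# Gemini 3: effort becomes a thinkingLevel ("medium" currently maps to "high").
sp_gemini3 = SamplingParams(reasoning_effort="medium")

# Gemini 2.5 Flash: effort becomes a thinkingBudget ("minimal" -> 256 tokens,
# bumped to a 512-token floor on flash-lite models).
sp_flash = SamplingParams(reasoning_effort="minimal")

# Any non-Gemini-3 reasoning model: a budget of -1 requests dynamic thinking.
sp_dynamic = SamplingParams(thinking_budget=-1)
```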
@@ -66,6 +121,21 @@ async def _build_gemini_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
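`media_resolution` is forwarded to `generationConfig.mediaResolution` only for Gemini 3 models; elsewhere it triggers `WARN_MEDIA_RESOLUTION_UNSUPPORTED` and is dropped. A sketch of requesting higher-fidelity vision processing (only the `SamplingParams` field is shown in this diff; how the client forwards it is assumed):

```python
from lm_deluge.config import SamplingParams

# Only meaningful on gemini-3 models; other models warn and ignore the value.
sp = SamplingParams(media_resolution="media_resolution_high")
# The request builder then emits:
#   "generationConfig": {..., "mediaResolution": {"level": "media_resolution_high"}}
```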
@@ -137,10 +207,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-                        parts.append(Thinking(part["thought"]))
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +231,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
 
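Taken together with the request-side change earlier in this file, Gemini 3 function calls now round-trip their thought signatures: the parser keeps `thoughtSignature` on `Thinking` and `ToolCall` parts, and the builder injects a dummy signature when a replayed function call has none. An illustration at the raw-part level (plain dicts, placeholder values):

```python
# Response side: a signature returned next to a functionCall part is preserved.
raw_part = {
    "functionCall": {"name": "get_weather", "args": {"city": "Paris"}},
    "thoughtSignature": "sig-from-gemini",  # placeholder value
}
thought_sig = raw_part.get("thoughtSignature")  # -> "sig-from-gemini"

# Request side: a replayed call that lost its signature gets the dummy value.
replayed = {"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}
if "functionCall" in replayed and "thoughtSignature" not in replayed:
    replayed["thoughtSignature"] = "context_engineering_is_the_way_to_go"
```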
src/lm_deluge/client.py
@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
-    temperature: float = 0.75
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -262,6 +262,7 @@ class _LLMClient(BaseModel):
             self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
            self.max_concurrent_requests = max_concurrent_requests
+        return self
 
     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
@@ -336,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0.75),
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -1066,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1095,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1123,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0.75,
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
src/lm_deluge/config.py
@@ -4,14 +4,23 @@ from pydantic import BaseModel
 
 
 class SamplingParams(BaseModel):
-    temperature: float = 0.0
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None
 
     def to_vllm(self):
         try:
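The new `SamplingParams` fields in one place: `temperature` now defaults to 1.0, `global_effort` feeds the Opus 4.5 effort beta header, `thinking_budget` gives direct control over thinking tokens, and `media_resolution` is the Gemini 3 vision knob. A combined sketch (values are illustrative):

```python
from lm_deluge.config import SamplingParams

sp = SamplingParams(
    temperature=1.0,              # new default
    max_new_tokens=2_048,
    global_effort="high",         # consumed by the claude-4.5-opus request path
    reasoning_effort="medium",    # named effort; provider-specific mapping
    thinking_budget=None,         # or an explicit token budget (overrides effort)
    media_resolution=None,        # or "media_resolution_high" on Gemini 3
)
```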