lm-deluge 0.0.76__tar.gz → 0.0.79__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.76/src/lm_deluge.egg-info → lm_deluge-0.0.79}/PKG-INFO +9 -8
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/README.md +8 -7
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/pyproject.toml +1 -1
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/gemini.py +78 -11
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/client.py +1 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/config.py +7 -0
- lm_deluge-0.0.79/src/lm_deluge/llm_tools/filesystem.py +821 -0
- lm_deluge-0.0.79/src/lm_deluge/llm_tools/sandbox.py +523 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/google.py +15 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/openrouter.py +10 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/prompt.py +62 -24
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/warnings.py +2 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79/src/lm_deluge.egg-info}/PKG-INFO +9 -8
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge.egg-info/SOURCES.txt +3 -0
- lm_deluge-0.0.79/tests/test_filesystem.py +119 -0
- lm_deluge-0.0.79/tests/test_filesystem_live.py +82 -0
- lm_deluge-0.0.76/src/lm_deluge/llm_tools/filesystem.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/LICENSE +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/setup.cfg +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/subagents.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/todos.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/tests/test_file_upload.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/tests/test_mock_openai.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/tests/test_native_mcp_server.py +0 -0
- {lm_deluge-0.0.76 → lm_deluge-0.0.79}/tests/test_openrouter_generic.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.76
+Version: 0.0.79
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -52,7 +52,7 @@ Dynamic: license-file
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
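The updated paragraph above lists the variables the client reads and notes that `LLMClient` loads a `.env` file on import. A minimal sketch of setting those variables directly from Python instead (names taken from the paragraph; the values are placeholders):

```python
# Placeholder values; set only the keys for the providers you actually use.
import os

os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."
os.environ["GEMINI_API_KEY"] = "..."
# Bedrock uses standard AWS credentials:
os.environ["AWS_ACCESS_KEY_ID"] = "..."
os.environ["AWS_SECRET_ACCESS_KEY"] = "..."

from lm_deluge import LLMClient  # a .env file in the working directory works as well
```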
@@ -61,9 +61,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient("gpt-
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(
+print(resps[0].completion)
 ```
 
 ## Spraying Across Models
@@ -74,13 +74,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient
 
 client = LLMClient(
-    ["gpt-
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(
+print(resps[0].completion)
 ```
 
 ## Configuration
@@ -181,7 +181,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -255,7 +255,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
@@ -301,5 +301,6 @@ The `lm_deluge.llm_tools` package exposes a few helper functions:
 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
+- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.
 
 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.
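The new `FilesystemManager` entry is only described at the README level here; its actual interface lives in the new `src/lm_deluge/llm_tools/filesystem.py`, which this summary does not show. A purely hypothetical usage sketch in the spirit of the `tools=[...]` examples above (the constructor argument and the `.tools` attribute are assumptions, not the package's confirmed API):

```python
# Hypothetical sketch only: FilesystemManager's constructor and tool accessor are
# assumed for illustration; see src/lm_deluge/llm_tools/filesystem.py for the real API.
from lm_deluge import LLMClient
from lm_deluge.llm_tools import FilesystemManager

fs = FilesystemManager("/tmp/agent-sandbox")  # assumed: sandbox root directory
client = LLMClient("claude-4.5-sonnet")
resps = client.process_prompts_sync(
    ["Create notes.md summarizing the files in your sandbox."],
    tools=fs.tools,  # assumed: Tool objects for read/write/search/apply_patch
)
print(resps[0].completion)
```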
README.md

@@ -23,7 +23,7 @@
 pip install lm-deluge
 ```
 
-The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `
+The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY`, and `GEMINI_API_KEY`. `LLMClient` will automatically load the `.env` file when imported; we recommend using that to set the environment variables. For Bedrock, you'll need to set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 
 ## Quickstart
 
@@ -32,9 +32,9 @@ The package relies on environment variables for API keys. Typical variables incl
 ```python
 from lm_deluge import LLMClient
 
-client = LLMClient("gpt-
+client = LLMClient("gpt-4.1-mini")
 resps = client.process_prompts_sync(["Hello, world!"])
-print(
+print(resps[0].completion)
 ```
 
 ## Spraying Across Models
@@ -45,13 +45,13 @@ To distribute your requests across models, just provide a list of more than one
 from lm_deluge import LLMClient
 
 client = LLMClient(
-    ["gpt-
+    ["gpt-4.1-mini", "claude-4.5-haiku"],
     max_requests_per_minute=10_000
 )
 resps = client.process_prompts_sync(
     ["Hello, ChatGPT!", "Hello, Claude!"]
 )
-print(
+print(resps[0].completion)
 ```
 
 ## Configuration
@@ -152,7 +152,7 @@ def get_weather(city: str) -> str:
     return f"The weather in {city} is sunny and 72°F"
 
 tool = Tool.from_function(get_weather)
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-haiku")
 resps = client.process_prompts_sync(
     ["What's the weather in Paris?"],
     tools=[tool]
@@ -226,7 +226,7 @@ conv = (
 )
 
 # Use prompt caching to cache system message and tools
-client = LLMClient("claude-
+client = LLMClient("claude-4.5-sonnet")
 resps = client.process_prompts_sync(
     [conv],
     cache="system_and_tools" # Cache system message and any tools
@@ -272,5 +272,6 @@ The `lm_deluge.llm_tools` package exposes a few helper functions:
 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
+- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.
 
 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.

src/lm_deluge/api_requests/gemini.py

@@ -23,6 +23,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
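The block added above is a plain dict transform over the Gemini `contents` payload. A standalone sketch of the same logic, pulled into a hypothetical helper for illustration (the package runs it inline inside `_build_gemini_request` and reports through `maybe_warn` rather than returning a count):

```python
# Sketch of the signature-injection logic shown in the hunk above.
DUMMY_SIG = "context_engineering_is_the_way_to_go"

def inject_missing_signatures(messages: list[dict]) -> int:
    """Add a placeholder thoughtSignature to functionCall parts that lack one."""
    injected = 0
    for msg in messages:
        for part in msg.get("parts", []):
            if "functionCall" in part and "thoughtSignature" not in part:
                part["thoughtSignature"] = DUMMY_SIG
                injected += 1
    return injected

messages = [
    {
        "role": "model",
        "parts": [{"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}],
    }
]
print(inject_missing_signatures(messages))          # 1
print(messages[0]["parts"][0]["thoughtSignature"])  # context_engineering_is_the_way_to_go
```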
@@ -40,17 +55,44 @@ async def _build_gemini_request(
     if model.reasoning_model:
         thinking_config: dict[str, Any] | None = None
         effort = sampling_params.reasoning_effort
-
-
-
-
+        is_gemini_3 = "gemini-3" in model.name.lower()
+
+        if is_gemini_3:
+            # Gemini 3 uses thinkingLevel instead of thinkingBudget
+            if effort in {"none", "minimal"}:
+                thinking_config = {"thinkingLevel": "low"}
+            elif effort is None:
+                # Default to high when reasoning is enabled but no preference was provided
+                thinking_config = {"thinkingLevel": "high"}
+            else:
+                # Map reasoning_effort to thinkingLevel
+                level_map = {
+                    "minimal": "low",
+                    "low": "low",
+                    "medium": "medium", # Will work when supported
+                    "high": "high",
+                }
+                thinking_level = level_map.get(effort, "high")
+                thinking_config = {"thinkingLevel": thinking_level}
         else:
-
-            if effort
-            budget =
-
-
-
+            # Gemini 2.5 uses thinkingBudget (legacy)
+            if effort is None or effort == "none":
+                budget = 128 if "2.5-pro" in model.id else 0
+                # Explicitly disable thoughts when no effort is requested
+                thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+            else:
+                thinking_config = {"includeThoughts": True}
+                if (
+                    effort in {"minimal", "low", "medium", "high"}
+                    and "flash" in model.id
+                ):
+                    budget = {
+                        "minimal": 256,
+                        "low": 1024,
+                        "medium": 4096,
+                        "high": 16384,
+                    }[effort]
+                    thinking_config["thinkingBudget"] = budget
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
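The branching added above reduces to a small mapping from `reasoning_effort` to Gemini thinking parameters. A self-contained sketch of that mapping, written as a hypothetical pure function for illustration (the package builds `thinking_config` inline; the model names in the example calls are placeholders):

```python
# Hypothetical helper mirroring the effort -> thinkingConfig mapping in the hunk above.
def build_thinking_config(model_name: str, model_id: str, effort: str | None) -> dict:
    if "gemini-3" in model_name.lower():
        # Gemini 3: thinkingLevel replaces thinkingBudget
        if effort in {"none", "minimal"}:
            return {"thinkingLevel": "low"}
        if effort is None:
            return {"thinkingLevel": "high"}
        level_map = {"minimal": "low", "low": "low", "medium": "medium", "high": "high"}
        return {"thinkingLevel": level_map.get(effort, "high")}
    # Gemini 2.5 (legacy): thinkingBudget
    if effort is None or effort == "none":
        budget = 128 if "2.5-pro" in model_id else 0
        return {"includeThoughts": False, "thinkingBudget": budget}
    config: dict = {"includeThoughts": True}
    if effort in {"minimal", "low", "medium", "high"} and "flash" in model_id:
        config["thinkingBudget"] = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
    return config

print(build_thinking_config("gemini-3-pro", "gemini-3-pro", "medium"))
# {'thinkingLevel': 'medium'}
print(build_thinking_config("gemini-2.5-flash", "gemini-2.5-flash", "low"))
# {'includeThoughts': True, 'thinkingBudget': 1024}
```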
@@ -66,6 +108,21 @@ async def _build_gemini_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
@@ -137,10 +194,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +218,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
 
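For reference, the candidate parts the updated parser consumes look roughly like the dict below; the values are illustrative placeholders, and only the keys read by the code above (`text`, `thought`, `functionCall`, `thoughtSignature`) are shown:

```python
# Illustrative shape of Gemini candidate parts as consumed by the parsing hunks above.
candidate_parts = [
    {"thought": "Consider calling the weather tool.", "thoughtSignature": "sig-abc123"},
    {
        "functionCall": {"name": "get_weather", "args": {"city": "Paris"}},
        "thoughtSignature": "sig-abc123",
    },
    {"text": "Checking the weather in Paris now."},
]
# The loop above turns these into Thinking(..., thought_signature=...),
# ToolCall(..., thought_signature=...), and Text(...) parts respectively.
```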
src/lm_deluge/client.py

@@ -262,6 +262,7 @@ class _LLMClient(BaseModel):
         self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
             self.max_concurrent_requests = max_concurrent_requests
+        return self
 
     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
src/lm_deluge/config.py

@@ -12,6 +12,13 @@ class SamplingParams(BaseModel):
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None
 
     def to_vllm(self):
         try:
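The new field pairs with the gemini.py handling above: when set on a Gemini 3 request it becomes `generationConfig.mediaResolution = {"level": ...}`, and on other models it only triggers `WARN_MEDIA_RESOLUTION_UNSUPPORTED`. A minimal construction sketch, assuming the remaining `SamplingParams` fields have defaults and leaving out how the params object is handed to `LLMClient`, since that wiring is not part of this diff:

```python
# Sketch: setting the new Gemini-3-only field on SamplingParams.
# Import path follows src/lm_deluge/config.py; other fields are assumed to default.
from lm_deluge.config import SamplingParams

params = SamplingParams(media_resolution="media_resolution_high")
print(params.media_resolution)  # "media_resolution_high"
```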