lm-deluge 0.0.76__py3-none-any.whl → 0.0.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/api_requests/gemini.py +78 -11
- lm_deluge/client.py +1 -0
- lm_deluge/config.py +7 -0
- lm_deluge/llm_tools/filesystem.py +821 -0
- lm_deluge/llm_tools/sandbox.py +523 -0
- lm_deluge/models/google.py +15 -0
- lm_deluge/models/openrouter.py +10 -0
- lm_deluge/prompt.py +62 -24
- lm_deluge/warnings.py +2 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.79.dist-info}/METADATA +9 -8
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.79.dist-info}/RECORD +14 -13
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.79.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.79.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.76.dist-info → lm_deluge-0.0.79.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/gemini.py
CHANGED
@@ -23,6 +23,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
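In isolation, the injected behaviour reduces to the sketch below, assuming the message shape used in the hunk. The helper function and sample conversation are illustrative only; the real code also routes a warning through `maybe_warn` whenever it patches a part.

```python
# Minimal sketch of the Gemini 3 signature injection; the helper name and
# the sample payload are illustrative, not lm_deluge's API.
DUMMY_SIG = "context_engineering_is_the_way_to_go"

def inject_dummy_signatures(messages: list[dict]) -> list[dict]:
    for msg in messages:
        for part in msg.get("parts", []):
            # Patch only function-call parts that are missing a signature.
            if "functionCall" in part and "thoughtSignature" not in part:
                part["thoughtSignature"] = DUMMY_SIG
    return messages

history = [
    {
        "role": "model",
        "parts": [{"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}],
    }
]
inject_dummy_signatures(history)
print(history[0]["parts"][0]["thoughtSignature"])
# -> context_engineering_is_the_way_to_go
```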
@@ -40,17 +55,44 @@ async def _build_gemini_request(
     if model.reasoning_model:
         thinking_config: dict[str, Any] | None = None
         effort = sampling_params.reasoning_effort
-        [removed lines not shown]
+        is_gemini_3 = "gemini-3" in model.name.lower()
+
+        if is_gemini_3:
+            # Gemini 3 uses thinkingLevel instead of thinkingBudget
+            if effort in {"none", "minimal"}:
+                thinking_config = {"thinkingLevel": "low"}
+            elif effort is None:
+                # Default to high when reasoning is enabled but no preference was provided
+                thinking_config = {"thinkingLevel": "high"}
+            else:
+                # Map reasoning_effort to thinkingLevel
+                level_map = {
+                    "minimal": "low",
+                    "low": "low",
+                    "medium": "medium",  # Will work when supported
+                    "high": "high",
+                }
+                thinking_level = level_map.get(effort, "high")
+                thinking_config = {"thinkingLevel": thinking_level}
         else:
-            if effort
-                budget =
+            # Gemini 2.5 uses thinkingBudget (legacy)
+            if effort is None or effort == "none":
+                budget = 128 if "2.5-pro" in model.id else 0
+                # Explicitly disable thoughts when no effort is requested
+                thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+            else:
+                thinking_config = {"includeThoughts": True}
+                if (
+                    effort in {"minimal", "low", "medium", "high"}
+                    and "flash" in model.id
+                ):
+                    budget = {
+                        "minimal": 256,
+                        "low": 1024,
+                        "medium": 4096,
+                        "high": 16384,
+                    }[effort]
+                    thinking_config["thinkingBudget"] = budget
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
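The reasoning-effort handling can be read as a pure mapping from `reasoning_effort` to a `thinkingConfig` dict. The sketch below restates the hunk as a free-standing function; the function itself and the example model ids are assumptions, while the level map and budgets come straight from the added lines.

```python
# Restates the hunk's effort handling as a standalone helper (hypothetical,
# not part of lm_deluge's API).
def build_thinking_config(effort: str | None, model_id: str, is_gemini_3: bool) -> dict:
    if is_gemini_3:
        # Gemini 3 only understands thinkingLevel.
        if effort in {"none", "minimal"}:
            return {"thinkingLevel": "low"}
        if effort is None:
            return {"thinkingLevel": "high"}
        level_map = {"minimal": "low", "low": "low", "medium": "medium", "high": "high"}
        return {"thinkingLevel": level_map.get(effort, "high")}
    # Gemini 2.5 keeps the legacy thinkingBudget knob.
    if effort is None or effort == "none":
        return {"includeThoughts": False, "thinkingBudget": 128 if "2.5-pro" in model_id else 0}
    config: dict = {"includeThoughts": True}
    if effort in {"minimal", "low", "medium", "high"} and "flash" in model_id:
        config["thinkingBudget"] = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
    return config

print(build_thinking_config("low", "gemini-2.5-flash", is_gemini_3=False))
# -> {'includeThoughts': True, 'thinkingBudget': 1024}
print(build_thinking_config("medium", "gemini-3-pro-preview", is_gemini_3=True))
# -> {'thinkingLevel': 'medium'}
```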
@@ -66,6 +108,21 @@ async def _build_gemini_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
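For a Gemini 3 request with `media_resolution` set, the body ends up shaped roughly as below. The keys are taken from the hunks above, the prompt content is made up, and per the comment in the hunk this path targets the v1alpha API surface.

```python
# Approximate request body once media_resolution is honoured for a Gemini 3
# model; only keys visible in the hunks above are shown.
request_json = {
    "contents": [{"role": "user", "parts": [{"text": "Describe this image."}]}],
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": "high"},
        "mediaResolution": {"level": "media_resolution_high"},
    },
}
```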
@@ -137,10 +194,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-                        [removed line not shown]
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +218,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
 
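Read together, the two response-side hunks pull one `thoughtSignature` per candidate part and attach it to both thinking and tool-call outputs. The self-contained sketch below mirrors that flow, using plain dicts in place of lm_deluge's `Text`/`Thinking`/tool-call part classes; the sample candidate payload is fabricated.

```python
# Mirrors the parsing flow above with plain dicts standing in for the
# library's part classes.
def parse_parts(candidate: dict) -> list[dict]:
    parsed: list[dict] = []
    for part in candidate.get("content", {}).get("parts", []):
        thought_sig = part.get("thoughtSignature")  # may be None
        if "text" in part:
            parsed.append({"type": "text", "text": part["text"]})
        elif "thought" in part:
            parsed.append(
                {"type": "thinking", "content": part["thought"], "thought_signature": thought_sig}
            )
        elif "functionCall" in part:
            fc = part["functionCall"]
            parsed.append(
                {
                    "type": "tool_call",
                    "name": fc["name"],
                    "arguments": fc.get("args", {}),
                    "thought_signature": thought_sig,
                }
            )
    return parsed

candidate = {
    "content": {
        "parts": [
            {"thought": "Need to call the search tool.", "thoughtSignature": "sig-1"},
            {"functionCall": {"name": "search", "args": {"q": "weather"}}, "thoughtSignature": "sig-2"},
        ]
    }
}
print(parse_parts(candidate))
```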
lm_deluge/client.py
CHANGED
@@ -262,6 +262,7 @@ class _LLMClient(BaseModel):
         self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
             self.max_concurrent_requests = max_concurrent_requests
+        return self
 
     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
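The one-line client.py change makes the limits setter return the client, so configuration calls can be chained. A generic illustration of the pattern (class and method names here are hypothetical, not lm_deluge's actual API):

```python
# Generic return-self pattern, not lm_deluge code: returning self from a
# setter lets configuration calls chain fluently.
class FluentLimits:
    def __init__(self) -> None:
        self.max_tokens_per_minute: int | None = None
        self.max_concurrent_requests: int | None = None

    def with_limits(self, tpm: int | None = None, concurrency: int | None = None) -> "FluentLimits":
        if tpm:
            self.max_tokens_per_minute = tpm
        if concurrency:
            self.max_concurrent_requests = concurrency
        return self  # the added line in the hunk plays this role

limits = FluentLimits().with_limits(tpm=100_000, concurrency=32)
print(limits.max_concurrent_requests)  # -> 32
```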
lm_deluge/config.py
CHANGED
@@ -12,6 +12,13 @@ class SamplingParams(BaseModel):
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None
 
     def to_vllm(self):
         try:
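Assuming the import path follows the file shown and the remaining sampling fields all keep defaults (as the ones visible in this hunk do), the new field would be set as below; whether it takes effect is decided in `_build_gemini_request` above.

```python
# Hypothetical usage of the new field. On Gemini 3 models it is copied into
# generationConfig["mediaResolution"]; other models trigger the
# WARN_MEDIA_RESOLUTION_UNSUPPORTED warning instead.
from lm_deluge.config import SamplingParams

params = SamplingParams(media_resolution="media_resolution_high")
print(params.media_resolution)  # -> media_resolution_high
```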