lm-deluge 0.0.76__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -23,6 +23,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -40,17 +55,44 @@ async def _build_gemini_request(
     if model.reasoning_model:
         thinking_config: dict[str, Any] | None = None
         effort = sampling_params.reasoning_effort
-        if effort is None or effort == "none":
-            budget = 128 if "2.5-pro" in model.id else 0
-            # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+        is_gemini_3 = "gemini-3" in model.name.lower()
+
+        if is_gemini_3:
+            # Gemini 3 uses thinkingLevel instead of thinkingBudget
+            if effort in {"none", "minimal"}:
+                thinking_config = {"thinkingLevel": "low"}
+            elif effort is None:
+                # Default to high when reasoning is enabled but no preference was provided
+                thinking_config = {"thinkingLevel": "high"}
+            else:
+                # Map reasoning_effort to thinkingLevel
+                level_map = {
+                    "minimal": "low",
+                    "low": "low",
+                    "medium": "medium",  # Will work when supported
+                    "high": "high",
+                }
+                thinking_level = level_map.get(effort, "high")
+                thinking_config = {"thinkingLevel": thinking_level}
         else:
-            thinking_config = {"includeThoughts": True}
-            if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-                budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
-                    effort
-                ]
-                thinking_config["thinkingBudget"] = budget
+            # Gemini 2.5 uses thinkingBudget (legacy)
+            if effort is None or effort == "none":
+                budget = 128 if "2.5-pro" in model.id else 0
+                # Explicitly disable thoughts when no effort is requested
+                thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+            else:
+                thinking_config = {"includeThoughts": True}
+                if (
+                    effort in {"minimal", "low", "medium", "high"}
+                    and "flash" in model.id
+                ):
+                    budget = {
+                        "minimal": 256,
+                        "low": 1024,
+                        "medium": 4096,
+                        "high": 16384,
+                    }[effort]
+                    thinking_config["thinkingBudget"] = budget
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
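A hedged restatement of the mapping this branch produces, assuming only the model-name checks shown above (a "gemini-3" substring selects the thinkingLevel path; otherwise the 2.5-era thinkingBudget path runs). The real code operates on model/sampling_params objects rather than bare arguments:

```python
# Standalone sketch of the branch above for reference.
def sketch_thinking_config(effort: str | None, gemini_3: bool, flash: bool) -> dict:
    if gemini_3:
        # Gemini 3 does not fully disable thinking; "none" is clamped to "low"
        if effort in {"none", "minimal"}:
            return {"thinkingLevel": "low"}
        if effort is None:
            return {"thinkingLevel": "high"}
        return {"thinkingLevel": {"low": "low", "medium": "medium", "high": "high"}.get(effort, "high")}
    if effort is None or effort == "none":
        # (the real code keeps a 128-token floor for 2.5 Pro)
        return {"includeThoughts": False, "thinkingBudget": 0}
    cfg: dict = {"includeThoughts": True}
    if flash and effort in {"minimal", "low", "medium", "high"}:
        cfg["thinkingBudget"] = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
    return cfg

assert sketch_thinking_config("medium", gemini_3=True, flash=False) == {"thinkingLevel": "medium"}
assert sketch_thinking_config("low", gemini_3=False, flash=True) == {
    "includeThoughts": True,
    "thinkingBudget": 1024,
}
```

Note that in the Gemini 3 branch the "minimal" entry of level_map is unreachable, since "minimal" is already caught by the first check.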
@@ -66,6 +108,21 @@ async def _build_gemini_request(
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
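For illustration, the generationConfig fragment the block above emits for a Gemini 3 request at medium resolution (a sketch with other keys omitted, not captured output):

```python
# Resulting request shape when media_resolution is set on a Gemini 3
# model; on other models the field is dropped with a warning instead.
request_json = {
    "contents": [],  # messages elided
    "generationConfig": {
        "mediaResolution": {"level": "media_resolution_medium"},
    },
}
```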
@@ -137,10 +194,19 @@ class GeminiRequest(APIRequestBase):
         candidate = data["candidates"][0]
         if "content" in candidate and "parts" in candidate["content"]:
             for part in candidate["content"]["parts"]:
+                # Extract thought signature if present
+                thought_sig = part.get("thoughtSignature")
+
                 if "text" in part:
                     parts.append(Text(part["text"]))
                 elif "thought" in part:
-                    parts.append(Thinking(part["thought"]))
+                    # Thought with optional signature
+                    parts.append(
+                        Thinking(
+                            content=part["thought"],
+                            thought_signature=thought_sig,
+                        )
+                    )
                 elif "functionCall" in part:
                     func_call = part["functionCall"]
                     # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +218,7 @@ class GeminiRequest(APIRequestBase):
                         id=tool_id,
                         name=func_call["name"],
                         arguments=func_call.get("args", {}),
+                        thought_signature=thought_sig,
                     )
                 )
 
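Taken together, the two response-side changes let signatures round-trip: the signature captured from a part is attached to both Thinking and ToolCall, so it can be replayed on the next request. A self-contained sketch with stand-in dataclasses (the real Thinking/ToolCall classes live elsewhere in lm-deluge; only the keyword names are taken from the diff):

```python
import uuid
from dataclasses import dataclass

# Stand-ins that are keyword-compatible with the constructor calls in
# the diff; the real classes are defined elsewhere in lm-deluge.
@dataclass
class Thinking:
    content: str
    thought_signature: str | None = None

@dataclass
class ToolCall:
    id: str
    name: str
    arguments: dict
    thought_signature: str | None = None

raw_parts = [
    {"thought": "plan the call", "thoughtSignature": "sig-abc"},
    {"functionCall": {"name": "search", "args": {"q": "weather"}}, "thoughtSignature": "sig-abc"},
]
parsed = []
for part in raw_parts:
    sig = part.get("thoughtSignature")
    if "thought" in part:
        parsed.append(Thinking(content=part["thought"], thought_signature=sig))
    elif "functionCall" in part:
        fc = part["functionCall"]
        # Gemini doesn't return tool-call IDs, so one is generated locally
        parsed.append(
            ToolCall(
                id=str(uuid.uuid4()),
                name=fc["name"],
                arguments=fc.get("args", {}),
                thought_signature=sig,
            )
        )
assert parsed[1].thought_signature == "sig-abc"
```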
lm_deluge/client.py CHANGED
@@ -262,6 +262,7 @@ class _LLMClient(BaseModel):
         self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
             self.max_concurrent_requests = max_concurrent_requests
+        return self
 
     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
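The added `return self` makes this setter fluent. A hypothetical sketch, since the enclosing method's `def` line falls outside the hunk and its real name is unknown:

```python
class ClientSketch:
    # Illustrative method name; only the body and the new `return self`
    # come from the diff.
    def set_limits(self, max_tokens_per_minute=None, max_concurrent_requests=None):
        self.max_tokens_per_minute = max_tokens_per_minute
        if max_concurrent_requests:
            self.max_concurrent_requests = max_concurrent_requests
        return self  # new in 0.0.79: enables chaining

client = ClientSketch().set_limits(max_tokens_per_minute=100_000, max_concurrent_requests=32)
```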
lm_deluge/config.py CHANGED
@@ -12,6 +12,13 @@ class SamplingParams(BaseModel):
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None
 
     def to_vllm(self):
         try:
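Usage sketch for the new field, assuming the import path implied by the file header (lm_deluge/config.py) and that the model's other fields all carry defaults; on non-Gemini-3 models the request builder warns and ignores it, per the hunk above:

```python
from lm_deluge.config import SamplingParams  # path assumed from the file header

# High-fidelity vision processing; only honored on Gemini 3 models.
params = SamplingParams(media_resolution="media_resolution_high")
assert params.media_resolution == "media_resolution_high"
```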