lm-deluge 0.0.71__py3-none-any.whl → 0.0.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lm_deluge/api_requests/base.py CHANGED
@@ -90,9 +90,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-        start_time = time.time()
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-            last_status: str | None = None
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                    print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )
 
             try:
                 self.context.status_tracker.total_requests += 1
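Note on the clock change above: time.time() follows the wall clock, which can jump when NTP or the user adjusts it, so a deadline computed from it can fire early or never. time.monotonic() only moves forward. A minimal sketch of the deadline idiom the new code adopts (timeout value illustrative):

    import time

    timeout = 30.0  # illustrative; the real value is self.context.request_timeout
    start = time.monotonic()
    deadline = start + timeout

    def remaining() -> float:
        # Seconds of budget left; <= 0 means this attempt has timed out.
        return deadline - time.monotonic()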
@@ -109,14 +132,11 @@ class APIRequestBase(ABC):
                 last_status = data["status"]
 
                 while True:
-                    if time.time() - start_time > self.context.request_timeout:
-                        # cancel the response
-                        async with session.post(
-                            url=f"{self.url}/{response_id}/cancel",
-                            headers=self.request_header,
-                        ) as http_response:
-                            http_response.raise_for_status()
-
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +148,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(5.0)
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
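The capped sleep in the hunk above keeps the poll loop from overshooting the deadline by up to a full poll interval. Worked values (illustrative):

    poll_interval = 5.0

    # 12s of budget left -> sleep the full 5s interval.
    assert min(poll_interval, max(12.0, 0)) == 5.0
    # 2.3s left -> sleep exactly 2.3s, so the timeout check fires on schedule.
    assert min(poll_interval, max(2.3, 0)) == 2.3
    # Deadline already passed -> sleep 0 rather than a negative duration.
    assert min(poll_interval, max(-1.0, 0)) == 0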
@@ -146,6 +167,8 @@ class APIRequestBase(ABC):
                         return await self.handle_response(http_response)
 
             except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                 raise_if_modal_exception(e)
                 tb = traceback.format_exc()
                 print(tb)
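Taken together, the base.py hunks implement a start -> poll -> cancel-on-timeout-or-error loop. A self-contained sketch of the pattern: the {url}/{id} and {url}/{id}/cancel endpoint shapes follow the diff, while the function name, the "id"/"status" fields, and the terminal-status check are assumptions based on the OpenAI background-responses flow:

    import asyncio
    import time

    import aiohttp


    async def start_poll_cancel(url: str, headers: dict, payload: dict, timeout: float):
        poll_interval = 5.0
        deadline = time.monotonic() + timeout
        async with aiohttp.ClientSession() as session:

            async def cancel(response_id: str) -> None:
                # Best effort: ask the server to stop working on the request.
                try:
                    async with session.post(f"{url}/{response_id}/cancel", headers=headers):
                        pass
                except Exception:
                    pass

            async with session.post(url, headers=headers, json=payload) as resp:
                resp.raise_for_status()
                data = await resp.json()
            response_id = data["id"]
            try:
                while True:
                    remaining = deadline - time.monotonic()
                    if remaining <= 0:
                        await cancel(response_id)
                        return None  # caller treats None as a timeout
                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                    async with session.get(f"{url}/{response_id}", headers=headers) as resp:
                        resp.raise_for_status()
                        data = await resp.json()
                    if data["status"] not in ("queued", "in_progress"):
                        return data
            except Exception:
                await cancel(response_id)
                raise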
lm_deluge/api_requests/openai.py CHANGED
@@ -67,10 +67,12 @@ async def _build_oa_chat_request(
             effort = "minimal"
         else:
             effort = "low"
-        if effort == "minimal" and "gpt-5" not in model.id:
-            print(
-                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-            )
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
@@ -271,16 +273,24 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
     if model.reasoning_model:
-        if sampling_params.reasoning_effort in [None, "none"]:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-                sampling_params.reasoning_effort = "none"
+                effort = "none"
             else:
-                sampling_params.reasoning_effort = "low"
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
         request_json["temperature"] = 1.0
         request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort": sampling_params.reasoning_effort,
+            "effort": effort,
             "summary": "auto",
         }
     else:
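Both builders now apply the same effort-normalization rule, which reads as a pure function. A sketch (the function name is hypothetical; the branching mirrors the diff):

    def normalize_effort(effort: str | None, model_id: str) -> str:
        # No effort requested: gemini models can switch reasoning off,
        # everything else falls back to "low".
        if effort in (None, "none"):
            return "none" if "gemini" in model_id else "low"
        # GPT-5.1 replaces "minimal" with "none"; models outside the gpt-5
        # family don't support "minimal" at all and get "low".
        if effort == "minimal" and "gpt-5.1" in model_id:
            return "none"
        if effort == "minimal" and "gpt-5" not in model_id:
            return "low"
        return effort

    assert normalize_effort("minimal", "gpt-5.1-codex") == "none"
    assert normalize_effort("minimal", "gpt-5") == "minimal"  # plain gpt-5 keeps it
    assert normalize_effort("minimal", "o4-mini") == "low"
    assert normalize_effort(None, "gemini-2.5-pro") == "none"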
lm_deluge/client.py CHANGED
@@ -44,10 +44,14 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """
 
-    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-minimal": "minimal",
+        "-none": "none",
     }
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
@@ -149,9 +153,11 @@ class _LLMClient(BaseModel):
 
     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+    ) -> tuple[
+        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+    ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high"] | None] = []
+        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
 
         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -164,7 +170,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts
 
     def _align_sampling_params(
-        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (
@@ -332,7 +341,7 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
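With "-minimal" and "-none" registered, reasoning effort can be chosen directly in the model name, and the suffix is stripped only when the trimmed name is a real model. A sketch of that behavior (the registry check is an assumption about what "registered" means here):

    SUFFIXES = {"-low": "low", "-medium": "medium", "-high": "high",
                "-minimal": "minimal", "-none": "none"}

    def strip_suffix(model_name: str, registry: set[str]) -> tuple[str, str | None]:
        for suffix, effort in SUFFIXES.items():
            if model_name.endswith(suffix) and len(model_name) > len(suffix):
                trimmed = model_name[: -len(suffix)]
                if trimmed in registry:  # only strip when the base model exists
                    return trimmed, effort
        return model_name, None

    registry = {"gpt-5.1", "gpt-4.1-mini"}
    assert strip_suffix("gpt-5.1-none", registry) == ("gpt-5.1", "none")
    assert strip_suffix("gpt-4.1-mini", registry) == ("gpt-4.1-mini", None)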
@@ -364,6 +373,15 @@ class _LLMClient(BaseModel):
         assert (
             self.use_responses_api
         ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
            if len(self.model_names) == 1:
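The check fails fast at construction time rather than when the first request errors. Illustrative usage; the public constructor name and keyword form are assumed, not shown in this diff:

    from lm_deluge import LLMClient

    # Fine: codex models are served via the Responses API.
    client = LLMClient(model_names=["gpt-5.1-codex"], use_responses_api=True)

    # Raises ValueError: "Model 'gpt-5.1-codex' requires use_responses_api=True. ..."
    client = LLMClient(model_names=["gpt-5.1-codex"])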
lm_deluge/models/openai.py CHANGED
@@ -10,6 +10,48 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.1": {
+        "id": "gpt-5.1",
+        "name": "gpt-5.1",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex": {
+        "id": "gpt-5.1-codex",
+        "name": "gpt-5.1-codex",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex-mini": {
+        "id": "gpt-5.1-codex-mini",
+        "name": "gpt-5.1-codex-mini",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 0.25,
+        "cached_input_cost": 0.025,
+        "output_cost": 2.0,
+        "reasoning_model": True,
+    },
     "gpt-5-codex": {
         "id": "gpt-5-codex",
         "name": "gpt-5-codex",
lm_deluge/warnings.py CHANGED
@@ -7,6 +7,8 @@ WARNINGS: dict[str, str] = {
     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+    "WARN_MINIMAL_TO_LOW": "'minimal' reasoning effort only allowed for gpt-5 models. Setting to 'low' for {model_name}.",
+    "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
 }
 
 
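The diff establishes only maybe_warn's call shape: a warning key plus format kwargs. A plausible minimal implementation consistent with that shape; the once-per-process deduplication is a guess, not the package's actual code:

    import warnings

    WARNINGS = {
        "WARN_MINIMAL_TO_NONE": (
            "GPT-5.1 models don't support 'minimal' reasoning effort. "
            "Converting to 'none' for {model_name}."
        ),
    }
    _seen: set[str] = set()

    def maybe_warn(key: str, **kwargs) -> None:
        # Look up the template, format it, and emit it at most once.
        if key not in _seen:
            _seen.add(key)
            warnings.warn(WARNINGS[key].format(**kwargs))

    maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1")  # warns
    maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1")  # silent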
lm_deluge-0.0.73.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.71
+Version: 0.0.73
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
lm_deluge-0.0.73.dist-info/RECORD CHANGED
@@ -2,7 +2,7 @@ lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=nBKuP6buwQYNMCP9f2SOuPkfowKRijJv4-bI-STg7Iw,40824
+lm_deluge/client.py,sha256=WOYYSJopBqN3SPlRorkkgapYnSUvo6CveDoPMfZz8QQ,41409
 lm_deluge/config.py,sha256=s3wFBRD6pi0wtXMJRmQDT2vdiqSvhjUPmLehbkv41i0,943
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
@@ -15,16 +15,16 @@ lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=Kp2O5lDq_WVo_ASxjLQSHzVRbaxZkS6J0JIIskBjux0,28909
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
-lm_deluge/warnings.py,sha256=nlDJMCw30VhDEFxqLO2-bfXH_Tv5qmlglzUSbokCSw8,1498
+lm_deluge/warnings.py,sha256=xXXYXEfaaSVr__16BKOEEWLdfZi1L-2ylzTrXTRyO18,1748
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 lm_deluge/api_requests/anthropic.py,sha256=QGq3G5jJIGcoM2HdRt73GgkvZs4GOViyjYexWex05Vk,8927
-lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
+lm_deluge/api_requests/base.py,sha256=mXEM85mcU_5LD-ugELpCl28tv-tpHKcaxerTIVLQZVo,10436
 lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
 lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
 lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
-lm_deluge/api_requests/openai.py,sha256=ezlGYNGHFvQGgs-xuxhDDeiEembHhVh_KqJBdRBqSlM,26038
+lm_deluge/api_requests/openai.py,sha256=ZqzQxs8CNUk757Q-1AfpKODkg5yPFHZjsqLm8bwjYDs,26584
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -58,7 +58,7 @@ lm_deluge/models/kimi.py,sha256=1voigLdNO2CxpWv0KDpQPP3Wolx5WrqgAlYL9ObJFuQ,1117
 lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
 lm_deluge/models/minimax.py,sha256=rwW9gNotAYfDVtMlqmSYegN6GoZM_9DSNNZU2yPOmaU,275
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
-lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
+lm_deluge/models/openai.py,sha256=t6fcXo0YXgPQ6YiftZJP8gPw8FOBqoVapSavMVmtaOw,12411
 lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
 lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
 lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
@@ -69,8 +69,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.71.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.71.dist-info/METADATA,sha256=kgq3xiS7tMIbXpx5UkhCEA_yJAJvgGOPaie_ZlScTxQ,13514
-lm_deluge-0.0.71.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.71.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.71.dist-info/RECORD,,
+lm_deluge-0.0.73.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.73.dist-info/METADATA,sha256=zr-cq_RyQNDDplrt6CUbVfGNpsVQEbACExRg5w6mE5M,13514
+lm_deluge-0.0.73.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.73.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.73.dist-info/RECORD,,