lm-deluge 0.0.72__py3-none-any.whl → 0.0.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lm_deluge/api_requests/openai.py CHANGED
@@ -67,10 +67,12 @@ async def _build_oa_chat_request(
             effort = "minimal"
         else:
             effort = "low"
-        if effort == "minimal" and "gpt-5" not in model.id:
-            print(
-                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-            )
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
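The ordering of the two branches matters: every "gpt-5.1" model ID also contains "gpt-5", so the GPT-5.1 check must come first or 'minimal' would never be remapped to 'none'. The change also replaces the old ad-hoc print with the keyed maybe_warn helper. A minimal sketch of the remapping rule pulled out as a standalone function (the wrapper function and test IDs are illustrative, not library code):

```python
# Sketch of the effort remapping introduced above. Branch order matters:
# "gpt-5.1" IDs also contain "gpt-5", so the 5.1 case is checked first.
def remap_effort(effort: str, model_id: str) -> str:
    if effort == "minimal" and "gpt-5.1" in model_id:
        return "none"   # GPT-5.1 accepts 'none' in place of 'minimal'
    if effort == "minimal" and "gpt-5" not in model_id:
        return "low"    # other model families reject 'minimal' outright
    return effort

assert remap_effort("minimal", "gpt-5.1-codex") == "none"
assert remap_effort("minimal", "gpt-5-mini") == "minimal"  # plain gpt-5 keeps it
assert remap_effort("minimal", "o3") == "low"
```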
@@ -271,16 +273,24 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
     if model.reasoning_model:
-        if sampling_params.reasoning_effort in [None, "none"]:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-                sampling_params.reasoning_effort = "none"
+                effort = "none"
             else:
-                sampling_params.reasoning_effort = "low"
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
         request_json["temperature"] = 1.0
         request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort": sampling_params.reasoning_effort,
+            "effort": effort,
             "summary": "auto",
         }
     else:
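Besides mirroring the GPT-5.1 handling above, this hunk fixes a subtler problem: the old code wrote the fallback effort back onto sampling_params, and because a SamplingParams object can be shared across many requests, a fallback chosen for one request leaked into the next. The resolved value now lives in a local effort variable. A small sketch of the hazard, using a hypothetical stand-in for the library's SamplingParams:

```python
from dataclasses import dataclass


@dataclass
class FakeSamplingParams:  # hypothetical stand-in, for illustration only
    reasoning_effort: str | None = None


params = FakeSamplingParams()  # one object shared across several requests

# Old behavior: building a non-gemini request mutated the shared object...
params.reasoning_effort = "low"
# ...so a later gemini request no longer saw None and could not take the
# "switch reasoning off" branch.

# New behavior: resolve into a local, leaving shared state untouched.
params = FakeSamplingParams()
effort = params.reasoning_effort or "low"
assert params.reasoning_effort is None
```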
lm_deluge/client.py CHANGED
@@ -44,10 +44,14 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """
 
-    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-minimal": "minimal",
+        "-none": "none",
     }
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
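With "-minimal" and "-none" in the table, reasoning effort can be encoded directly in the model name and stripped during normalization. A hedged usage sketch, assuming the public LLMClient accepts model_names the way this base class declares it (the import path and constructor call are assumptions, not shown in this diff):

```python
from lm_deluge import LLMClient  # assumed public import path

# "gpt-5.1-none" should normalize to model "gpt-5.1" with effort "none",
# and "gpt-5-minimal" to "gpt-5" with effort "minimal". The suffix is only
# stripped when the trimmed name is itself a registered model.
client = LLMClient(model_names=["gpt-5.1-none", "gpt-5-minimal"])
```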
@@ -149,9 +153,11 @@ class _LLMClient(BaseModel):
 
     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+    ) -> tuple[
+        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+    ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high"] | None] = []
+        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
 
         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -164,7 +170,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts
 
     def _align_sampling_params(
-        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (
@@ -332,7 +341,7 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
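The stripping rule is deliberately conservative: a suffix is removed only when the trimmed name is itself a registered model, so names that merely happen to end in "-mini" or similar survive intact. A standalone sketch of the rule, with the registry lookup simplified to a set membership test (the real method consults the model registry):

```python
SUFFIXES = {
    "-low": "low", "-medium": "medium", "-high": "high",
    "-minimal": "minimal", "-none": "none",
}


def strip_reasoning_suffix(name: str, registry: set[str]) -> tuple[str, str | None]:
    # Mirror of _strip_reasoning_suffix_if_registered: strip only when the
    # trimmed model name actually exists in the registry.
    for suffix, effort in SUFFIXES.items():
        if name.endswith(suffix) and len(name) > len(suffix):
            trimmed = name[: -len(suffix)]
            if trimmed in registry:
                return trimmed, effort
    return name, None


registry = {"gpt-5.1", "gpt-5.1-codex-mini"}
assert strip_reasoning_suffix("gpt-5.1-none", registry) == ("gpt-5.1", "none")
# "-mini" is not a reasoning suffix, so the codex-mini name passes through.
assert strip_reasoning_suffix("gpt-5.1-codex-mini", registry) == (
    "gpt-5.1-codex-mini",
    None,
)
```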
@@ -364,6 +373,15 @@ class _LLMClient(BaseModel):
             assert (
                 self.use_responses_api
             ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
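The check runs during client validation, so a codex model paired with the chat-completions path fails at construction time instead of on the first request. A usage sketch (the constructor call and the default for use_responses_api are assumptions; and since _LLMClient is a pydantic BaseModel, the ValueError may surface wrapped in a pydantic ValidationError, which subclasses ValueError in pydantic v2):

```python
from lm_deluge import LLMClient  # assumed public import path

# Fine: codex models paired with the Responses API.
client = LLMClient(model_names=["gpt-5.1-codex"], use_responses_api=True)

# Fails fast, assuming use_responses_api defaults to off:
try:
    LLMClient(model_names=["gpt-5.1-codex"])
except ValueError as err:
    print(err)  # Model 'gpt-5.1-codex' requires use_responses_api=True. ...
```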
lm_deluge/models/openai.py CHANGED
@@ -10,6 +10,48 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.1": {
+        "id": "gpt-5.1",
+        "name": "gpt-5.1",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex": {
+        "id": "gpt-5.1-codex",
+        "name": "gpt-5.1-codex",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex-mini": {
+        "id": "gpt-5.1-codex-mini",
+        "name": "gpt-5.1-codex-mini",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 0.25,
+        "cached_input_cost": 0.025,
+        "output_cost": 2.0,
+        "reasoning_model": True,
+    },
     "gpt-5-codex": {
         "id": "gpt-5-codex",
         "name": "gpt-5-codex",
lm_deluge/warnings.py CHANGED
@@ -7,6 +7,8 @@ WARNINGS: dict[str, str] = {
     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+    "WARN_MINIMAL_TO_LOW": "'minimal' reasoning effort only allowed for gpt-5 models. Setting to 'low' for {model_name}.",
+    "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
 }
 
 
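Both new templates follow the existing pattern: a stable key mapped to a format string with named placeholders, emitted via maybe_warn at the call sites shown earlier. A sketch of how such a helper plausibly consumes the table; the once-per-key gating is an assumed reading of the "maybe", and the real implementation in lm_deluge/warnings.py may differ:

```python
WARNINGS = {
    "WARN_MINIMAL_TO_NONE": (
        "GPT-5.1 models don't support 'minimal' reasoning effort. "
        "Converting to 'none' for {model_name}."
    ),
}
_already_warned: set[str] = set()


def maybe_warn(key: str, **kwargs) -> None:
    # Emit each warning key at most once per process (assumed behavior).
    if key not in _already_warned:
        _already_warned.add(key)
        print("WARNING:", WARNINGS[key].format(**kwargs))


maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1-codex")
maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1-codex")  # suppressed
```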
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.72
+Version: 0.0.73
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -2,7 +2,7 @@ lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=nBKuP6buwQYNMCP9f2SOuPkfowKRijJv4-bI-STg7Iw,40824
+lm_deluge/client.py,sha256=WOYYSJopBqN3SPlRorkkgapYnSUvo6CveDoPMfZz8QQ,41409
 lm_deluge/config.py,sha256=s3wFBRD6pi0wtXMJRmQDT2vdiqSvhjUPmLehbkv41i0,943
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
@@ -15,7 +15,7 @@ lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=Kp2O5lDq_WVo_ASxjLQSHzVRbaxZkS6J0JIIskBjux0,28909
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
-lm_deluge/warnings.py,sha256=nlDJMCw30VhDEFxqLO2-bfXH_Tv5qmlglzUSbokCSw8,1498
+lm_deluge/warnings.py,sha256=xXXYXEfaaSVr__16BKOEEWLdfZi1L-2ylzTrXTRyO18,1748
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 lm_deluge/api_requests/anthropic.py,sha256=QGq3G5jJIGcoM2HdRt73GgkvZs4GOViyjYexWex05Vk,8927
 lm_deluge/api_requests/base.py,sha256=mXEM85mcU_5LD-ugELpCl28tv-tpHKcaxerTIVLQZVo,10436
@@ -24,7 +24,7 @@ lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
 lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
-lm_deluge/api_requests/openai.py,sha256=ezlGYNGHFvQGgs-xuxhDDeiEembHhVh_KqJBdRBqSlM,26038
+lm_deluge/api_requests/openai.py,sha256=ZqzQxs8CNUk757Q-1AfpKODkg5yPFHZjsqLm8bwjYDs,26584
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -58,7 +58,7 @@ lm_deluge/models/kimi.py,sha256=1voigLdNO2CxpWv0KDpQPP3Wolx5WrqgAlYL9ObJFuQ,1117
 lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
 lm_deluge/models/minimax.py,sha256=rwW9gNotAYfDVtMlqmSYegN6GoZM_9DSNNZU2yPOmaU,275
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
-lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
+lm_deluge/models/openai.py,sha256=t6fcXo0YXgPQ6YiftZJP8gPw8FOBqoVapSavMVmtaOw,12411
 lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
 lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
 lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
@@ -69,8 +69,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.72.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.72.dist-info/METADATA,sha256=Ffg1w5rphPj_MScOCYhA1cQmSKsc2XjBqJefXiZOtDk,13514
-lm_deluge-0.0.72.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.72.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.72.dist-info/RECORD,,
+lm_deluge-0.0.73.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.73.dist-info/METADATA,sha256=zr-cq_RyQNDDplrt6CUbVfGNpsVQEbACExRg5w6mE5M,13514
+lm_deluge-0.0.73.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.73.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.73.dist-info/RECORD,,