lm-deluge 0.0.71__py3-none-any.whl → 0.0.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lm_deluge/api_requests/base.py CHANGED
@@ -90,9 +90,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-        start_time = time.time()
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-            last_status: str | None = None
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                    print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )
 
             try:
                 self.context.status_tracker.total_requests += 1
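Note on the clock change above: time.time() follows the wall clock, which can jump when NTP or the user adjusts it, so a deadline computed from it can fire early or never. time.monotonic() only moves forward. A minimal sketch of the deadline idiom the new code adopts (timeout value illustrative):

    import time

    timeout = 30.0  # illustrative; the real value is self.context.request_timeout
    start = time.monotonic()
    deadline = start + timeout

    def remaining() -> float:
        # Seconds of budget left; <= 0 means this attempt has timed out.
        return deadline - time.monotonic()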
@@ -109,14 +132,11 @@ class APIRequestBase(ABC):
                 last_status = data["status"]
 
                 while True:
-                    if time.time() - start_time > self.context.request_timeout:
-                        # cancel the response
-                        async with session.post(
-                            url=f"{self.url}/{response_id}/cancel",
-                            headers=self.request_header,
-                        ) as http_response:
-                            http_response.raise_for_status()
-
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +148,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(5.0)
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
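The capped sleep in the hunk above keeps the poll loop from overshooting the deadline by up to a full poll interval. Worked values (illustrative):

    poll_interval = 5.0

    # 12s of budget left -> sleep the full 5s interval.
    assert min(poll_interval, max(12.0, 0)) == 5.0
    # 2.3s left -> sleep exactly 2.3s, so the timeout check fires on schedule.
    assert min(poll_interval, max(2.3, 0)) == 2.3
    # Deadline already passed -> sleep 0 rather than a negative duration.
    assert min(poll_interval, max(-1.0, 0)) == 0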
@@ -146,6 +167,8 @@ class APIRequestBase(ABC):
                         return await self.handle_response(http_response)
 
             except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                 raise_if_modal_exception(e)
                 tb = traceback.format_exc()
                 print(tb)
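Taken together, the base.py hunks implement a start -> poll -> cancel-on-timeout-or-error loop. A self-contained sketch of the pattern: the {url}/{id} and {url}/{id}/cancel endpoint shapes follow the diff, while the function name, the "id"/"status" fields, and the terminal-status check are assumptions based on the OpenAI background-responses flow:

    import asyncio
    import time

    import aiohttp


    async def start_poll_cancel(url: str, headers: dict, payload: dict, timeout: float):
        poll_interval = 5.0
        deadline = time.monotonic() + timeout
        async with aiohttp.ClientSession() as session:

            async def cancel(response_id: str) -> None:
                # Best effort: ask the server to stop working on the request.
                try:
                    async with session.post(f"{url}/{response_id}/cancel", headers=headers):
                        pass
                except Exception:
                    pass

            async with session.post(url, headers=headers, json=payload) as resp:
                resp.raise_for_status()
                data = await resp.json()
            response_id = data["id"]
            try:
                while True:
                    remaining = deadline - time.monotonic()
                    if remaining <= 0:
                        await cancel(response_id)
                        return None  # caller treats None as a timeout
                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                    async with session.get(f"{url}/{response_id}", headers=headers) as resp:
                        resp.raise_for_status()
                        data = await resp.json()
                    if data["status"] not in ("queued", "in_progress"):
                        return data
            except Exception:
                await cancel(response_id)
                raise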
lm_deluge/api_requests/openai.py CHANGED
@@ -67,10 +67,12 @@ async def _build_oa_chat_request(
             effort = "minimal"
         else:
             effort = "low"
-        if effort == "minimal" and "gpt-5" not in model.id:
-            print(
-                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
-            )
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
@@ -271,16 +273,24 @@ async def _build_oa_responses_request(
         request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
     if model.reasoning_model:
-        if sampling_params.reasoning_effort in [None, "none"]:
+        effort = sampling_params.reasoning_effort
+        if effort in [None, "none"]:
             # gemini models can switch reasoning off
             if "gemini" in model.id:
-                sampling_params.reasoning_effort = "none"
+                effort = "none"
             else:
-                sampling_params.reasoning_effort = "low"
+                effort = "low"
+        # GPT-5.1 models don't support 'minimal', they support 'none' instead
+        if effort == "minimal" and "gpt-5.1" in model.id:
+            maybe_warn("WARN_MINIMAL_TO_NONE", model_name=context.model_name)
+            effort = "none"
+        elif effort == "minimal" and "gpt-5" not in model.id:
+            maybe_warn("WARN_MINIMAL_TO_LOW", model_name=context.model_name)
+            effort = "low"
         request_json["temperature"] = 1.0
         request_json["top_p"] = 1.0
         request_json["reasoning"] = {
-            "effort": sampling_params.reasoning_effort,
+            "effort": effort,
             "summary": "auto",
         }
     else:
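Both builders now apply the same effort-normalization rule, which reads as a pure function. A sketch (the function name is hypothetical; the branching mirrors the diff):

    def normalize_effort(effort: str | None, model_id: str) -> str:
        # No effort requested: gemini models can switch reasoning off,
        # everything else falls back to "low".
        if effort in (None, "none"):
            return "none" if "gemini" in model_id else "low"
        # GPT-5.1 replaces "minimal" with "none"; models outside the gpt-5
        # family don't support "minimal" at all and get "low".
        if effort == "minimal" and "gpt-5.1" in model_id:
            return "none"
        if effort == "minimal" and "gpt-5" not in model_id:
            return "low"
        return effort

    assert normalize_effort("minimal", "gpt-5.1-codex") == "none"
    assert normalize_effort("minimal", "gpt-5") == "minimal"  # plain gpt-5 keeps it
    assert normalize_effort("minimal", "o4-mini") == "low"
    assert normalize_effort(None, "gemini-2.5-pro") == "none"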
lm_deluge/client.py CHANGED
@@ -44,10 +44,14 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """
 
-    _REASONING_SUFFIXES: ClassVar[dict[str, Literal["low", "medium", "high"]]] = {
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-minimal": "minimal",
+        "-none": "none",
     }
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
@@ -149,9 +153,11 @@ class _LLMClient(BaseModel):
 
     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+    ) -> tuple[
+        list[str], list[Literal["low", "medium", "high", "minimal", "none"] | None]
+    ]:
         normalized: list[str] = []
-        efforts: list[Literal["low", "medium", "high"] | None] = []
+        efforts: list[Literal["low", "medium", "high", "minimal", "none"] | None] = []
 
         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -164,7 +170,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts
 
     def _align_sampling_params(
-        self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (
@@ -332,7 +341,7 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[str, Literal["low", "medium", "high"] | None]:
+    ) -> tuple[str, Literal["low", "medium", "high", "minimal", "none"] | None]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
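With "-minimal" and "-none" registered, reasoning effort can be chosen directly in the model name, and the suffix is stripped only when the trimmed name is a real model. A sketch of that behavior (the registry check is an assumption about what "registered" means here):

    SUFFIXES = {"-low": "low", "-medium": "medium", "-high": "high",
                "-minimal": "minimal", "-none": "none"}

    def strip_suffix(model_name: str, registry: set[str]) -> tuple[str, str | None]:
        for suffix, effort in SUFFIXES.items():
            if model_name.endswith(suffix) and len(model_name) > len(suffix):
                trimmed = model_name[: -len(suffix)]
                if trimmed in registry:  # only strip when the base model exists
                    return trimmed, effort
        return model_name, None

    registry = {"gpt-5.1", "gpt-4.1-mini"}
    assert strip_suffix("gpt-5.1-none", registry) == ("gpt-5.1", "none")
    assert strip_suffix("gpt-4.1-mini", registry) == ("gpt-4.1-mini", None)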
@@ -364,6 +373,15 @@ class _LLMClient(BaseModel):
         assert (
             self.use_responses_api
         ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
            if len(self.model_names) == 1:
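The check fails fast at construction time rather than when the first request errors. Illustrative usage; the public constructor name and keyword form are assumed, not shown in this diff:

    from lm_deluge import LLMClient

    # Fine: codex models are served via the Responses API.
    client = LLMClient(model_names=["gpt-5.1-codex"], use_responses_api=True)

    # Raises ValueError: "Model 'gpt-5.1-codex' requires use_responses_api=True. ..."
    client = LLMClient(model_names=["gpt-5.1-codex"])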
lm_deluge/models/openai.py CHANGED
@@ -10,6 +10,48 @@ OPENAI_MODELS = {
     # ░███
     # █████
     # ░░░░░
+    "gpt-5.1": {
+        "id": "gpt-5.1",
+        "name": "gpt-5.1",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex": {
+        "id": "gpt-5.1-codex",
+        "name": "gpt-5.1-codex",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 1.25,
+        "cached_input_cost": 0.125,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
+    "gpt-5.1-codex-mini": {
+        "id": "gpt-5.1-codex-mini",
+        "name": "gpt-5.1-codex-mini",
+        "api_base": "https://api.openai.com/v1",
+        "api_key_env_var": "OPENAI_API_KEY",
+        "supports_json": False,
+        "supports_logprobs": True,
+        "supports_responses": True,
+        "api_spec": "openai",
+        "input_cost": 0.25,
+        "cached_input_cost": 0.025,
+        "output_cost": 2.0,
+        "reasoning_model": True,
+    },
     "gpt-5-codex": {
         "id": "gpt-5-codex",
         "name": "gpt-5-codex",
lm_deluge/warnings.py CHANGED
@@ -7,6 +7,8 @@ WARNINGS: dict[str, str] = {
     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+    "WARN_MINIMAL_TO_LOW": "'minimal' reasoning effort only allowed for gpt-5 models. Setting to 'low' for {model_name}.",
+    "WARN_MINIMAL_TO_NONE": "GPT-5.1 models don't support 'minimal' reasoning effort. Converting to 'none' for {model_name}.",
 }
 
 
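The diff establishes only maybe_warn's call shape: a warning key plus format kwargs. A plausible minimal implementation consistent with that shape; the once-per-process deduplication is a guess, not the package's actual code:

    import warnings

    WARNINGS = {
        "WARN_MINIMAL_TO_NONE": (
            "GPT-5.1 models don't support 'minimal' reasoning effort. "
            "Converting to 'none' for {model_name}."
        ),
    }
    _seen: set[str] = set()

    def maybe_warn(key: str, **kwargs) -> None:
        # Look up the template, format it, and emit it at most once.
        if key not in _seen:
            _seen.add(key)
            warnings.warn(WARNINGS[key].format(**kwargs))

    maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1")  # warns
    maybe_warn("WARN_MINIMAL_TO_NONE", model_name="gpt-5.1")  # silent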
lm_deluge-0.0.73.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.71
+Version: 0.0.73
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
lm_deluge-0.0.73.dist-info/RECORD CHANGED
@@ -2,7 +2,7 @@ lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=nBKuP6buwQYNMCP9f2SOuPkfowKRijJv4-bI-STg7Iw,40824
+lm_deluge/client.py,sha256=WOYYSJopBqN3SPlRorkkgapYnSUvo6CveDoPMfZz8QQ,41409
 lm_deluge/config.py,sha256=s3wFBRD6pi0wtXMJRmQDT2vdiqSvhjUPmLehbkv41i0,943
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
@@ -15,16 +15,16 @@ lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
 lm_deluge/tool.py,sha256=Kp2O5lDq_WVo_ASxjLQSHzVRbaxZkS6J0JIIskBjux0,28909
 lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
 lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
-lm_deluge/warnings.py,sha256=nlDJMCw30VhDEFxqLO2-bfXH_Tv5qmlglzUSbokCSw8,1498
+lm_deluge/warnings.py,sha256=xXXYXEfaaSVr__16BKOEEWLdfZi1L-2ylzTrXTRyO18,1748
 lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 lm_deluge/api_requests/anthropic.py,sha256=QGq3G5jJIGcoM2HdRt73GgkvZs4GOViyjYexWex05Vk,8927
-lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
+lm_deluge/api_requests/base.py,sha256=mXEM85mcU_5LD-ugELpCl28tv-tpHKcaxerTIVLQZVo,10436
 lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
 lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
 lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
-lm_deluge/api_requests/openai.py,sha256=ezlGYNGHFvQGgs-xuxhDDeiEembHhVh_KqJBdRBqSlM,26038
+lm_deluge/api_requests/openai.py,sha256=ZqzQxs8CNUk757Q-1AfpKODkg5yPFHZjsqLm8bwjYDs,26584
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -58,7 +58,7 @@ lm_deluge/models/kimi.py,sha256=1voigLdNO2CxpWv0KDpQPP3Wolx5WrqgAlYL9ObJFuQ,1117
 lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
 lm_deluge/models/minimax.py,sha256=rwW9gNotAYfDVtMlqmSYegN6GoZM_9DSNNZU2yPOmaU,275
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
-lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
+lm_deluge/models/openai.py,sha256=t6fcXo0YXgPQ6YiftZJP8gPw8FOBqoVapSavMVmtaOw,12411
 lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
 lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
 lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
@@ -69,8 +69,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.71.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.71.dist-info/METADATA,sha256=kgq3xiS7tMIbXpx5UkhCEA_yJAJvgGOPaie_ZlScTxQ,13514
-lm_deluge-0.0.71.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.71.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.71.dist-info/RECORD,,
+lm_deluge-0.0.73.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.73.dist-info/METADATA,sha256=zr-cq_RyQNDDplrt6CUbVfGNpsVQEbACExRg5w6mE5M,13514
+lm_deluge-0.0.73.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.73.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.73.dist-info/RECORD,,