lm-deluge 0.0.58__py3-none-any.whl → 0.0.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,5 @@
  import asyncio
+ import time
  import traceback
  from abc import ABC, abstractmethod

@@ -6,6 +7,7 @@ import aiohttp
  from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
+ from ..models.openai import OPENAI_MODELS
  from ..request_context import RequestContext
  from .response import APIResponse

@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
          if self.context.status_tracker:
              self.context.status_tracker.task_succeeded(self.context.task_id)

+     async def _execute_once_background_mode(self) -> APIResponse:
+         """
+         ONLY for OpenAI responses API. Implement the
+         start -> poll -> result style of request.
+         """
+         assert self.context.status_tracker, "no status tracker"
+         start_time = time.time()
+         async with aiohttp.ClientSession() as session:
+             last_status: str | None = None
+
+             try:
+                 self.context.status_tracker.total_requests += 1
+                 assert self.url is not None, "URL is not set"
+                 async with session.post(
+                     url=self.url,
+                     headers=self.request_header,
+                     json=self.request_json,
+                 ) as http_response:
+                     # make sure we created the Response object
+                     http_response.raise_for_status()
+                     data = await http_response.json()
+                     response_id = data["id"]
+                     last_status = data["status"]
+
+                 while True:
+                     if time.time() - start_time > self.context.request_timeout:
+                         # cancel the response
+                         async with session.post(
+                             url=f"{self.url}/{response_id}/cancel",
+                             headers=self.request_header,
+                         ) as http_response:
+                             http_response.raise_for_status()
+
+                         return APIResponse(
+                             id=self.context.task_id,
+                             model_internal=self.context.model_name,
+                             prompt=self.context.prompt,
+                             sampling_params=self.context.sampling_params,
+                             status_code=None,
+                             is_error=True,
+                             error_message="Request timed out (terminated by client).",
+                             content=None,
+                             usage=None,
+                         )
+                     # poll for the response
+                     await asyncio.sleep(5.0)
+                     async with session.get(
+                         url=f"{self.url}/{response_id}",
+                         headers=self.request_header,
+                     ) as http_response:
+                         http_response.raise_for_status()
+                         data = await http_response.json()
+
+                         if data["status"] != last_status:
+                             print(
+                                 f"Background req {response_id} status updated to: {data['status']}"
+                             )
+                             last_status = data["status"]
+                         if last_status not in ["queued", "in_progress"]:
+                             return await self.handle_response(http_response)
+
+             except Exception as e:
+                 raise_if_modal_exception(e)
+                 tb = traceback.format_exc()
+                 print(tb)
+                 return APIResponse(
+                     id=self.context.task_id,
+                     model_internal=self.context.model_name,
+                     prompt=self.context.prompt,
+                     sampling_params=self.context.sampling_params,
+                     status_code=None,
+                     is_error=True,
+                     error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                     content=None,
+                     usage=None,
+                 )
+
      async def execute_once(self) -> APIResponse:
          """Send the HTTP request once and return the parsed APIResponse."""
          await self.build_request()
          assert self.context.status_tracker
-         # try:
-         # dumped = json.dumps(self.request_json)
-         # except Exception:
-         # print("couldn't serialize request json")
-         # print(self.request_json)
+
+         if (
+             self.context.background
+             and self.context.use_responses_api
+             and self.context.model_name in OPENAI_MODELS
+         ):
+             return await self._execute_once_background_mode()
+
          try:
              self.context.status_tracker.total_requests += 1
              timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
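For orientation, here is a minimal standalone sketch of the start -> poll -> result flow that `_execute_once_background_mode` implements. It is illustrative only: the endpoint and field names follow the public OpenAI Responses API and the diff above, while the model name and polling interval are arbitrary choices, not taken from the package.

    import asyncio
    import os

    import aiohttp

    RESPONSES_URL = "https://api.openai.com/v1/responses"  # assumed endpoint
    HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}


    async def run_in_background(prompt: str) -> dict:
        async with aiohttp.ClientSession(headers=HEADERS) as session:
            # 1. start: submit the request with background=True
            async with session.post(
                RESPONSES_URL,
                json={"model": "gpt-5", "input": prompt, "background": True},
            ) as resp:
                resp.raise_for_status()
                data = await resp.json()

            # 2. poll: re-fetch the response until it leaves queued/in_progress
            while data["status"] in ("queued", "in_progress"):
                await asyncio.sleep(5.0)
                async with session.get(f"{RESPONSES_URL}/{data['id']}") as resp:
                    resp.raise_for_status()
                    data = await resp.json()

            # 3. result: the terminal payload (completed, failed, or cancelled)
            return data


    if __name__ == "__main__":
        print(asyncio.run(run_in_background("Say hello."))["status"])
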
lm_deluge/api_requests/bedrock.py CHANGED
@@ -1,10 +1,11 @@
  import asyncio
  import json
  import os
- import warnings

  from aiohttp import ClientResponse

+ from lm_deluge.warnings import maybe_warn
+
  try:
      from requests_aws4auth import AWS4Auth
  except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
      # Note: GPT-OSS on Bedrock doesn't support response_format parameter
      # Even though the model supports JSON, we can't use the response_format parameter
      if sampling_params.json_mode and model.supports_json:
-         warnings.warn(
-             f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-         )
+         maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)

      if tools:
          request_tools = []
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,11 +1,12 @@
  import json
  import os
- import warnings
  from typing import Any
+
  from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import Tool
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(

      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)

      # Add tools if provided
      if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):

          # Warn if cache is specified for Gemini model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )

          self.model = APIModel.from_registry(self.context.model_name)
lm_deluge/api_requests/mistral.py CHANGED
@@ -1,9 +1,10 @@
  import json
  import os
- import warnings

  from aiohttp import ClientResponse

+ from lm_deluge.warnings import maybe_warn
+
  from ..models import APIModel
  from ..prompt import Message
  from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):

          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
              "max_tokens": self.context.sampling_params.max_new_tokens,
          }
          if self.context.sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
          if self.context.sampling_params.logprobs:
-             warnings.warn(
-                 f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-             )
+             maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
          if self.context.sampling_params.json_mode and self.model.supports_json:
              self.request_json["response_format"] = {"type": "json_object"}

lm_deluge/api_requests/openai.py CHANGED
@@ -1,7 +1,6 @@
  import json
  import os
  import traceback as tb
- import warnings
  from types import SimpleNamespace

  import aiohttp
@@ -9,6 +8,7 @@ from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
          "temperature": sampling_params.temperature,
          "top_p": sampling_params.top_p,
      }
+     if context.service_tier:
+         assert context.service_tier in [
+             "auto",
+             "default",
+             "flex",
+             "priority",
+         ], f"Invalid service tier: {context.service_tier}"
+         # flex is only supported for o3, o4-mini, gpt-5 models
+         if context.service_tier == "flex":
+             model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+             if not model_supports_flex:
+                 print(
+                     f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                     f"Using 'auto' instead for model {model.id}."
+                 )
+                 request_json["service_tier"] = "auto"
+             else:
+                 request_json["service_tier"] = context.service_tier
+         else:
+             request_json["service_tier"] = context.service_tier
      # set max_tokens or max_completion_tokens dep. on provider
      if "cohere" in model.api_base:
          request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -55,9 +75,8 @@ async def _build_oa_chat_request(
          request_json["reasoning_effort"] = effort
      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
      if sampling_params.logprobs:
          request_json["logprobs"] = True
          if sampling_params.top_logprobs is not None:
@@ -85,8 +104,10 @@ class OpenAIRequest(APIRequestBase):

          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -213,9 +234,6 @@ class OpenAIRequest(APIRequestBase):
  async def _build_oa_responses_request(
      model: APIModel,
      context: RequestContext,
-     # prompt: Conversation,
-     # tools: list[Tool] | None,
-     # sampling_params: SamplingParams,
  ):
      prompt = context.prompt
      sampling_params = context.sampling_params
@@ -226,7 +244,28 @@ async def _build_oa_responses_request(
          "input": openai_responses_format["input"],
          "temperature": sampling_params.temperature,
          "top_p": sampling_params.top_p,
+         "background": context.background or False,
      }
+     if context.service_tier:
+         assert context.service_tier in [
+             "auto",
+             "default",
+             "flex",
+             "priority",
+         ], f"Invalid service tier: {context.service_tier}"
+         # flex is only supported for o3, o4-mini, gpt-5 models
+         if context.service_tier == "flex":
+             model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+             if not model_supports_flex:
+                 print(
+                     f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                     f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                 )
+                 request_json["service_tier"] = "auto"
+             else:
+                 request_json["service_tier"] = context.service_tier
+         else:
+             request_json["service_tier"] = context.service_tier
      if sampling_params.max_new_tokens:
          request_json["max_output_tokens"] = sampling_params.max_new_tokens

@@ -245,9 +284,7 @@ async def _build_oa_responses_request(
          }
      else:
          if sampling_params.reasoning_effort:
-             warnings.warn(
-                 f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
-             )
+             maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)

      if sampling_params.json_mode and model.supports_json:
          request_json["text"] = {"format": {"type": "json_object"}}
@@ -284,8 +321,10 @@ class OpenAIResponsesRequest(APIRequestBase):
          super().__init__(context)
          # Warn if cache is specified for non-Anthropic model
          if self.context.cache is not None:
-             warnings.warn(
-                 f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+             maybe_warn(
+                 "WARN_CACHING_UNSUPPORTED",
+                 model_name=self.context.model_name,
+                 cache_param=self.context.cache,
              )
          self.model = APIModel.from_registry(self.context.model_name)

@@ -488,8 +527,10 @@ async def stream_chat(
      extra_headers: dict[str, str] | None = None,
  ):
      if cache is not None:
-         warnings.warn(
-             f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+         maybe_warn(
+             "WARN_CACHING_UNSUPPORTED",
+             model_name=model_name,
+             cache_param=cache,
          )

      model = APIModel.from_registry(model_name)
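Putting the two Responses-API additions together, the request body assembled by `_build_oa_responses_request` now looks roughly like this (illustrative values; only the field names are taken from the diff above):

    request_json = {
        "model": "gpt-5",
        "input": [{"role": "user", "content": "Summarize this document."}],
        "temperature": 0.75,
        "top_p": 1.0,
        "background": True,       # run server-side; the client polls for the result
        "service_tier": "flex",    # falls back to "auto" for models without flex
        "max_output_tokens": 512,
    }
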
lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
  import os
  import tempfile
  import time
- from typing import Literal, Sequence
+ from typing import Literal, Sequence, cast

  import aiohttp
  from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
  from lm_deluge.api_requests.openai import _build_oa_chat_request
  from lm_deluge.config import SamplingParams
  from lm_deluge.models import APIModel, registry
- from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+ from lm_deluge.prompt import (
+     CachePattern,
+     Conversation,
+     Prompt,
+     prompts_to_conversations,
+ )
  from lm_deluge.request_context import RequestContext


@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
  async def create_batch_files_oa(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      batch_size: int = 50_000,
      destination: str | None = None,  # if none provided, temp files
  ):
      MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
      MAX_BATCH_SIZE_ITEMS = batch_size

-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+     assert isinstance(prompts, Sequence)
      if any(p is None for p in prompts):
          raise ValueError("All prompts must be valid.")

@@ -251,14 +260,18 @@ async def create_batch_files_oa(
  async def submit_batches_oa(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      batch_size: int = 50_000,
  ):
      """Write OpenAI batch requests to a file and submit."""
      MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
      MAX_BATCH_SIZE_ITEMS = batch_size

-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+     assert isinstance(prompts, Sequence)
      if any(p is None for p in prompts):
          raise ValueError("All prompts must be valid.")

@@ -342,7 +355,7 @@ async def submit_batches_oa(
  async def submit_batches_anthropic(
      model: str,
      sampling_params: SamplingParams,
-     prompts: Sequence[str | list[dict] | Conversation],
+     prompts: Prompt | Sequence[Prompt],
      *,
      cache: CachePattern | None = None,
      batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
      MAX_BATCH_SIZE_ITEMS = batch_size

      # Convert prompts to Conversations
-     prompts = prompts_to_conversations(prompts)
+     if not isinstance(prompts, list):
+         prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+     prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

      request_headers = None
      batch_tasks = []
      current_batch = []
      current_batch_size = 0
-
+     assert isinstance(prompts, Sequence)
      for idx, prompt in enumerate(prompts):
          assert isinstance(prompt, Conversation)
          context = RequestContext(
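Because the batch helpers now accept `Prompt | Sequence[Prompt]`, a single bare prompt is wrapped into a one-element list before conversion. A hedged usage sketch (the model name and the no-argument `SamplingParams()` construction are assumptions, not taken from the package docs):

    import asyncio

    from lm_deluge.batches import submit_batches_oa
    from lm_deluge.config import SamplingParams

    # A single string prompt no longer needs to be wrapped in a list by the caller.
    batches = asyncio.run(
        submit_batches_oa(
            "gpt-5",                            # model name as registered in lm_deluge
            SamplingParams(),                   # assumed to have usable defaults
            "Summarize the attached report.",   # one prompt, passed bare
        )
    )
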
lm_deluge/client.py CHANGED
@@ -1,5 +1,14 @@
  import asyncio
- from typing import Any, AsyncGenerator, Callable, Literal, Self, Sequence, overload
+ from typing import (
+     Any,
+     AsyncGenerator,
+     Callable,
+     Literal,
+     Self,
+     Sequence,
+     cast,
+     overload,
+ )

  import numpy as np
  import yaml
@@ -12,7 +21,12 @@ from lm_deluge.batches import (
      submit_batches_oa,
      wait_for_batch_completion_async,
  )
- from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+ from lm_deluge.prompt import (
+     CachePattern,
+     Conversation,
+     Prompt,
+     prompts_to_conversations,
+ )
  from lm_deluge.tool import MCPServer, Tool

  from .api_requests.base import APIResponse
@@ -40,6 +54,9 @@ class _LLMClient(BaseModel):
      request_timeout: int = 30
      cache: Any = None
      extra_headers: dict[str, str] | None = None
+     extra_body: dict[str, str] | None = None
+     use_responses_api: bool = False
+     background: bool = False
      # sampling params - if provided, and sampling_params is not,
      # these override the defaults
      temperature: float = 0.75
@@ -100,13 +117,120 @@ class _LLMClient(BaseModel):

      # NEW! Builder methods
      def with_model(self, model: str):
-         self.model_names = [model]
+         self._update_models([model])
          return self

      def with_models(self, models: list[str]):
-         self.model_names = models
+         self._update_models(models)
          return self

+     def _update_models(self, models: list[str]) -> None:
+         normalized, per_model_efforts = self._normalize_model_names(models)
+         if self.reasoning_effort is None:
+             unique_efforts = {eff for eff in per_model_efforts if eff is not None}
+             if len(normalized) == 1 and per_model_efforts[0] is not None:
+                 self.reasoning_effort = per_model_efforts[0]
+             elif (
+                 len(unique_efforts) == 1
+                 and len(unique_efforts) != 0
+                 and None not in per_model_efforts
+             ):
+                 self.reasoning_effort = next(iter(unique_efforts))  # type: ignore
+         self.model_names = normalized
+         self._align_sampling_params(per_model_efforts)
+         self._reset_model_weights()
+
+     def _normalize_model_names(
+         self, models: list[str]
+     ) -> tuple[list[str], list[Literal["low", "medium", "high"] | None]]:
+         reasoning_effort_suffixes: dict[str, Literal["low", "medium", "high"]] = {
+             "-low": "low",
+             "-medium": "medium",
+             "-high": "high",
+         }
+         normalized: list[str] = []
+         efforts: list[Literal["low", "medium", "high"] | None] = []
+
+         for name in models:
+             base_name = name
+             effort: Literal["low", "medium", "high"] | None = None
+             for suffix, candidate in reasoning_effort_suffixes.items():
+                 if name.endswith(suffix) and len(name) > len(suffix):
+                     base_name = name[: -len(suffix)]
+                     effort = candidate
+                     break
+             normalized.append(base_name)
+             efforts.append(effort)
+
+         return normalized, efforts
+
+     def _align_sampling_params(
+         self, per_model_efforts: list[Literal["low", "medium", "high"] | None]
+     ) -> None:
+         if len(per_model_efforts) < len(self.model_names):
+             per_model_efforts = per_model_efforts + [None] * (
+                 len(self.model_names) - len(per_model_efforts)
+             )
+
+         if not self.model_names:
+             self.sampling_params = []
+             return
+
+         if not self.sampling_params:
+             self.sampling_params = []
+
+         if len(self.sampling_params) == 0:
+             for _ in self.model_names:
+                 self.sampling_params.append(
+                     SamplingParams(
+                         temperature=self.temperature,
+                         top_p=self.top_p,
+                         json_mode=self.json_mode,
+                         max_new_tokens=self.max_new_tokens,
+                         reasoning_effort=self.reasoning_effort,
+                         logprobs=self.logprobs,
+                         top_logprobs=self.top_logprobs,
+                     )
+                 )
+         elif len(self.sampling_params) == 1 and len(self.model_names) > 1:
+             base_param = self.sampling_params[0]
+             self.sampling_params = [
+                 base_param.model_copy(deep=True) for _ in self.model_names
+             ]
+         elif len(self.sampling_params) != len(self.model_names):
+             base_param = self.sampling_params[0]
+             self.sampling_params = [
+                 base_param.model_copy(deep=True) for _ in self.model_names
+             ]
+
+         if self.reasoning_effort is not None:
+             for sp in self.sampling_params:
+                 sp.reasoning_effort = self.reasoning_effort
+         else:
+             for sp, effort in zip(self.sampling_params, per_model_efforts):
+                 if effort is not None:
+                     sp.reasoning_effort = effort
+
+     def _reset_model_weights(self) -> None:
+         if not self.model_names:
+             self.model_weights = []
+             return
+
+         if isinstance(self.model_weights, list):
+             if len(self.model_weights) == len(self.model_names) and any(
+                 self.model_weights
+             ):
+                 total = sum(self.model_weights)
+                 if total == 0:
+                     self.model_weights = [
+                         1 / len(self.model_names) for _ in self.model_names
+                     ]
+                 else:
+                     self.model_weights = [w / total for w in self.model_weights]
+                 return
+         # Fallback to uniform distribution
+         self.model_weights = [1 / len(self.model_names) for _ in self.model_names]
+
      def with_limits(
          self,
          max_requests_per_minute: int | None = None,
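The suffix convention these helpers implement fits in a few lines; a standalone restatement for reference (illustrative, mirroring `_normalize_model_names` above):

    def split_reasoning_suffix(name: str) -> tuple[str, str | None]:
        # "gpt-5-high" -> ("gpt-5", "high"); names without a suffix pass through.
        for suffix in ("-low", "-medium", "-high"):
            if name.endswith(suffix) and len(name) > len(suffix):
                return name[: -len(suffix)], suffix.lstrip("-")
        return name, None


    assert split_reasoning_suffix("gpt-5-high") == ("gpt-5", "high")
    assert split_reasoning_suffix("o3") == ("o3", None)
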
@@ -133,8 +257,29 @@ class _LLMClient(BaseModel):
      @model_validator(mode="before")
      @classmethod
      def fix_lists(cls, data) -> "_LLMClient":
+         # Parse reasoning effort from model name suffixes (e.g., "gpt-5-high")
+         # Only applies when a single model string is provided
          if isinstance(data.get("model_names"), str):
+             model_name = data["model_names"]
+             reasoning_effort_suffixes = {
+                 "-low": "low",
+                 "-medium": "medium",
+                 "-high": "high",
+             }
+
+             for suffix, effort in reasoning_effort_suffixes.items():
+                 if model_name.endswith(suffix):
+                     # Extract base model name by removing suffix
+                     base_model = model_name[: -len(suffix)]
+                     data["model_names"] = base_model
+
+                     # Set reasoning_effort if not already explicitly set
+                     if data.get("reasoning_effort") is None:
+                         data["reasoning_effort"] = effort
+                     break
+
              data["model_names"] = [data["model_names"]]
+
          if not isinstance(data.get("sampling_params", []), list):
              data["sampling_params"] = [data["sampling_params"]]
          if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
@@ -171,6 +316,11 @@ class _LLMClient(BaseModel):
          # normalize weights
          self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]

+         # background mode only allowed for responses api
+         if self.background:
+             assert (
+                 self.use_responses_api
+             ), "background mode only allowed for responses api"
          # Auto-generate name if not provided
          if self.name is None:
              if len(self.model_names) == 1:
@@ -256,13 +406,6 @@ class _LLMClient(BaseModel):
              # Idle wait before next capacity check. Aim for ~RPM spacing.
              await asyncio.sleep(max(60.0 / self.max_requests_per_minute, 0.01))

-     async def _execute_request(self, context: RequestContext) -> APIResponse:
-         """Create and send a single API request using the provided context."""
-         model_obj = APIModel.from_registry(context.model_name)
-         request = model_obj.make_request(context)
-         response = await request.execute_once()
-         return response
-
      async def process_single_request(
          self, context: RequestContext, retry_queue: asyncio.Queue | None = None
      ) -> APIResponse:
@@ -290,7 +433,9 @@ class _LLMClient(BaseModel):
              # Execute single request
              assert context.status_tracker
              context.status_tracker.update_pbar()
-             response = await self._execute_request(context)
+             model_obj = APIModel.from_registry(context.model_name)
+             request = model_obj.make_request(context)
+             response = await request.execute_once()

              # Handle successful response
              if not response.is_error:
@@ -350,36 +495,36 @@ class _LLMClient(BaseModel):
      @overload
      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: Literal[True],
          show_progress: bool = ...,
          tools: list[Tool | dict | MCPServer] | None = ...,
          cache: CachePattern | None = ...,
-         use_responses_api: bool = ...,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
      ) -> list[str | None]: ...

      @overload
      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: Literal[False] = ...,
          show_progress: bool = ...,
          tools: list[Tool | dict | MCPServer] | None = ...,
          cache: CachePattern | None = ...,
-         use_responses_api: bool = ...,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
      ) -> list[APIResponse]: ...

      async def process_prompts_async(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: bool = False,
          show_progress: bool = True,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> list[APIResponse] | list[str | None] | dict[str, int]:
          """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.

@@ -387,7 +532,9 @@ class _LLMClient(BaseModel):
          avoiding issues with tracker state accumulating across multiple calls.
          """
          # Convert prompts to Conversations
-         prompts = prompts_to_conversations(prompts)
+         if not isinstance(prompts, list):
+             prompts = prompts = cast(Sequence[Prompt], [prompts])
+         prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

          # Ensure tracker exists (start_nowait will call add_to_total for each task)
          if self._tracker is None:
@@ -398,13 +545,14 @@

          # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
          task_ids = []
+         assert isinstance(prompts, Sequence)
          for prompt in prompts:
              assert isinstance(prompt, Conversation)
              task_id = self.start_nowait(
                  prompt,
                  tools=tools,
                  cache=cache,
-                 use_responses_api=use_responses_api,
+                 service_tier=service_tier,
              )
              task_ids.append(task_id)

@@ -443,13 +591,12 @@

      def process_prompts_sync(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          return_completions_only: bool = False,
          show_progress=True,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
      ):
          return asyncio.run(
              self.process_prompts_async(
@@ -458,7 +605,6 @@
                  show_progress=show_progress,
                  tools=tools,
                  cache=cache,
-                 use_responses_api=use_responses_api,
              )
          )

@@ -478,18 +624,18 @@

      def start_nowait(
          self,
-         prompt: str | Conversation,
+         prompt: Prompt,
          *,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> int:
          tracker = self._get_tracker()
          task_id = self._next_task_id
          self._next_task_id += 1
          model, sampling_params = self._select_model()
-         if isinstance(prompt, str):
-             prompt = Conversation.user(prompt)
+         prompt = prompts_to_conversations([prompt])[0]
+         assert isinstance(prompt, Conversation)
          context = RequestContext(
              task_id=task_id,
              model_name=model,
@@ -500,7 +646,9 @@
              status_tracker=tracker,
              tools=tools,
              cache=cache,
-             use_responses_api=use_responses_api,
+             use_responses_api=self.use_responses_api,
+             background=self.background,
+             service_tier=service_tier,
              extra_headers=self.extra_headers,
              force_local_mcp=self.force_local_mcp,
          )
@@ -515,10 +663,10 @@
          *,
          tools: list[Tool | dict | MCPServer] | None = None,
          cache: CachePattern | None = None,
-         use_responses_api: bool = False,
+         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
      ) -> APIResponse:
          task_id = self.start_nowait(
-             prompt, tools=tools, cache=cache, use_responses_api=use_responses_api
+             prompt, tools=tools, cache=cache, service_tier=service_tier
          )
          return await self.wait_for(task_id)

@@ -698,7 +846,7 @@

      async def submit_batch_job(
          self,
-         prompts: Sequence[str | list[dict] | Conversation],
+         prompts: Prompt | Sequence[Prompt],
          *,
          tools: list[Tool] | None = None,
          cache: CachePattern | None = None,
@@ -760,6 +908,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -787,6 +937,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -813,6 +965,8 @@ def LLMClient(
      request_timeout: int = 30,
      cache: Any = None,
      extra_headers: dict[str, str] | None = None,
+     use_responses_api: bool = False,
+     background: bool = False,
      temperature: float = 0.75,
      top_p: float = 1.0,
      json_mode: bool = False,
@@ -851,6 +1005,8 @@ def LLMClient(
          request_timeout=request_timeout,
          cache=cache,
          extra_headers=extra_headers,
+         use_responses_api=use_responses_api,
+         background=background,
          temperature=temperature,
          top_p=top_p,
          json_mode=json_mode,
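A hedged usage sketch of the new client-level switches. The keyword names match the `LLMClient` factory signature in the diff above, but the positional model argument, the import path, and the prompt are assumptions for illustration:

    from lm_deluge.client import LLMClient

    # background=True without use_responses_api=True would trip the validator's
    # assertion, since background mode is only allowed for the Responses API.
    client = LLMClient(
        "gpt-5",                 # registry name; "-low/-medium/-high" suffixes also work
        use_responses_api=True,  # route requests through the OpenAI Responses API
        background=True,         # submit server-side, then poll until completion
        request_timeout=600,     # background polling is bounded by this timeout
    )
    responses = client.process_prompts_sync(["Write a haiku about rate limits."])
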
lm_deluge/models/__init__.py CHANGED
@@ -62,7 +62,7 @@ class APIModel:
              raise ValueError("no regions to sample")
          random.sample(regions, 1, counts=weights)[0]

-     def make_request(self, context: RequestContext):  # -> "APIRequestBase"
+     def make_request(self, context: RequestContext):
          from ..api_requests.common import CLASSES

          api_spec = self.api_spec
lm_deluge/models/openai.py CHANGED
@@ -10,6 +10,20 @@ OPENAI_MODELS = {
      # ░███
      # █████
      # ░░░░░
+     "gpt-5-codex": {
+         "id": "gpt-5-codex",
+         "name": "gpt-5-codex",
+         "api_base": "https://api.openai.com/v1",
+         "api_key_env_var": "OPENAI_API_KEY",
+         "supports_json": False,
+         "supports_logprobs": True,
+         "supports_responses": True,
+         "api_spec": "openai",
+         "input_cost": 1.25,
+         "cached_input_cost": 0.125,
+         "output_cost": 10.0,
+         "reasoning_model": True,
+     },
      "gpt-5": {
          "id": "gpt-5",
          "name": "gpt-5",
@@ -79,6 +93,20 @@ OPENAI_MODELS = {
          "output_cost": 12.0,
          "reasoning_model": False,
      },
+     "codex-mini-latest": {
+         "id": "codex-mini-latest",
+         "name": "codex-mini-latest",
+         "api_base": "https://api.openai.com/v1",
+         "api_key_env_var": "OPENAI_API_KEY",
+         "supports_json": True,
+         "supports_logprobs": False,
+         "supports_responses": True,
+         "api_spec": "openai",
+         "input_cost": 1.5,
+         "cached_input_cost": 0.375,
+         "output_cost": 6.0,
+         "reasoning_model": True,
+     },
      "o3": {
          "id": "o3",
          "name": "o3-2025-04-16",
lm_deluge/prompt.py CHANGED
@@ -2,13 +2,14 @@ import io
  import json
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import Literal, Sequence
+ from typing import Literal, Sequence, TypeAlias

  import tiktoken
  import xxhash

  from lm_deluge.file import File
  from lm_deluge.image import Image, MediaType
+ from lm_deluge.warnings import deprecated

  CachePattern = Literal[
      "tools_only",
@@ -415,12 +416,17 @@ class Message:

          return cls(role, parts)

-     def add_text(self, content: str) -> "Message":
+     def with_text(self, content: str) -> "Message":
          """Append a text block and return self for chaining."""
          self.parts.append(Text(content))
          return self

-     def add_image(
+     @deprecated("with_text")
+     def add_text(self, content: str) -> "Message":
+         """Append a text block and return self for chaining."""
+         return self.with_text(content)
+
+     def with_image(
          self,
          data: bytes | str | Path | io.BytesIO | Image,
          *,
@@ -446,7 +452,27 @@ class Message:
          self.parts.append(img)
          return self

-     def add_file(
+     @deprecated("with_image")
+     def add_image(
+         self,
+         data: bytes | str | Path | io.BytesIO | Image,
+         *,
+         media_type: MediaType | None = None,
+         detail: Literal["low", "high", "auto"] = "auto",
+         max_size: int | None = None,
+     ) -> "Message":
+         """
+         Append an image block and return self for chaining.
+
+         If max_size is provided, the image will be resized so that its longer
+         dimension equals max_size, but only if the longer dimension is currently
+         larger than max_size.
+         """
+         return self.with_image(
+             data=data, media_type=media_type, detail=detail, max_size=max_size
+         )
+
+     def with_file(
          self,
          data: bytes | str | Path | io.BytesIO,
          *,
@@ -460,11 +486,29 @@ class Message:
          self.parts.append(file)
          return self

-     def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
+     @deprecated("with_file")
+     def add_file(
+         self,
+         data: bytes | str | Path | io.BytesIO,
+         *,
+         media_type: str | None = None,
+         filename: str | None = None,
+     ) -> "Message":
+         """
+         Append a file block and return self for chaining.
+         """
+         return self.with_file(data, media_type=media_type, filename=filename)
+
+     def with_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
          """Append a tool call block and return self for chaining."""
          self.parts.append(ToolCall(id=id, name=name, arguments=arguments))
          return self

+     @deprecated("with_tool_call")
+     def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
+         """Append a tool call block and return self for chaining."""
+         return self.with_tool_call(id, name, arguments)
+
      def with_tool_result(
          self, tool_call_id: str, result: str | list[ToolResultPart]
      ) -> "Message":
@@ -472,11 +516,23 @@ class Message:
          self.parts.append(ToolResult(tool_call_id=tool_call_id, result=result))
          return self

-     def add_thinking(self, content: str) -> "Message":
+     @deprecated("with_tool_result")
+     def add_tool_result(
+         self, tool_call_id: str, result: str | list[ToolResultPart]
+     ) -> "Message":
+         """Append a tool result block and return self for chaining."""
+         return self.with_tool_result(tool_call_id, result)
+
+     def with_thinking(self, content: str) -> "Message":
          """Append a thinking block and return self for chaining."""
          self.parts.append(Thinking(content=content))
          return self

+     @deprecated("with_thinking")
+     def add_thinking(self, content: str) -> "Message":
+         """Append a thinking block and return self for chaining."""
+         return self.with_thinking(content)
+
      # -------- convenient constructors --------
      @classmethod
      def user(
@@ -488,25 +544,25 @@ class Message:
      ) -> "Message":
          res = cls("user", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          if image is not None:
-             res.add_image(image)
+             res.with_image(image)
          if file is not None:
-             res.add_file(file)
+             res.with_file(file)
          return res

      @classmethod
      def system(cls, text: str | None = None) -> "Message":
          res = cls("system", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          return res

      @classmethod
      def ai(cls, text: str | None = None) -> "Message":
          res = cls("assistant", [])
          if text is not None:
-             res.add_text(text)
+             res.with_text(text)
          return res

      # ──── provider-specific constructors ───
@@ -698,9 +754,9 @@ class Conversation:
      ) -> "Conversation":
          msg = Message.user(text)
          if image is not None:
-             msg.add_image(image)
+             msg.with_image(image)
          if file is not None:
-             msg.add_file(file)
+             msg.with_file(file)
          return cls([msg])

      @classmethod
@@ -1211,11 +1267,11 @@ class Conversation:
                  for i, tool_result in enumerate(m.tool_results):
                      images = tool_result.get_images()
                      if len(images) > 0:
-                         user_msg.add_text(
+                         user_msg.with_text(
                              f"[Images for Tool Call {tool_result.tool_call_id}]"
                          )
                          for img in images:
-                             user_msg.add_image(img)
+                             user_msg.with_image(img)

              else:
                  result.append(m.oa_chat())
@@ -1495,9 +1551,21 @@ class Conversation:
      return cls(msgs)


- def prompts_to_conversations(prompts: Sequence[str | list[dict] | Conversation]):
-     if any(isinstance(x, list) for x in prompts):
-         raise ValueError("can't convert list[dict] to conversation yet")
-     return [  # type: ignore
-         Conversation.user(p) if isinstance(p, str) else p for p in prompts
-     ]
+ Prompt: TypeAlias = str | list[dict] | Message | Conversation
+
+
+ def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Prompt]:
+     converted = []
+     for prompt in prompts:
+         if isinstance(prompt, Conversation):
+             converted.append(prompt)
+         elif isinstance(prompt, Message):
+             converted.append(Conversation([prompt]))
+         elif isinstance(prompt, str):
+             converted.append(Conversation.user(prompt))
+         elif isinstance(prompt, list):
+             conv, provider = Conversation.from_unknown(prompt)
+             converted.append(conv)
+         else:
+             raise ValueError(f"Unknown prompt type {type(prompt)}")
+     return converted
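Taken together, the renamed builder methods and the widened Prompt alias allow chains like the following (a small illustrative sketch, not taken from the package docs):

    from lm_deluge.prompt import Conversation, Message, prompts_to_conversations

    # with_* methods return self, so blocks can be chained; the old add_* names
    # still work but now emit a one-time DeprecationWarning.
    msg = Message.user("Summarize the attached notes.").with_text("Keep it under 50 words.")

    # prompts_to_conversations accepts str, Message, or Conversation entries
    # (and provider-style list[dict]) and yields Conversation objects.
    convs = prompts_to_conversations(["plain string prompt", msg, Conversation([msg])])
    assert all(isinstance(c, Conversation) for c in convs)
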
lm_deluge/request_context.py CHANGED
@@ -26,28 +26,22 @@ class RequestContext:

      # Infrastructure
      status_tracker: StatusTracker | None = None
-     results_arr: list[Any] | None = (
-         None  # list["APIRequestBase"] but avoiding circular import
-     )
+     # avoiding circular import
+     results_arr: list[Any] | None = None  # list["APIRequestBase"]
      callback: Callable | None = None

      # Optional features
      tools: list | None = None
      cache: CachePattern | None = None
      use_responses_api: bool = False
+     background: bool = False
+     service_tier: str | None = None
      extra_headers: dict[str, str] | None = None
+     extra_body: dict[str, Any] | None = None
      force_local_mcp: bool = False

      # Computed properties
      cache_key: str = field(init=False)
-     # num_tokens: int = field(init=False)
-
-     # def __post_init__(self):
-     # # Compute cache key from prompt fingerprint
-     # # self.cache_key = self.prompt.fingerprint
-
-     # # Compute token count
-     # self.num_tokens =

      @cached_property
      def num_tokens(self):
@@ -74,6 +68,10 @@ class RequestContext:
              "tools": self.tools,
              "cache": self.cache,
              "use_responses_api": self.use_responses_api,
+             "background": self.background,
+             "service_tier": self.service_tier,
+             "extra_headers": self.extra_headers,
+             "extra_body": self.extra_body,
              "force_local_mcp": self.force_local_mcp,
          }

lm_deluge/warnings.py ADDED
@@ -0,0 +1,46 @@
+ import functools
+ import os
+ import warnings
+
+ WARNINGS: dict[str, str] = {
+     "WARN_JSON_MODE_UNSUPPORTED": "JSON mode requested for {model_name} but response_format parameter not supported.",
+     "WARN_REASONING_UNSUPPORTED": "Ignoring reasoning_effort param for non-reasoning model: {model_name}.",
+     "WARN_CACHING_UNSUPPORTED": "Cache parameter '{cache_param}' is not supported, ignoring for {model_name}.",
+     "WARN_LOGPROBS_UNSUPPORTED": "Ignoring logprobs param for non-logprobs model: {model_name}",
+ }
+
+
+ def maybe_warn(warning: str, **kwargs):
+     if os.getenv(warning):
+         pass
+     else:
+         warnings.warn(WARNINGS[warning].format(**kwargs))
+         os.environ[warning] = "1"
+
+
+ def deprecated(replacement: str):
+     """Decorator to mark methods as deprecated and suggest replacement.
+
+     Only shows the warning once per method to avoid spam.
+
+     Args:
+         replacement: The name of the replacement method to suggest
+     """
+
+     def decorator(func):
+         warning_key = f"DEPRECATED_{func.__module__}_{func.__qualname__}"
+
+         @functools.wraps(func)
+         def wrapper(*args, **kwargs):
+             if not os.getenv(warning_key):
+                 warnings.warn(
+                     f"{func.__name__} is deprecated, use {replacement} instead",
+                     DeprecationWarning,
+                     stacklevel=2,
+                 )
+                 os.environ[warning_key] = "1"
+             return func(*args, **kwargs)
+
+         return wrapper
+
+     return decorator
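A quick usage sketch of the new warnings helpers (illustrative; the Greeter class is made up, while the warning key and one-shot behavior come from the module above):

    from lm_deluge.warnings import deprecated, maybe_warn

    # Each warning key fires at most once per process: maybe_warn sets an
    # environment variable of the same name after the first emission.
    maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name="mistral-large")  # warns
    maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name="mistral-large")  # silent


    class Greeter:
        def with_greeting(self, name: str) -> str:
            return f"hello, {name}"

        @deprecated("with_greeting")
        def add_greeting(self, name: str) -> str:  # emits DeprecationWarning once
            return self.with_greeting(name)


    print(Greeter().add_greeting("world"))
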
lm_deluge-0.0.60.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.58
+ Version: 0.0.60
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
lm_deluge-0.0.60.dist-info/RECORD CHANGED
@@ -1,27 +1,28 @@
  lm_deluge/__init__.py,sha256=LKKIcqQoQyDpTck6fnB7iAs75BnfNNa3Bj5Nz7KU4Hk,376
- lm_deluge/batches.py,sha256=rQocJLyIs3Ko_nRdAE9jT__5cKWYxiIRAH_Lw3L0E1k,24653
+ lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
  lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
  lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
- lm_deluge/client.py,sha256=1ZxQAWkmtz-zhW4E8rfU2V4BfzvqGsKhvqz_CB63-lc,32894
+ lm_deluge/client.py,sha256=nxVxN0oXYLvOiMgiF7b_qmqQk6Hohnf4ZTtSx1SI_PQ,38845
  lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
  lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
  lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
  lm_deluge/file.py,sha256=FGomcG8s2go_55Z2CChflHgmU-UqgFftgFY8c7f_G70,5631
  lm_deluge/image.py,sha256=5AMXmn2x47yXeYNfMSMAOWcnlrOxxOel-4L8QCJwU70,8928
- lm_deluge/prompt.py,sha256=RgZBcCiAtThqjILkPa4X530sR53SUK03U-6TWWk07tc,59607
- lm_deluge/request_context.py,sha256=o33LSEwnK6YPhZeulUoSE_VrdKCXiCQa0tjjixK2K6M,2540
+ lm_deluge/prompt.py,sha256=1hGLOIwdyGFokKv0dPiVpke3OPHD6vK5qO6q9E8H89Y,62020
+ lm_deluge/request_context.py,sha256=cBayMFWupWhde2OjRugW3JH-Gin-WFGc6DK2Mb4Prdc,2576
  lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
  lm_deluge/tool.py,sha256=eZpzgkSIlGD7KdZQwzLF-UdyRJpRnNNXpceGJrNhRrE,26421
  lm_deluge/tracker.py,sha256=aeS9GUJpgOSQRVXAnGDvlMO8qYpSxpTNLYj2hrMg0m8,14757
  lm_deluge/usage.py,sha256=xz9tAw2hqaJvv9aAVhnQ6N1Arn7fS8Shb28VwCW26wI,5136
+ lm_deluge/warnings.py,sha256=nlDJMCw30VhDEFxqLO2-bfXH_Tv5qmlglzUSbokCSw8,1498
  lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  lm_deluge/api_requests/anthropic.py,sha256=7tTb_NMPodDHrCzakrLd9LyXuLqeTQyAGU-FvMoV3gI,8437
- lm_deluge/api_requests/base.py,sha256=1et-5SdRqfnvXZT3b9fBEx0vvbCwbVunHBWtQr7Wurg,5878
- lm_deluge/api_requests/bedrock.py,sha256=GmVxXz3ERAeQ7e52Nlztt81O4H9eJOQeOnS6b65vjm4,15453
+ lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
+ lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
  lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
- lm_deluge/api_requests/gemini.py,sha256=COHqPWmeaq9fpg0YwOZqQTUbijKnXNF4cvMLnW9kLl8,7857
- lm_deluge/api_requests/mistral.py,sha256=S_LpOfCGbCVEROH_od3P-tYeNYTKFMamMTL-c_wFCBI,4597
- lm_deluge/api_requests/openai.py,sha256=frxSdQn9ZAAweSO-HMKRZ6gKU3Wdl1PqTVPhwy-iNA8,23202
+ lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
+ lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
+ lm_deluge/api_requests/openai.py,sha256=qRBakHOOMYJWvKO0HeeE5C1Dv_dbokuizZin9Ca4k_k,24855
  lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
  lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
  lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -41,7 +42,7 @@ lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w
  lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
  lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
  lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
- lm_deluge/models/__init__.py,sha256=7HNEnpxpEguZYjcudY_9oJ79hOOLo0oNUvG-kwkEpv4,4539
+ lm_deluge/models/__init__.py,sha256=a2xzQNG2axdMaSzoLbzdOKBM5EVOLztvlo8E1k-brqM,4516
  lm_deluge/models/anthropic.py,sha256=5j75sB40yZzT1wwKC7Dh0f2Y2cXnp8yxHuXW63PCuns,6285
  lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
  lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
@@ -53,7 +54,7 @@ lm_deluge/models/grok.py,sha256=TDzr8yfTaHbdJhwMA-Du6L-efaKFJhjTQViuVElCCHI,2566
  lm_deluge/models/groq.py,sha256=Mi5WE1xOBGoZlymD0UN6kzhH_NOmfJYU4N2l-TO0Z8Q,2552
  lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
  lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
- lm_deluge/models/openai.py,sha256=HC_oNLmKkmShkcfeUgyhesACtXGg__I2WiIIDrN-X84,10176
+ lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
  lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
  lm_deluge/models/together.py,sha256=AjKhPsazqBgqyLwHkNQW07COM1n_oSrYQRp2BFVvn9o,4381
  lm_deluge/presets/cerebras.py,sha256=MDkqj15qQRrj8wxSCDNNe_Cs7h1WN1UjV6lTmSY1olQ,479
@@ -64,8 +65,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
  lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
  lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
  lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
- lm_deluge-0.0.58.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
- lm_deluge-0.0.58.dist-info/METADATA,sha256=jyhXeGVPAMMYBGm3omp6MKZfQGlRX-ow_9fI58ZZNGg,13443
- lm_deluge-0.0.58.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lm_deluge-0.0.58.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
- lm_deluge-0.0.58.dist-info/RECORD,,
+ lm_deluge-0.0.60.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+ lm_deluge-0.0.60.dist-info/METADATA,sha256=uBr_1y__E5eT9sL6rOo3qf0MZ4rNKZe0hKVj4WMcqKE,13443
+ lm_deluge-0.0.60.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lm_deluge-0.0.60.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+ lm_deluge-0.0.60.dist-info/RECORD,,