lm-deluge 0.0.57__tar.gz → 0.0.59__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.57/src/lm_deluge.egg-info → lm_deluge-0.0.59}/PKG-INFO +1 -1
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/pyproject.toml +1 -1
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/anthropic.py +1 -1
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/base.py +87 -5
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/openai.py +41 -3
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/batches.py +25 -9
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/client.py +82 -38
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/__init__.py +8 -8
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/anthropic.py +12 -20
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/bedrock.py +0 -14
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/cohere.py +0 -16
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/google.py +0 -20
- lm_deluge-0.0.59/src/lm_deluge/models/grok.py +82 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/groq.py +2 -2
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/meta.py +0 -8
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/openai.py +0 -34
- lm_deluge-0.0.59/src/lm_deluge/models/openrouter.py +64 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/together.py +0 -16
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/prompt.py +19 -7
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/request_context.py +9 -11
- {lm_deluge-0.0.57 → lm_deluge-0.0.59/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- lm_deluge-0.0.57/src/lm_deluge/models/grok.py +0 -38
- lm_deluge-0.0.57/src/lm_deluge/models/openrouter.py +0 -1
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/LICENSE +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/README.md +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/setup.cfg +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.57 → lm_deluge-0.0.59}/tests/test_native_mcp_server.py +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -72,7 +72,7 @@ def _build_anthropic_request(
         request_json["system"] = system_message

     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")

src/lm_deluge/api_requests/base.py

@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod

@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse

 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse

@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)

+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                        last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-
-
-
-
-
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
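The new `_execute_once_background_mode` method is the usual start → poll → cancel loop against the OpenAI Responses API. A minimal standalone sketch of the same flow outside the library, assuming the public `/v1/responses` endpoints (the `{id}` and `{id}/cancel` paths match the URLs built in the diff above; model name and timeout are placeholders):

```python
import asyncio
import os
import time

import aiohttp

API = "https://api.openai.com/v1/responses"
HEADERS = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}


async def run_background(prompt: str, timeout_s: float = 600.0) -> dict:
    async with aiohttp.ClientSession() as session:
        # start: create the response with background=True
        async with session.post(
            API,
            headers=HEADERS,
            json={"model": "gpt-4.1-mini", "input": prompt, "background": True},
        ) as resp:
            resp.raise_for_status()
            data = await resp.json()

        response_id = data["id"]
        start = time.time()
        # poll until the response leaves the queued/in_progress states
        while data["status"] in ("queued", "in_progress"):
            if time.time() - start > timeout_s:
                # give up: cancel the background response server-side
                async with session.post(f"{API}/{response_id}/cancel", headers=HEADERS) as c:
                    c.raise_for_status()
                raise TimeoutError("background response cancelled by client")
            await asyncio.sleep(5.0)
            async with session.get(f"{API}/{response_id}", headers=HEADERS) as resp:
                resp.raise_for_status()
                data = await resp.json()
        return data  # terminal status: completed, failed, cancelled, or incomplete


# asyncio.run(run_background("Write a haiku about rate limits."))
```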
src/lm_deluge/api_requests/openai.py

@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Using 'auto' instead for model {model.id}."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -213,9 +233,6 @@ class OpenAIRequest(APIRequestBase):
 async def _build_oa_responses_request(
     model: APIModel,
     context: RequestContext,
-    # prompt: Conversation,
-    # tools: list[Tool] | None,
-    # sampling_params: SamplingParams,
 ):
     prompt = context.prompt
     sampling_params = context.sampling_params
@@ -226,7 +243,28 @@ async def _build_oa_responses_request(
         "input": openai_responses_format["input"],
         "temperature": sampling_params.temperature,
         "top_p": sampling_params.top_p,
+        "background": context.background or False,
     }
+    if context.service_tier:
+        assert context.service_tier in [
+            "auto",
+            "default",
+            "flex",
+            "priority",
+        ], f"Invalid service tier: {context.service_tier}"
+        # flex is only supported for o3, o4-mini, gpt-5 models
+        if context.service_tier == "flex":
+            model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+            if not model_supports_flex:
+                print(
+                    f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+                    f"Model {model.id} doesn't support flex. Using 'auto' instead."
+                )
+                request_json["service_tier"] = "auto"
+            else:
+                request_json["service_tier"] = context.service_tier
+        else:
+            request_json["service_tier"] = context.service_tier
     if sampling_params.max_new_tokens:
         request_json["max_output_tokens"] = sampling_params.max_new_tokens

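The same service-tier validation and flex fallback is inlined in both the chat and responses builders. Distilled into a standalone helper for clarity (the name `resolve_service_tier` is illustrative, not part of lm_deluge's API):

```python
# Hypothetical helper mirroring the fallback logic added in this release;
# lm_deluge repeats this inline in both request builders rather than sharing it.
def resolve_service_tier(model_id: str, service_tier: str | None) -> str | None:
    if service_tier is None:
        return None
    if service_tier not in ("auto", "default", "flex", "priority"):
        raise ValueError(f"Invalid service tier: {service_tier}")
    # flex is only offered for o3, o4-mini, and gpt-5 family models;
    # anything else falls back to auto (with a warning in the real code)
    if service_tier == "flex" and not any(
        x in model_id for x in ("o3", "o4-mini", "gpt-5")
    ):
        return "auto"
    return service_tier


assert resolve_service_tier("gpt-5-mini", "flex") == "flex"
assert resolve_service_tier("gpt-4.1", "flex") == "auto"
```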
src/lm_deluge/batches.py

@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence
+from typing import Literal, Sequence, cast

 import aiohttp
 from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
 from lm_deluge.api_requests.openai import _build_oa_chat_request
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel, registry
-from lm_deluge.prompt import
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.request_context import RequestContext


@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
 async def create_batch_files_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
     destination: str | None = None,  # if none provided, temp files
 ):
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size

-
+    if not isinstance(prompts, list):
+        prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")

@@ -251,14 +260,18 @@ async def create_batch_files_oa(
 async def submit_batches_oa(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     batch_size: int = 50_000,
 ):
     """Write OpenAI batch requests to a file and submit."""
     MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024  # 200MB
     MAX_BATCH_SIZE_ITEMS = batch_size

-
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+    assert isinstance(prompts, Sequence)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")

@@ -342,7 +355,7 @@ async def submit_batches_oa(
 async def submit_batches_anthropic(
     model: str,
     sampling_params: SamplingParams,
-    prompts:
+    prompts: Prompt | Sequence[Prompt],
     *,
     cache: CachePattern | None = None,
     batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
     MAX_BATCH_SIZE_ITEMS = batch_size

     # Convert prompts to Conversations
-
+    if not isinstance(prompts, list):
+        prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+    prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

     request_headers = None
     batch_tasks = []
     current_batch = []
     current_batch_size = 0
-
+    assert isinstance(prompts, Sequence)
     for idx, prompt in enumerate(prompts):
         assert isinstance(prompt, Conversation)
         context = RequestContext(
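All three batch entry points now take `Prompt | Sequence[Prompt]` and funnel through the same accept-one-or-many normalization before converting to `Conversation`s. The general shape of that check, distilled with generic names (not lm_deluge API):

```python
from typing import Sequence, TypeVar

T = TypeVar("T")


def ensure_sequence(value: T | Sequence[T]) -> Sequence[T]:
    """Wrap a single item in a list; pass lists through unchanged.

    Note: a str is itself a Sequence, which is why the real code above checks
    `isinstance(prompts, list)` rather than `isinstance(prompts, Sequence)` -
    a bare string prompt must be treated as one item, not as characters.
    """
    if isinstance(value, list):
        return value
    return [value]
```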
src/lm_deluge/client.py

@@ -1,5 +1,14 @@
 import asyncio
-from typing import
+from typing import (
+    Any,
+    AsyncGenerator,
+    Callable,
+    Literal,
+    Self,
+    Sequence,
+    cast,
+    overload,
+)

 import numpy as np
 import yaml
@@ -12,7 +21,12 @@ from lm_deluge.batches import (
     submit_batches_oa,
     wait_for_batch_completion_async,
 )
-from lm_deluge.prompt import
+from lm_deluge.prompt import (
+    CachePattern,
+    Conversation,
+    Prompt,
+    prompts_to_conversations,
+)
 from lm_deluge.tool import MCPServer, Tool

 from .api_requests.base import APIResponse
@@ -40,6 +54,9 @@ class _LLMClient(BaseModel):
     request_timeout: int = 30
     cache: Any = None
     extra_headers: dict[str, str] | None = None
+    extra_body: dict[str, str] | None = None
+    use_responses_api: bool = False
+    background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
     temperature: float = 0.75
@@ -171,6 +188,11 @@ class _LLMClient(BaseModel):
         # normalize weights
         self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]

+        # background mode only allowed for responses api
+        if self.background:
+            assert (
+                self.use_responses_api
+            ), "background mode only allowed for responses api"
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
@@ -256,13 +278,6 @@ class _LLMClient(BaseModel):
             # Idle wait before next capacity check. Aim for ~RPM spacing.
             await asyncio.sleep(max(60.0 / self.max_requests_per_minute, 0.01))

-    async def _execute_request(self, context: RequestContext) -> APIResponse:
-        """Create and send a single API request using the provided context."""
-        model_obj = APIModel.from_registry(context.model_name)
-        request = model_obj.make_request(context)
-        response = await request.execute_once()
-        return response
-
     async def process_single_request(
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
@@ -290,7 +305,9 @@ class _LLMClient(BaseModel):
         # Execute single request
         assert context.status_tracker
         context.status_tracker.update_pbar()
-
+        model_obj = APIModel.from_registry(context.model_name)
+        request = model_obj.make_request(context)
+        response = await request.execute_once()

         # Handle successful response
         if not response.is_error:
@@ -350,44 +367,46 @@ class _LLMClient(BaseModel):
     @overload
     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...

     @overload
     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
         tools: list[Tool | dict | MCPServer] | None = ...,
         cache: CachePattern | None = ...,
-
-    ) -> list[APIResponse
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
+    ) -> list[APIResponse]: ...

     async def process_prompts_async(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
-    ) -> list[APIResponse
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
+    ) -> list[APIResponse] | list[str | None] | dict[str, int]:
         """Process multiple prompts asynchronously using the start_nowait/wait_for_all backend.

         This implementation creates all tasks upfront and waits for them to complete,
         avoiding issues with tracker state accumulating across multiple calls.
         """
         # Convert prompts to Conversations
-
+        if not isinstance(prompts, list):
+            prompts = prompts = cast(Sequence[Prompt], [prompts])
+        prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))

         # Ensure tracker exists (start_nowait will call add_to_total for each task)
         if self._tracker is None:
@@ -398,13 +417,14 @@ class _LLMClient(BaseModel):

         # Start all tasks using start_nowait - tasks will coordinate via shared capacity lock
         task_ids = []
+        assert isinstance(prompts, Sequence)
         for prompt in prompts:
             assert isinstance(prompt, Conversation)
             task_id = self.start_nowait(
                 prompt,
                 tools=tools,
                 cache=cache,
-
+                service_tier=service_tier,
             )
             task_ids.append(task_id)

@@ -443,13 +463,12 @@ class _LLMClient(BaseModel):

     def process_prompts_sync(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         return_completions_only: bool = False,
         show_progress=True,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-        use_responses_api: bool = False,
     ):
         return asyncio.run(
             self.process_prompts_async(
@@ -458,7 +477,6 @@ class _LLMClient(BaseModel):
                 show_progress=show_progress,
                 tools=tools,
                 cache=cache,
-                use_responses_api=use_responses_api,
             )
         )

@@ -478,18 +496,18 @@ class _LLMClient(BaseModel):

     def start_nowait(
         self,
-        prompt:
+        prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:
         tracker = self._get_tracker()
         task_id = self._next_task_id
         self._next_task_id += 1
         model, sampling_params = self._select_model()
-
-
+        prompt = prompts_to_conversations([prompt])[0]
+        assert isinstance(prompt, Conversation)
         context = RequestContext(
             task_id=task_id,
             model_name=model,
@@ -500,7 +518,9 @@ class _LLMClient(BaseModel):
             status_tracker=tracker,
             tools=tools,
             cache=cache,
-            use_responses_api=use_responses_api,
+            use_responses_api=self.use_responses_api,
+            background=self.background,
+            service_tier=service_tier,
             extra_headers=self.extra_headers,
             force_local_mcp=self.force_local_mcp,
         )
@@ -515,29 +535,41 @@ class _LLMClient(BaseModel):
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
-
-    ) -> APIResponse
+        service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
+    ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt, tools=tools, cache=cache,
+            prompt, tools=tools, cache=cache, service_tier=service_tier
         )
         return await self.wait_for(task_id)

-    async def wait_for(self, task_id: int) -> APIResponse
+    async def wait_for(self, task_id: int) -> APIResponse:
         task = self._tasks.get(task_id)
         if task:
             return await task
-
+        res = self._results.get(task_id)
+        if res:
+            return res
+        else:
+            return APIResponse(
+                id=-1,
+                model_internal="",
+                prompt=Conversation([]),
+                sampling_params=SamplingParams(),
+                status_code=500,
+                is_error=True,
+                error_message="Task not found",
+            )

     async def wait_for_all(
         self, task_ids: Sequence[int] | None = None
-    ) -> list[APIResponse
+    ) -> list[APIResponse]:
         if task_ids is None:
             task_ids = list(self._tasks.keys())
         return [await self.wait_for(tid) for tid in task_ids]

     async def as_completed(
         self, task_ids: Sequence[int] | None = None
-    ) -> AsyncGenerator[tuple[int, APIResponse
+    ) -> AsyncGenerator[tuple[int, APIResponse], None]:
         """Yield ``(task_id, result)`` pairs as tasks complete.

         Args:
@@ -561,7 +593,9 @@ class _LLMClient(BaseModel):
         for task in list(tasks_map.keys()):
             if task.done():
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

         while tasks_map:
             done, _ = await asyncio.wait(
@@ -569,7 +603,9 @@ class _LLMClient(BaseModel):
             )
             for task in done:
                 tid = tasks_map.pop(task)
-
+                task_result = self._results.get(tid, await task)
+                assert task_result
+                yield tid, task_result

     async def stream(
         self,
@@ -682,7 +718,7 @@ class _LLMClient(BaseModel):

     async def submit_batch_job(
         self,
-        prompts:
+        prompts: Prompt | Sequence[Prompt],
         *,
         tools: list[Tool] | None = None,
         cache: CachePattern | None = None,
@@ -744,6 +780,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -771,6 +809,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -797,6 +837,8 @@ def LLMClient(
     request_timeout: int = 30,
     cache: Any = None,
     extra_headers: dict[str, str] | None = None,
+    use_responses_api: bool = False,
+    background: bool = False,
     temperature: float = 0.75,
     top_p: float = 1.0,
     json_mode: bool = False,
@@ -835,6 +877,8 @@ def LLMClient(
         request_timeout=request_timeout,
         cache=cache,
         extra_headers=extra_headers,
+        use_responses_api=use_responses_api,
+        background=background,
         temperature=temperature,
         top_p=top_p,
         json_mode=json_mode,
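Putting the client-level changes together: `use_responses_api` and `background` are now client-wide settings on the `LLMClient` factory, while `service_tier` is passed per call to `process_prompts_async` / `start_nowait`. A hedged usage sketch (the top-level `LLMClient` import, positional model argument, model name, and prompt are assumptions based on earlier releases; defaults may differ):

```python
import asyncio

from lm_deluge import LLMClient  # assumed top-level export, as in prior versions


async def main() -> None:
    client = LLMClient(
        "gpt-5-mini",             # illustrative model name
        use_responses_api=True,   # background mode requires the responses API
        background=True,          # start -> poll -> result instead of one long request
        request_timeout=900,      # background polling gives up at this client-side deadline
    )
    responses = await client.process_prompts_async(
        ["Summarize the plot of Hamlet in one sentence."],
        service_tier="flex",      # falls back to "auto" for models without flex support
    )
    print(responses[0])


# asyncio.run(main())
```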
src/lm_deluge/models/__init__.py

@@ -38,9 +38,9 @@ class APIModel:
     supports_responses: bool = False
     reasoning_model: bool = False
     regions: list[str] | dict[str, int] = field(default_factory=list)
-    tokens_per_minute: int | None = None
-    requests_per_minute: int | None = None
-    gpus: list[str] | None = None
+    # tokens_per_minute: int | None = None
+    # requests_per_minute: int | None = None
+    # gpus: list[str] | None = None

     @classmethod
     def from_registry(cls, name: str):
@@ -62,7 +62,7 @@ class APIModel:
             raise ValueError("no regions to sample")
         random.sample(regions, 1, counts=weights)[0]

-    def make_request(self, context: RequestContext):
+    def make_request(self, context: RequestContext):
         from ..api_requests.common import CLASSES

         api_spec = self.api_spec
@@ -97,8 +97,8 @@ def register_model(
     supports_responses: bool = False,
     reasoning_model: bool = False,
     regions: list[str] | dict[str, int] = field(default_factory=list),
-    tokens_per_minute: int | None = None,
-    requests_per_minute: int | None = None,
+    # tokens_per_minute: int | None = None,
+    # requests_per_minute: int | None = None,
 ) -> APIModel:
     """Register a model configuration and return the created APIModel."""
     model = APIModel(
@@ -116,8 +116,8 @@ def register_model(
         supports_responses=supports_responses,
         reasoning_model=reasoning_model,
         regions=regions,
-        tokens_per_minute=tokens_per_minute,
-        requests_per_minute=requests_per_minute,
+        # tokens_per_minute=tokens_per_minute,
+        # requests_per_minute=requests_per_minute,
     )
     registry[model.id] = model
     return model