lm-deluge 0.0.56__py3-none-any.whl → 0.0.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lm_deluge/__init__.py +12 -1
  2. lm_deluge/api_requests/anthropic.py +12 -1
  3. lm_deluge/api_requests/base.py +87 -5
  4. lm_deluge/api_requests/bedrock.py +3 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +7 -6
  7. lm_deluge/api_requests/mistral.py +8 -9
  8. lm_deluge/api_requests/openai.py +179 -124
  9. lm_deluge/batches.py +25 -9
  10. lm_deluge/client.py +280 -67
  11. lm_deluge/config.py +1 -1
  12. lm_deluge/file.py +382 -13
  13. lm_deluge/mock_openai.py +482 -0
  14. lm_deluge/models/__init__.py +12 -8
  15. lm_deluge/models/anthropic.py +12 -20
  16. lm_deluge/models/bedrock.py +0 -14
  17. lm_deluge/models/cohere.py +0 -16
  18. lm_deluge/models/google.py +0 -20
  19. lm_deluge/models/grok.py +48 -4
  20. lm_deluge/models/groq.py +2 -2
  21. lm_deluge/models/kimi.py +34 -0
  22. lm_deluge/models/meta.py +0 -8
  23. lm_deluge/models/minimax.py +10 -0
  24. lm_deluge/models/openai.py +28 -34
  25. lm_deluge/models/openrouter.py +64 -1
  26. lm_deluge/models/together.py +0 -16
  27. lm_deluge/prompt.py +138 -29
  28. lm_deluge/request_context.py +9 -11
  29. lm_deluge/tool.py +395 -19
  30. lm_deluge/tracker.py +11 -5
  31. lm_deluge/warnings.py +46 -0
  32. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
  33. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +36 -33
  34. lm_deluge/agent.py +0 -0
  35. lm_deluge/gemini_limits.py +0 -65
  36. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
  37. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
  38. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/openai.py CHANGED
@@ -1,7 +1,6 @@
  import json
  import os
  import traceback as tb
- import warnings
  from types import SimpleNamespace
 
  import aiohttp
@@ -9,6 +8,7 @@ from aiohttp import ClientResponse
 
  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
+ from lm_deluge.warnings import maybe_warn
 
  from ..config import SamplingParams
  from ..models import APIModel
@@ -30,6 +30,26 @@ async def _build_oa_chat_request(
  "temperature": sampling_params.temperature,
  "top_p": sampling_params.top_p,
  }
+ if context.service_tier:
+ assert context.service_tier in [
+ "auto",
+ "default",
+ "flex",
+ "priority",
+ ], f"Invalid service tier: {context.service_tier}"
+ # flex is only supported for o3, o4-mini, gpt-5 models
+ if context.service_tier == "flex":
+ model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+ if not model_supports_flex:
+ print(
+ f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+ f"Using 'auto' instead for model {model.id}."
+ )
+ request_json["service_tier"] = "auto"
+ else:
+ request_json["service_tier"] = context.service_tier
+ else:
+ request_json["service_tier"] = context.service_tier
  # set max_tokens or max_completion_tokens dep. on provider
  if "cohere" in model.api_base:
  request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -55,9 +75,8 @@ async def _build_oa_chat_request(
  request_json["reasoning_effort"] = effort
  else:
  if sampling_params.reasoning_effort:
- warnings.warn(
- f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
- )
+ maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
+
  if sampling_params.logprobs:
  request_json["logprobs"] = True
  if sampling_params.top_logprobs is not None:
@@ -85,8 +104,10 @@ class OpenAIRequest(APIRequestBase):
 
  # Warn if cache is specified for non-Anthropic model
  if self.context.cache is not None:
- warnings.warn(
- f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+ maybe_warn(
+ "WARN_CACHING_UNSUPPORTED",
+ model_name=self.context.model_name,
+ cache_param=self.context.cache,
  )
  self.model = APIModel.from_registry(self.context.model_name)
 
@@ -163,7 +184,8 @@ class OpenAIRequest(APIRequestBase):
 
  content = Message("assistant", parts)
 
- usage = Usage.from_openai_usage(data["usage"])
+ if "usage" in data and data["usage"] is not None:
+ usage = Usage.from_openai_usage(data["usage"])
  if (
  self.context.sampling_params.logprobs
  and "logprobs" in data["choices"][0]
@@ -213,9 +235,6 @@ class OpenAIRequest(APIRequestBase):
  async def _build_oa_responses_request(
  model: APIModel,
  context: RequestContext,
- # prompt: Conversation,
- # tools: list[Tool] | None,
- # sampling_params: SamplingParams,
  ):
  prompt = context.prompt
  sampling_params = context.sampling_params
@@ -226,7 +245,28 @@ async def _build_oa_responses_request(
  "input": openai_responses_format["input"],
  "temperature": sampling_params.temperature,
  "top_p": sampling_params.top_p,
+ "background": context.background or False,
  }
+ if context.service_tier:
+ assert context.service_tier in [
+ "auto",
+ "default",
+ "flex",
+ "priority",
+ ], f"Invalid service tier: {context.service_tier}"
+ # flex is only supported for o3, o4-mini, gpt-5 models
+ if context.service_tier == "flex":
+ model_supports_flex = any(x in model.id for x in ["o3", "o4-mini", "gpt-5"])
+ if not model_supports_flex:
+ print(
+ f"WARNING: service_tier='flex' only supported for o3, o4-mini, gpt-5. "
+ f"Model {model.id} doesn't support flex. Using 'auto' instead."
+ )
+ request_json["service_tier"] = "auto"
+ else:
+ request_json["service_tier"] = context.service_tier
+ else:
+ request_json["service_tier"] = context.service_tier
  if sampling_params.max_new_tokens:
  request_json["max_output_tokens"] = sampling_params.max_new_tokens
 
@@ -245,9 +285,7 @@ async def _build_oa_responses_request(
  }
  else:
  if sampling_params.reasoning_effort:
- warnings.warn(
- f"Ignoring reasoning_effort for non-reasoning model: {model.id}"
- )
+ maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=context.model_name)
 
  if sampling_params.json_mode and model.supports_json:
  request_json["text"] = {"format": {"type": "json_object"}}
@@ -284,8 +322,10 @@ class OpenAIResponsesRequest(APIRequestBase):
  super().__init__(context)
  # Warn if cache is specified for non-Anthropic model
  if self.context.cache is not None:
- warnings.warn(
- f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+ maybe_warn(
+ "WARN_CACHING_UNSUPPORTED",
+ model_name=self.context.model_name,
+ cache_param=self.context.cache,
  )
  self.model = APIModel.from_registry(self.context.model_name)
 
@@ -310,7 +350,8 @@ class OpenAIResponsesRequest(APIRequestBase):
  assert self.context.status_tracker
 
  if status_code == 500:
- print("Internal Server Error: ", http_response.text())
+ res_text = await http_response.text()
+ print("Internal Server Error: ", res_text)
 
  if status_code >= 200 and status_code < 300:
  try:
@@ -322,126 +363,138 @@ class OpenAIResponsesRequest(APIRequestBase):
  )
  if not is_error:
  assert data is not None, "data is None"
- try:
- # Parse Responses API format
- parts = []
 
- # Get the output array from the response
- output = data.get("output", [])
- if not output:
- is_error = True
- error_message = "No output in response"
- else:
- # Process each output item
- for item in output:
- if item.get("type") == "message":
- message_content = item.get("content", [])
- for content_item in message_content:
- if content_item.get("type") == "output_text":
- parts.append(Text(content_item["text"]))
- elif content_item.get("type") == "refusal":
- parts.append(Text(content_item["refusal"]))
- elif item.get("type") == "reasoning":
- summary = item["summary"]
- if not summary:
- continue
- if isinstance(summary, list) and len(summary) > 0:
- summary = summary[0]
- assert isinstance(summary, dict), "summary isn't a dict"
- parts.append(Thinking(summary["text"]))
- elif item.get("type") == "function_call":
- parts.append(
- ToolCall(
- id=item["call_id"],
- name=item["name"],
- arguments=json.loads(item["arguments"]),
+ # Check if response is incomplete
+ if data.get("status") == "incomplete":
+ is_error = True
+ incomplete_reason = data.get("incomplete_details", {}).get(
+ "reason", "unknown"
+ )
+ error_message = f"Response incomplete: {incomplete_reason}"
+
+ if not is_error:
+ try:
+ # Parse Responses API format
+ parts = []
+
+ # Get the output array from the response
+ output = data.get("output", [])
+ if not output:
+ is_error = True
+ error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
+ else:
+ # Process each output item
+ for item in output:
+ if item.get("type") == "message":
+ message_content = item.get("content", [])
+ for content_item in message_content:
+ if content_item.get("type") == "output_text":
+ parts.append(Text(content_item["text"]))
+ elif content_item.get("type") == "refusal":
+ parts.append(Text(content_item["refusal"]))
+ elif item.get("type") == "reasoning":
+ summary = item["summary"]
+ if not summary:
+ continue
+ if isinstance(summary, list) and len(summary) > 0:
+ summary = summary[0]
+ assert isinstance(
+ summary, dict
+ ), "summary isn't a dict"
+ parts.append(Thinking(summary["text"]))
+ elif item.get("type") == "function_call":
+ parts.append(
+ ToolCall(
+ id=item["call_id"],
+ name=item["name"],
+ arguments=json.loads(item["arguments"]),
+ )
  )
- )
- elif item.get("type") == "mcp_call":
- parts.append(
- ToolCall(
- id=item["id"],
- name=item["name"],
- arguments=json.loads(item["arguments"]),
- built_in=True,
- built_in_type="mcp_call",
- extra_body={
- "server_label": item["server_label"],
- "error": item.get("error"),
- "output": item.get("output"),
- },
+ elif item.get("type") == "mcp_call":
+ parts.append(
+ ToolCall(
+ id=item["id"],
+ name=item["name"],
+ arguments=json.loads(item["arguments"]),
+ built_in=True,
+ built_in_type="mcp_call",
+ extra_body={
+ "server_label": item["server_label"],
+ "error": item.get("error"),
+ "output": item.get("output"),
+ },
+ )
  )
- )
 
- elif item.get("type") == "computer_call":
- parts.append(
- ToolCall(
- id=item["call_id"],
- name="computer_call",
- arguments=item.get("action"),
- built_in=True,
- built_in_type="computer_call",
+ elif item.get("type") == "computer_call":
+ parts.append(
+ ToolCall(
+ id=item["call_id"],
+ name="computer_call",
+ arguments=item.get("action"),
+ built_in=True,
+ built_in_type="computer_call",
+ )
  )
- )
 
- elif item.get("type") == "web_search_call":
- parts.append(
- ToolCall(
- id=item["id"],
- name="web_search_call",
- arguments={},
- built_in=True,
- built_in_type="web_search_call",
- extra_body={"status": item["status"]},
+ elif item.get("type") == "web_search_call":
+ parts.append(
+ ToolCall(
+ id=item["id"],
+ name="web_search_call",
+ arguments={},
+ built_in=True,
+ built_in_type="web_search_call",
+ extra_body={"status": item["status"]},
+ )
  )
- )
 
- elif item.get("type") == "file_search_call":
- parts.append(
- ToolCall(
- id=item["id"],
- name="file_search_call",
- arguments={"queries": item["queries"]},
- built_in=True,
- built_in_type="file_search_call",
- extra_body={
- "status": item["status"],
- "results": item["results"],
- },
+ elif item.get("type") == "file_search_call":
+ parts.append(
+ ToolCall(
+ id=item["id"],
+ name="file_search_call",
+ arguments={"queries": item["queries"]},
+ built_in=True,
+ built_in_type="file_search_call",
+ extra_body={
+ "status": item["status"],
+ "results": item["results"],
+ },
+ )
  )
- )
- elif item.get("type") == "image_generation_call":
- parts.append(
- ToolCall(
- id=item["id"],
- name="image_generation_call",
- arguments={},
- built_in=True,
- built_in_type="image_generation_call",
- extra_body={
- "status": item["status"],
- "result": item["result"],
- },
+ elif item.get("type") == "image_generation_call":
+ parts.append(
+ ToolCall(
+ id=item["id"],
+ name="image_generation_call",
+ arguments={},
+ built_in=True,
+ built_in_type="image_generation_call",
+ extra_body={
+ "status": item["status"],
+ "result": item["result"],
+ },
+ )
  )
- )
 
- # Handle reasoning if present
- if "reasoning" in data and data["reasoning"].get("summary"):
- thinking = data["reasoning"]["summary"]
- parts.append(Thinking(thinking))
+ # Handle reasoning if present
+ if "reasoning" in data and data["reasoning"].get("summary"):
+ thinking = data["reasoning"]["summary"]
+ parts.append(Thinking(thinking))
 
- content = Message("assistant", parts)
+ content = Message("assistant", parts)
 
- # Extract usage information
- if "usage" in data:
- usage = Usage.from_openai_usage(data["usage"])
+ # Extract usage information
+ if "usage" in data and data["usage"] is not None:
+ usage = Usage.from_openai_usage(data["usage"])
 
- except Exception as e:
- is_error = True
- error_message = f"Error parsing {self.model.name} responses API response: {str(e)}"
- print("got data:", data)
- traceback = tb.format_exc()
- print(f"Error details:\n{traceback}")
+ except Exception as e:
+ is_error = True
+ error_message = f"Error parsing {self.model.name} responses API response: {str(e)}"
+ print("got data:", data)
+ traceback = tb.format_exc()
+ print(f"Error details:\n{traceback}")
 
  elif mimetype and "json" in mimetype.lower():
  print("is_error True, json response")
@@ -488,8 +541,10 @@ async def stream_chat(
  extra_headers: dict[str, str] | None = None,
  ):
  if cache is not None:
- warnings.warn(
- f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+ maybe_warn(
+ "WARN_CACHING_UNSUPPORTED",
+ model_name=model_name,
+ cache_param=cache,
  )
 
  model = APIModel.from_registry(model_name)
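A recurring change in this file is that ad-hoc `warnings.warn(...)` calls are replaced with `maybe_warn(...)` from the new `lm_deluge/warnings.py` (+46 lines, not included in this excerpt). Based only on the call sites above, a plausible shape for that helper is a keyed, warn-once registry; this is an assumption about its design, not the actual implementation:

```python
# Hypothetical sketch of lm_deluge/warnings.py, inferred from call sites like
# maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=...) and
# maybe_warn("WARN_CACHING_UNSUPPORTED", model_name=..., cache_param=...).
# The real module is not shown in this diff and may differ.
import warnings

_TEMPLATES = {
    "WARN_REASONING_UNSUPPORTED": (
        "Ignoring reasoning_effort for non-reasoning model: {model_name}"
    ),
    "WARN_CACHING_UNSUPPORTED": (
        "Cache parameter '{cache_param}' is only supported for Anthropic models, "
        "ignoring for {model_name}"
    ),
}
_seen: set[str] = set()


def maybe_warn(code: str, **fields) -> None:
    """Emit each warning code at most once per process."""
    if code in _seen:
        return
    _seen.add(code)
    warnings.warn(_TEMPLATES[code].format(**fields))
```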
lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
  import os
  import tempfile
  import time
- from typing import Literal, Sequence
+ from typing import Literal, Sequence, cast
 
  import aiohttp
  from rich.console import Console
@@ -16,7 +16,12 @@ from lm_deluge.api_requests.anthropic import _build_anthropic_request
  from lm_deluge.api_requests.openai import _build_oa_chat_request
  from lm_deluge.config import SamplingParams
  from lm_deluge.models import APIModel, registry
- from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
+ from lm_deluge.prompt import (
+ CachePattern,
+ Conversation,
+ Prompt,
+ prompts_to_conversations,
+ )
  from lm_deluge.request_context import RequestContext
 
 
@@ -166,14 +171,18 @@ async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
  async def create_batch_files_oa(
  model: str,
  sampling_params: SamplingParams,
- prompts: Sequence[str | list[dict] | Conversation],
+ prompts: Prompt | Sequence[Prompt],
  batch_size: int = 50_000,
  destination: str | None = None, # if none provided, temp files
  ):
  MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024 # 200MB
  MAX_BATCH_SIZE_ITEMS = batch_size
 
- prompts = prompts_to_conversations(prompts)
+ if not isinstance(prompts, list):
+ prompts = cast(Sequence[Prompt], [prompts])
+
+ prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+ assert isinstance(prompts, Sequence)
  if any(p is None for p in prompts):
  raise ValueError("All prompts must be valid.")
 
@@ -251,14 +260,18 @@ async def create_batch_files_oa(
  async def submit_batches_oa(
  model: str,
  sampling_params: SamplingParams,
- prompts: Sequence[str | list[dict] | Conversation],
+ prompts: Prompt | Sequence[Prompt],
  batch_size: int = 50_000,
  ):
  """Write OpenAI batch requests to a file and submit."""
  MAX_BATCH_SIZE_BYTES = 200 * 1024 * 1024 # 200MB
  MAX_BATCH_SIZE_ITEMS = batch_size
 
- prompts = prompts_to_conversations(prompts)
+ if not isinstance(prompts, list):
+ prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+ prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
+ assert isinstance(prompts, Sequence)
  if any(p is None for p in prompts):
  raise ValueError("All prompts must be valid.")
 
@@ -342,7 +355,7 @@ async def submit_batches_oa(
  async def submit_batches_anthropic(
  model: str,
  sampling_params: SamplingParams,
- prompts: Sequence[str | list[dict] | Conversation],
+ prompts: Prompt | Sequence[Prompt],
  *,
  cache: CachePattern | None = None,
  batch_size=100_000,
@@ -362,13 +375,16 @@ async def submit_batches_anthropic(
  MAX_BATCH_SIZE_ITEMS = batch_size
 
  # Convert prompts to Conversations
- prompts = prompts_to_conversations(prompts)
+ if not isinstance(prompts, list):
+ prompts = prompts = cast(Sequence[Prompt], [prompts])
+
+ prompts = prompts_to_conversations(cast(Sequence[Prompt], prompts))
 
  request_headers = None
  batch_tasks = []
  current_batch = []
  current_batch_size = 0
- 
+ assert isinstance(prompts, Sequence)
  for idx, prompt in enumerate(prompts):
  assert isinstance(prompt, Conversation)
  context = RequestContext(
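The batch helpers (`create_batch_files_oa`, `submit_batches_oa`, `submit_batches_anthropic`) now accept either a single prompt or a sequence of prompts and normalize before converting to `Conversation`s. Distilled into one helper, the repeated pattern looks roughly like this (the helper name is illustrative; `Prompt`, `Conversation`, and `prompts_to_conversations` are the real exports used in the diff):

```python
from typing import Sequence, cast

from lm_deluge.prompt import Conversation, Prompt, prompts_to_conversations


# Illustrative consolidation of the normalization repeated in each batch helper.
def normalize_prompts(prompts: Prompt | Sequence[Prompt]) -> Sequence[Conversation]:
    """Wrap a lone prompt in a list, then convert everything to Conversations."""
    if not isinstance(prompts, list):
        prompts = cast(Sequence[Prompt], [prompts])
    conversations = prompts_to_conversations(cast(Sequence[Prompt], prompts))
    assert isinstance(conversations, Sequence)
    return conversations
```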