lm-deluge 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


lm_deluge/__init__.py CHANGED
@@ -1,6 +1,7 @@
  from .client import LLMClient, SamplingParams, APIResponse
  from .prompt import Conversation, Message
  from .tool import Tool
+ from .file import File
  import dotenv

  dotenv.load_dotenv()
@@ -12,4 +13,5 @@ __all__ = [
  "Conversation",
  "Message",
  "Tool",
+ "File",
  ]
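For quick orientation, a minimal sketch of the resulting 0.0.14 import surface (nothing here beyond what the updated `__all__` above lists; assumes the package is installed):

```python
# Everything in the updated __all__ is importable from the package root.
from lm_deluge import (
    LLMClient,
    SamplingParams,
    APIResponse,
    Conversation,
    Message,
    Tool,
    File,  # new in 0.0.14
)
```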
lm_deluge/api_requests/base.py CHANGED
@@ -1,165 +1,19 @@
  import asyncio
- import json
  import random
  import traceback
  from abc import ABC, abstractmethod
- from dataclasses import dataclass
  from typing import Callable

  import aiohttp
  from aiohttp import ClientResponse

- from lm_deluge.prompt import CachePattern, Conversation, Message
- from lm_deluge.usage import Usage
+ from lm_deluge.prompt import CachePattern, Conversation

  from ..config import SamplingParams
  from ..errors import raise_if_modal_exception
  from ..models import APIModel
  from ..tracker import StatusTracker
-
-
- @dataclass
- class APIResponse:
- # request information
- id: int # should be unique to the request within a given prompt-processing call
- model_internal: str # our internal model tag
- prompt: Conversation
- sampling_params: SamplingParams
-
- # http response information
- status_code: int | None
- is_error: bool | None
- error_message: str | None
-
- # completion information - unified usage tracking
- usage: Usage | None = None
-
- # response content - structured format
- content: Message | None = None
-
- # optional or calculated automatically
- thinking: str | None = None # if model shows thinking tokens
- model_external: str | None = None # the model tag used by the API
- region: str | None = None
- logprobs: list | None = None
- finish_reason: str | None = None # make required later
- cost: float | None = None # calculated automatically
- cache_hit: bool = False # manually set if true
- # set to true if is_error and should be retried with a different model
- retry_with_different_model: bool | None = False
- # set to true if should NOT retry with the same model (unrecoverable error)
- give_up_if_no_other_models: bool | None = False
- # OpenAI Responses API specific - used for computer use continuation
- response_id: str | None = None
- # Raw API response for debugging
- raw_response: dict | None = None
-
- @property
- def completion(self) -> str | None:
- """Backward compatibility: extract text from content Message."""
- if self.content is not None:
- return self.content.completion
- return None
-
- @property
- def input_tokens(self) -> int | None:
- """Get input tokens from usage object."""
- return self.usage.input_tokens if self.usage else None
-
- @property
- def output_tokens(self) -> int | None:
- """Get output tokens from usage object."""
- return self.usage.output_tokens if self.usage else None
-
- @property
- def cache_read_tokens(self) -> int | None:
- """Get cache read tokens from usage object."""
- return self.usage.cache_read_tokens if self.usage else None
-
- @property
- def cache_write_tokens(self) -> int | None:
- """Get cache write tokens from usage object."""
- return self.usage.cache_write_tokens if self.usage else None
-
- def __post_init__(self):
- # calculate cost & get external model name
- self.id = int(self.id)
- api_model = APIModel.from_registry(self.model_internal)
- self.model_external = api_model.name
- self.cost = None
- if (
- self.usage is not None
- and api_model.input_cost is not None
- and api_model.output_cost is not None
- ):
- self.cost = (
- self.usage.input_tokens * api_model.input_cost / 1e6
- + self.usage.output_tokens * api_model.output_cost / 1e6
- )
- elif self.content is not None and self.completion is not None:
- print(
- f"Warning: Completion provided without token counts for model {self.model_internal}."
- )
-
- def to_dict(self):
- return {
- "id": self.id,
- "model_internal": self.model_internal,
- "model_external": self.model_external,
- "region": self.region,
- "prompt": self.prompt.to_log(), # destroys image if present
- "sampling_params": self.sampling_params.__dict__,
- "status_code": self.status_code,
- "is_error": self.is_error,
- "error_message": self.error_message,
- "completion": self.completion, # computed property
- "content": self.content.to_log() if self.content else None,
- "usage": self.usage.to_dict() if self.usage else None,
- "finish_reason": self.finish_reason,
- "cost": self.cost,
- }
-
- @classmethod
- def from_dict(cls, data: dict):
- # Handle backward compatibility for content/completion
- content = None
- if "content" in data and data["content"] is not None:
- # Reconstruct message from log format
- content = Message.from_log(data["content"])
- elif "completion" in data and data["completion"] is not None:
- # Backward compatibility: create a Message with just text
- content = Message.ai(data["completion"])
-
- usage = None
- if "usage" in data and data["usage"] is not None:
- usage = Usage.from_dict(data["usage"])
-
- return cls(
- id=data.get("id", random.randint(0, 1_000_000_000)),
- model_internal=data["model_internal"],
- prompt=Conversation.from_log(data["prompt"]),
- sampling_params=SamplingParams(**data["sampling_params"]),
- status_code=data["status_code"],
- is_error=data["is_error"],
- error_message=data["error_message"],
- usage=usage,
- content=content,
- thinking=data.get("thinking"),
- model_external=data.get("model_external"),
- region=data.get("region"),
- logprobs=data.get("logprobs"),
- finish_reason=data.get("finish_reason"),
- cost=data.get("cost"),
- cache_hit=data.get("cache_hit", False),
- )
-
- def write_to_file(self, filename):
- """
- Writes the APIResponse as a line to a file.
- If file exists, appends to it.
- """
- with open(filename, "a") as f:
- f.write(json.dumps(self.to_dict()) + "\n")
+ from .response import APIResponse


  class APIRequestBase(ABC):
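The `APIResponse` dataclass itself is unchanged; it moves to `lm_deluge/api_requests/response.py` (shown later in this diff) and is re-imported here, so existing imports keep working. A small sketch under that assumption:

```python
# Both import paths refer to the same class after this change.
from lm_deluge.api_requests.base import APIResponse as APIResponseFromBase
from lm_deluge.api_requests.response import APIResponse

assert APIResponseFromBase is APIResponse
```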
lm_deluge/api_requests/openai.py CHANGED
@@ -1,17 +1,19 @@
- import warnings
- from aiohttp import ClientResponse
  import json
  import os
+ import warnings
  from typing import Callable

+ import aiohttp
+ from aiohttp import ClientResponse
+
  from lm_deluge.tool import Tool

- from .base import APIRequestBase, APIResponse
- from ..prompt import Conversation, Message, Text, ToolCall, Thinking, CachePattern
- from ..usage import Usage
- from ..tracker import StatusTracker
  from ..config import SamplingParams
  from ..models import APIModel
+ from ..prompt import CachePattern, Conversation, Message, Text, Thinking, ToolCall
+ from ..tracker import StatusTracker
+ from ..usage import Usage
+ from .base import APIRequestBase, APIResponse


  def _build_oa_chat_request(
@@ -111,6 +113,7 @@ class OpenAIRequest(APIRequestBase):
  status_code = http_response.status
  mimetype = http_response.headers.get("Content-Type", None)
  data = None
+ finish_reason = None
  if status_code >= 200 and status_code < 300:
  try:
  data = await http_response.json()
@@ -125,6 +128,7 @@ class OpenAIRequest(APIRequestBase):
  # Parse response into Message with parts
  parts = []
  message = data["choices"][0]["message"]
+ finish_reason = data["choices"][0]["finish_reason"]

  # Add text content if present
  if message.get("content"):
@@ -190,6 +194,7 @@ class OpenAIRequest(APIRequestBase):
  sampling_params=self.sampling_params,
  usage=usage,
  raw_response=data,
+ finish_reason=finish_reason,
  )

@@ -266,6 +271,13 @@ class OpenAIResponsesRequest(APIRequestBase):
  self.request_json["max_output_tokens"] = sampling_params.max_new_tokens

  if self.model.reasoning_model:
+ if sampling_params.reasoning_effort in [None, "none"]:
+ # gemini models can switch reasoning off
+ if "gemini" in self.model.id:
+ self.sampling_params.reasoning_effort = "none" # expects string
+ # openai models can only go down to "low"
+ else:
+ self.sampling_params.reasoning_effort = "low"
  self.request_json["temperature"] = 1.0
  self.request_json["top_p"] = 1.0
  self.request_json["reasoning"] = {
@@ -413,3 +425,57 @@ class OpenAIResponsesRequest(APIRequestBase):
  usage=usage,
  raw_response=data,
  )
+
+
+ async def stream_chat(
+ model_name: str, # must correspond to registry
+ prompt: Conversation,
+ sampling_params: SamplingParams = SamplingParams(),
+ tools: list | None = None,
+ cache: CachePattern | None = None,
+ ):
+ if cache is not None:
+ warnings.warn(
+ f"Cache parameter '{cache}' is only supported for Anthropic models, ignoring for {model_name}"
+ )
+
+ model = APIModel.from_registry(model_name)
+ if model.api_spec != "openai":
+ raise ValueError("streaming only supported on openai models for now")
+ url = f"{model.api_base}/chat/completions"
+ request_header = {"Authorization": f"Bearer {os.getenv(model.api_key_env_var)}"}
+ request_json = _build_oa_chat_request(model, prompt, tools, sampling_params)
+ request_json["stream"] = True
+
+ async with aiohttp.ClientSession() as s:
+ async with s.post(url, headers=request_header, json=request_json) as r:
+ r.raise_for_status() # bail on 4xx/5xx
+ content = ""
+ buf = ""
+ async for chunk in r.content.iter_any(): # raw bytes
+ buf += chunk.decode()
+ while "\n\n" in buf: # full SSE frame
+ event, buf = buf.split("\n\n", 1)
+ if not event.startswith("data:"):
+ continue # ignore comments
+ data = event[5:].strip() # after "data:"
+ if data == "[DONE]":
+ yield APIResponse(
+ id=0,
+ status_code=None,
+ is_error=False,
+ error_message=None,
+ prompt=prompt,
+ content=Message(
+ role="assistant", parts=[Text(text=content)]
+ ),
+ model_internal=model.id,
+ sampling_params=sampling_params,
+ usage=None,
+ raw_response=None,
+ )
+ msg = json.loads(data) # SSE payload
+ delta = msg["choices"][0]["delta"].get("content")
+ if delta:
+ content += delta
+ yield delta
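The new `stream_chat` generator yields text deltas as strings and, on the `[DONE]` frame, yields a single `APIResponse` carrying the assembled completion. A minimal consumption sketch, assuming a registered OpenAI-spec model and its API key in the environment (the model name is illustrative); breaking on the final item mirrors how `LLMClient.stream` consumes it:

```python
import asyncio

from lm_deluge.api_requests.openai import stream_chat
from lm_deluge.prompt import Conversation


async def main() -> None:
    final = None
    async for item in stream_chat("gpt-4.1-mini", Conversation.user("Tell me a joke.")):
        if isinstance(item, str):
            print(item, end="", flush=True)  # incremental text delta
        else:
            final = item  # terminal APIResponse with the assembled completion
            break
    if final is not None:
        print("\n\ngot completion:", final.completion is not None)


asyncio.run(main())
```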
lm_deluge/api_requests/response.py ADDED
@@ -0,0 +1,153 @@
+ import json
+ import random
+ from dataclasses import dataclass
+
+ from lm_deluge.prompt import Conversation, Message
+ from lm_deluge.usage import Usage
+
+ from ..config import SamplingParams
+ from ..models import APIModel
+
+
+ @dataclass
+ class APIResponse:
+ # request information
+ id: int # should be unique to the request within a given prompt-processing call
+ model_internal: str # our internal model tag
+ prompt: Conversation
+ sampling_params: SamplingParams
+
+ # http response information
+ status_code: int | None
+ is_error: bool | None
+ error_message: str | None
+
+ # completion information - unified usage tracking
+ usage: Usage | None = None
+
+ # response content - structured format
+ content: Message | None = None
+
+ # optional or calculated automatically
+ thinking: str | None = None # if model shows thinking tokens
+ model_external: str | None = None # the model tag used by the API
+ region: str | None = None
+ logprobs: list | None = None
+ finish_reason: str | None = None # make required later
+ cost: float | None = None # calculated automatically
+ cache_hit: bool = False # manually set if true
+ # set to true if is_error and should be retried with a different model
+ retry_with_different_model: bool | None = False
+ # set to true if should NOT retry with the same model (unrecoverable error)
+ give_up_if_no_other_models: bool | None = False
+ # OpenAI Responses API specific - used for computer use continuation
+ response_id: str | None = None
+ # Raw API response for debugging
+ raw_response: dict | None = None
+
+ @property
+ def completion(self) -> str | None:
+ """Backward compatibility: extract text from content Message."""
+ if self.content is not None:
+ return self.content.completion
+ return None
+
+ @property
+ def input_tokens(self) -> int | None:
+ """Get input tokens from usage object."""
+ return self.usage.input_tokens if self.usage else None
+
+ @property
+ def output_tokens(self) -> int | None:
+ """Get output tokens from usage object."""
+ return self.usage.output_tokens if self.usage else None
+
+ @property
+ def cache_read_tokens(self) -> int | None:
+ """Get cache read tokens from usage object."""
+ return self.usage.cache_read_tokens if self.usage else None
+
+ @property
+ def cache_write_tokens(self) -> int | None:
+ """Get cache write tokens from usage object."""
+ return self.usage.cache_write_tokens if self.usage else None
+
+ def __post_init__(self):
+ # calculate cost & get external model name
+ self.id = int(self.id)
+ api_model = APIModel.from_registry(self.model_internal)
+ self.model_external = api_model.name
+ self.cost = None
+ if (
+ self.usage is not None
+ and api_model.input_cost is not None
+ and api_model.output_cost is not None
+ ):
+ self.cost = (
+ self.usage.input_tokens * api_model.input_cost / 1e6
+ + self.usage.output_tokens * api_model.output_cost / 1e6
+ )
+ elif self.content is not None and self.completion is not None:
+ print(
+ f"Warning: Completion provided without token counts for model {self.model_internal}."
+ )
+
+ def to_dict(self):
+ return {
+ "id": self.id,
+ "model_internal": self.model_internal,
+ "model_external": self.model_external,
+ "region": self.region,
+ "prompt": self.prompt.to_log(), # destroys image if present
+ "sampling_params": self.sampling_params.__dict__,
+ "status_code": self.status_code,
+ "is_error": self.is_error,
+ "error_message": self.error_message,
+ "completion": self.completion, # computed property
+ "content": self.content.to_log() if self.content else None,
+ "usage": self.usage.to_dict() if self.usage else None,
+ "finish_reason": self.finish_reason,
+ "cost": self.cost,
+ }
+
+ @classmethod
+ def from_dict(cls, data: dict):
+ # Handle backward compatibility for content/completion
+ content = None
+ if "content" in data and data["content"] is not None:
+ # Reconstruct message from log format
+ content = Message.from_log(data["content"])
+ elif "completion" in data and data["completion"] is not None:
+ # Backward compatibility: create a Message with just text
+ content = Message.ai(data["completion"])
+
+ usage = None
+ if "usage" in data and data["usage"] is not None:
+ usage = Usage.from_dict(data["usage"])
+
+ return cls(
+ id=data.get("id", random.randint(0, 1_000_000_000)),
+ model_internal=data["model_internal"],
+ prompt=Conversation.from_log(data["prompt"]),
+ sampling_params=SamplingParams(**data["sampling_params"]),
+ status_code=data["status_code"],
+ is_error=data["is_error"],
+ error_message=data["error_message"],
+ usage=usage,
+ content=content,
+ thinking=data.get("thinking"),
+ model_external=data.get("model_external"),
+ region=data.get("region"),
+ logprobs=data.get("logprobs"),
+ finish_reason=data.get("finish_reason"),
+ cost=data.get("cost"),
+ cache_hit=data.get("cache_hit", False),
+ )
+
+ def write_to_file(self, filename):
+ """
+ Writes the APIResponse as a line to a file.
+ If file exists, appends to it.
+ """
+ with open(filename, "a") as f:
+ f.write(json.dumps(self.to_dict()) + "\n")
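Since `to_dict`/`from_dict` round-trip through plain JSON-compatible dicts, responses can be appended as JSONL with `write_to_file` and reloaded later. A small sketch, assuming a log written earlier via `resp.write_to_file("responses.jsonl")` (the filename is illustrative):

```python
import json

from lm_deluge.api_requests.response import APIResponse

# Each line is a JSON object that from_dict() turns back into an APIResponse.
with open("responses.jsonl") as f:
    responses = [APIResponse.from_dict(json.loads(line)) for line in f]

# Cost is recomputed from usage in __post_init__, so this reflects the registry prices.
print(sum(r.cost or 0.0 for r in responses))
```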
lm_deluge/client.py CHANGED
@@ -6,6 +6,7 @@ import yaml
  from pydantic import BaseModel
  from pydantic.functional_validators import model_validator

+ from lm_deluge.api_requests.openai import stream_chat
  from lm_deluge.batches import (
  submit_batches_anthropic,
  submit_batches_oa,
@@ -34,6 +35,12 @@ class LLMClient(BaseModel):
  """

  model_names: list[str] = ["gpt-4.1-mini"]
+
+ def __init__(self, model_name: str | list[str] | None = None, **kwargs):
+ if model_name is not None:
+ kwargs["model_names"] = model_name
+ super().__init__(**kwargs)
+
  max_requests_per_minute: int = 1_000
  max_tokens_per_minute: int = 100_000
  max_concurrent_requests: int = 225
@@ -81,7 +88,7 @@ class LLMClient(BaseModel):
  @model_validator(mode="before")
  @classmethod
  def fix_lists(cls, data) -> "LLMClient":
- if isinstance(data["model_names"], str):
+ if isinstance(data.get("model_names"), str):
  data["model_names"] = [data["model_names"]]
  if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
  data["sampling_params"] = [
@@ -162,6 +169,11 @@ class LLMClient(BaseModel):
  kwargs["model_names"] = model
  return cls(**kwargs)

+ def _select_model(self):
+ assert isinstance(self.model_weights, list)
+ model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)
+ return self.models[model_idx], self.sampling_params[model_idx]
+
  @overload
  async def process_prompts_async(
  self,
@@ -249,41 +261,6 @@ class LLMClient(BaseModel):
  if len(cache_hit_ids) > 0:
  tracker.update_pbar(len(cache_hit_ids))

- # api_task = asyncio.create_task(
- # process_api_prompts_async(
- # ids,
- # prompts, # type: ignore -- fix later for dry running conversations
- # self.models,
- # self.model_weights, # type: ignore
- # self.sampling_params, # type: ignore
- # max_attempts=self.max_attempts,
- # max_concurrent_requests=self.max_concurrent_requests,
- # request_timeout=self.request_timeout,
- # status_tracker=tracker,
- # tools=tools,
- # cache=cache,
- # computer_use=computer_use,
- # display_width=display_width,
- # display_height=display_height,
- # use_responses_api=use_responses_api,
- # )
- # )
- # async def process_api_prompts_async(
-
- # models: str | list[str],
- # model_weights: list[float],
- # sampling_params: list[SamplingParams],
- # max_attempts: int = 5,
- # max_concurrent_requests: int = 1_000,
- # request_timeout: int = 30,
- # status_tracker: StatusTracker | None = None,
- # tools: list[Tool] | None = None,
- # cache: CachePattern | None = None,
- # computer_use: bool = False,
- # display_width: int = 1024,
- # display_height: int = 768,
- # use_responses_api: bool = False,
- # ):
  if isinstance(ids, np.ndarray):
  ids = ids.tolist() # pyright: ignore

@@ -296,28 +273,28 @@ class LLMClient(BaseModel):
  assert tracker.retry_queue, "retry queue not initialized"
  while True:
  # get next request (if one is not already waiting for capacity)
+ retry_request = False
  if next_request is None:
  if not tracker.retry_queue.empty():
  next_request = tracker.retry_queue.get_nowait()
+ retry_request = True
  print(f"Retrying request {next_request.task_id}.")
  elif prompts_not_finished:
  try:
  # get new request
  id, prompt = next(prompts_iter)
  # select model
- assert isinstance(self.model_weights, list)
- model_idx = np.random.choice(
- range(len(self.models)), p=self.model_weights
- )
+ model, sampling_params = self._select_model()
+
  next_request = create_api_request(
  task_id=id,
- model_name=self.models[model_idx],
+ model_name=model,
  prompt=prompt, # type: ignore
  request_timeout=self.request_timeout,
  attempts_left=self.max_attempts,
  status_tracker=tracker,
  results_arr=requests,
- sampling_params=self.sampling_params[model_idx],
+ sampling_params=sampling_params,
  all_model_names=self.models,
  all_sampling_params=self.sampling_params,
  tools=tools,
@@ -339,10 +316,9 @@ class LLMClient(BaseModel):
  # if enough capacity available, call API
  if next_request:
  next_request_tokens = next_request.num_tokens
- if tracker.check_capacity(next_request_tokens):
+ if tracker.check_capacity(next_request_tokens, retry=retry_request):
  tracker.set_limiting_factor(None)
- next_request.attempts_left -= 1
- # call API
+ # call API (attempts_left will be decremented in handle_error if it fails)
  asyncio.create_task(next_request.call_api())
  next_request = None # reset next_request to empty
  # update pbar status
@@ -360,9 +336,10 @@ class LLMClient(BaseModel):
  await asyncio.sleep(tracker.seconds_to_pause)
  print(f"Pausing {tracker.seconds_to_pause}s to cool down.")

- # after finishing, log final status
- tracker.log_final_status()
- # deduplicate results by id
+ # after finishing, log final status
+ tracker.log_final_status()
+
+ # deduplicate results by id
  api_results = deduplicate_responses(requests)
  for res in api_results:
  results[res.id] = res
@@ -399,6 +376,17 @@ class LLMClient(BaseModel):
  )
  )

+ async def stream(self, prompt: str | Conversation, tools: list[Tool] | None = None):
+ model, sampling_params = self._select_model()
+ if isinstance(prompt, str):
+ prompt = Conversation.user(prompt)
+ async for item in stream_chat(model, prompt, sampling_params, tools, None):
+ if isinstance(item, str):
+ print(item, end="", flush=True)
+ else:
+ # final item
+ return item
+
  async def submit_batch_job(
  self,
  prompts: Sequence[str | list[dict] | Conversation],
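Two user-facing additions stand out in this file: `LLMClient` now accepts the model name(s) positionally, and `client.stream(...)` prints tokens as they arrive before returning the final `APIResponse`. A usage sketch under those assumptions (model name illustrative; requires the matching API key in the environment):

```python
import asyncio

from lm_deluge import LLMClient


async def main() -> None:
    # The new __init__ forwards a positional model name to model_names.
    client = LLMClient("gpt-4.1-mini", max_requests_per_minute=500)

    # stream() picks a model via _select_model(), prints deltas as they
    # arrive, and returns the terminal APIResponse.
    response = await client.stream("Write a haiku about rate limits.")
    if response is not None:
        print("\n---\n", response.completion)


asyncio.run(main())
```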
lm_deluge/config.py CHANGED
@@ -1,13 +1,14 @@
- from pydantic import BaseModel
  from typing import Literal

+ from pydantic import BaseModel
+

  class SamplingParams(BaseModel):
  temperature: float = 0.0
  top_p: float = 1.0
  json_mode: bool = False
  max_new_tokens: int = 512
- reasoning_effort: Literal["low", "medium", "high", None] = None
+ reasoning_effort: Literal["low", "medium", "high", "none", None] = None
  logprobs: bool = False
  top_logprobs: int | None = None
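A minimal sketch of the widened literal; per the openai.py hunk earlier in this diff, the Responses API path keeps "none" for Gemini models and bumps it to "low" for OpenAI reasoning models, which cannot fully disable reasoning:

```python
from lm_deluge.config import SamplingParams

# "none" is now a valid reasoning_effort value.
params = SamplingParams(max_new_tokens=512, reasoning_effort="none")
print(params.reasoning_effort)
```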
 
lm_deluge/file.py ADDED
@@ -0,0 +1,149 @@
+ import os
+ import io
+ import requests
+ import base64
+ import mimetypes
+ import xxhash
+ from dataclasses import dataclass, field
+ from pathlib import Path
+
+
+ @dataclass(slots=True)
+ class File:
+ # raw bytes, pathlike, http url, base64 data url, or file_id
+ data: bytes | io.BytesIO | Path | str
+ media_type: str | None = None # inferred if None
+ filename: str | None = None # optional filename for uploads
+ file_id: str | None = None # for OpenAI file uploads or Anthropic file API
+ type: str = field(init=False, default="file")
+
+ # helpers -----------------------------------------------------------------
+ def _bytes(self) -> bytes:
+ if isinstance(self.data, bytes):
+ return self.data
+ elif isinstance(self.data, io.BytesIO):
+ return self.data.getvalue()
+ elif isinstance(self.data, str) and self.data.startswith("http"):
+ res = requests.get(self.data)
+ res.raise_for_status()
+ return res.content
+ elif isinstance(self.data, str) and os.path.exists(self.data):
+ with open(self.data, "rb") as f:
+ return f.read()
+ elif isinstance(self.data, Path) and self.data.exists():
+ return Path(self.data).read_bytes()
+ elif isinstance(self.data, str) and self.data.startswith("data:"):
+ header, encoded = self.data.split(",", 1)
+ return base64.b64decode(encoded)
+ else:
+ raise ValueError("unreadable file format")
+
+ def _mime(self) -> str:
+ if self.media_type:
+ return self.media_type
+ if isinstance(self.data, (Path, str)):
+ # For URL or path, try to guess from the string
+ path_str = str(self.data)
+ guess = mimetypes.guess_type(path_str)[0]
+ if guess:
+ return guess
+ return "application/pdf" # default to PDF
+
+ def _filename(self) -> str:
+ if self.filename:
+ return self.filename
+ if isinstance(self.data, (Path, str)):
+ path_str = str(self.data)
+ if path_str.startswith("http"):
+ # Extract filename from URL
+ return path_str.split("/")[-1].split("?")[0] or "document.pdf"
+ else:
+ # Extract from local path
+ return os.path.basename(path_str) or "document.pdf"
+ return "document.pdf"
+
+ def _base64(self, include_header: bool = True) -> str:
+ encoded = base64.b64encode(self._bytes()).decode("utf-8")
+ if not include_header:
+ return encoded
+ return f"data:{self._mime()};base64,{encoded}"
+
+ @property
+ def fingerprint(self) -> str:
+ # Hash the file contents for fingerprinting
+ file_bytes = self._bytes()
+ return xxhash.xxh64(file_bytes).hexdigest()
+
+ @property
+ def size(self) -> int:
+ """Return file size in bytes."""
+ return len(self._bytes())
+
+ # ── provider-specific emission ────────────────────────────────────────────
+ def oa_chat(self) -> dict:
+ """For OpenAI Chat Completions - file content as base64 or file_id."""
+ if self.file_id:
+ return {
+ "type": "file",
+ "file": {
+ "file_id": self.file_id,
+ },
+ }
+ else:
+ return {
+ "type": "file",
+ "file": {
+ "filename": self._filename(),
+ "file_data": self._base64(),
+ },
+ }
+
+ def oa_resp(self) -> dict:
+ """For OpenAI Responses API - file content as base64 or file_id."""
+ if self.file_id:
+ return {
+ "type": "input_file",
+ "file_id": self.file_id,
+ }
+ else:
+ return {
+ "type": "input_file",
+ "filename": self._filename(),
+ "file_data": self._base64(),
+ }
+
+ def anthropic(self) -> dict:
+ """For Anthropic Messages API - file content as base64 or file_id."""
+ if self.file_id:
+ return {
+ "type": "document",
+ "source": {
+ "type": "file",
+ "file_id": self.file_id,
+ },
+ }
+ else:
+ b64 = base64.b64encode(self._bytes()).decode()
+ return {
+ "type": "document",
+ "source": {
+ "type": "base64",
+ "media_type": self._mime(),
+ "data": b64,
+ },
+ }
+
+ def anthropic_file_upload(self) -> tuple[str, bytes, str]:
+ """For Anthropic Files API - return tuple for file upload."""
+ filename = self._filename()
+ content = self._bytes()
+ media_type = self._mime()
+ return filename, content, media_type
+
+ def gemini(self) -> dict:
+ """For Gemini API - not yet supported."""
+ raise NotImplementedError("File support for Gemini is not yet implemented")
+
+ def mistral(self) -> dict:
+ """For Mistral API - not yet supported."""
+ raise NotImplementedError("File support for Mistral is not yet implemented")
lm_deluge/prompt.py CHANGED
@@ -1,12 +1,15 @@
  import io
  import json
- import tiktoken
- import xxhash
  from dataclasses import dataclass, field
  from pathlib import Path
  from typing import Literal, Sequence
- from lm_deluge.models import APIModel
+
+ import tiktoken
+ import xxhash
+
+ from lm_deluge.file import File
  from lm_deluge.image import Image
+ from lm_deluge.models import APIModel

  CachePattern = Literal[
  "tools_only",
@@ -203,7 +206,7 @@ class Thinking:
  return {"type": "text", "text": f"[Thinking: {self.content}]"}


- Part = Text | Image | ToolCall | ToolResult | Thinking
+ Part = Text | Image | File | ToolCall | ToolResult | Thinking


  ###############################################################################
@@ -246,6 +249,11 @@ class Message:
  """Get all image parts with proper typing."""
  return [part for part in self.parts if part.type == "image"] # type: ignore

+ @property
+ def files(self) -> list[File]:
+ """Get all file parts with proper typing."""
+ return [part for part in self.parts if part.type == "file"] # type: ignore
+
  @property
  def thinking_parts(self) -> list["Thinking"]:
  """Get all thinking parts with proper typing."""
@@ -262,6 +270,9 @@ class Message:
  elif isinstance(p, Image): # Image – redact the bytes, keep a hint
  w, h = p.size
  content_blocks.append({"type": "image", "tag": f"<Image ({w}×{h})>"})
+ elif isinstance(p, File): # File – redact the bytes, keep a hint
+ size = p.size
+ content_blocks.append({"type": "file", "tag": f"<File ({size} bytes)>"})
  elif isinstance(p, ToolCall):
  content_blocks.append(
  {
@@ -296,6 +307,9 @@ class Message:
  elif p["type"] == "image":
  # We only stored a placeholder tag, so keep that placeholder.
  parts.append(Image(p["tag"], detail="low"))
+ elif p["type"] == "file":
+ # We only stored a placeholder tag, so keep that placeholder.
+ parts.append(File(p["tag"]))
  elif p["type"] == "tool_call":
  parts.append(
  ToolCall(id=p["id"], name=p["name"], arguments=p["arguments"])
@@ -340,6 +354,20 @@ class Message:
  self.parts.append(img)
  return self

+ def add_file(
+ self,
+ data: bytes | str | Path | io.BytesIO,
+ *,
+ media_type: str | None = None,
+ filename: str | None = None,
+ ) -> "Message":
+ """
+ Append a file block and return self for chaining.
+ """
+ file = File(data, media_type=media_type, filename=filename)
+ self.parts.append(file)
+ return self
+
  def add_tool_call(self, id: str, name: str, arguments: dict) -> "Message":
  """Append a tool call block and return self for chaining."""
  self.parts.append(ToolCall(id=id, name=name, arguments=arguments))
@@ -362,12 +390,15 @@ class Message:
  text: str | None = None,
  *,
  image: str | bytes | Path | io.BytesIO | None = None,
+ file: str | bytes | Path | io.BytesIO | None = None,
  ) -> "Message":
  res = cls("user", [])
  if text is not None:
  res.add_text(text)
  if image is not None:
  res.add_image(image)
+ if file is not None:
+ res.add_file(file)
  return res

  @classmethod
@@ -403,6 +434,19 @@ class Message:
  part_list.append(Text(item["text"]))
  elif item["type"] == "image_url":
  part_list.append(Image(data=item["image_url"]["url"]))
+ elif item["type"] == "file":
+ file_data = item["file"]
+ if "file_id" in file_data:
+ # Handle file ID reference (not implemented yet)
+ part_list.append(File(data=file_data["file_id"]))
+ elif "file_data" in file_data:
+ # Handle base64 file data
+ part_list.append(
+ File(
+ data=file_data["file_data"],
+ filename=file_data.get("filename"),
+ )
+ )
  parts = part_list

  # Handle tool calls (assistant messages)
@@ -511,11 +555,17 @@ class Conversation:

  @classmethod
  def user(
- cls, text: str, *, image: bytes | str | Path | None = None
+ cls,
+ text: str,
+ *,
+ image: bytes | str | Path | None = None,
+ file: bytes | str | Path | None = None,
  ) -> "Conversation":
- msg = (
- Message.user(text) if image is None else Message.user(text).add_image(image)
- )
+ msg = Message.user(text)
+ if image is not None:
+ msg.add_image(image)
+ if file is not None:
+ msg.add_file(file)
  return cls([msg])

  @classmethod
@@ -677,6 +727,9 @@ class Conversation:
  if isinstance(part, Image):
  # Force conversion to bytes if not already
  part.data = part._bytes()
+ elif isinstance(part, File):
+ # Force conversion to bytes if not already
+ part.data = part._bytes()
  return self

  def _add_cache_control_to_message(self, message: dict) -> None:
@@ -765,6 +818,11 @@ class Conversation:
  content_blocks.append(
  {"type": "image", "tag": f"<Image ({w}×{h})>"}
  )
+ elif isinstance(p, File): # File – redact the bytes, keep a hint
+ size = p.size
+ content_blocks.append(
+ {"type": "file", "tag": f"<File ({size} bytes)>"}
+ )
  elif isinstance(p, ToolCall):
  content_blocks.append(
  {
@@ -795,7 +853,7 @@ class Conversation:

  for m in payload.get("messages", []):
  role: Role = m["role"] # 'system' | 'user' | 'assistant'
- parts: list[Text | Image | ToolCall | ToolResult | Thinking] = []
+ parts: list[Part] = []

  for p in m["content"]:
  if p["type"] == "text":
@@ -804,6 +862,9 @@ class Conversation:
  # We only stored a placeholder tag, so keep that placeholder.
  # You could raise instead if real image bytes are required.
  parts.append(Image(p["tag"], detail="low"))
+ elif p["type"] == "file":
+ # We only stored a placeholder tag, so keep that placeholder.
+ parts.append(File(p["tag"]))
  elif p["type"] == "tool_call":
  parts.append(
  ToolCall(id=p["id"], name=p["name"], arguments=p["arguments"])
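With `File` wired into the message parts, attaching a PDF mirrors the existing image helpers. A sketch (the file path is illustrative):

```python
from lm_deluge.prompt import Conversation, Message

# Build the conversation directly with a file...
conv = Conversation.user("What are the key findings?", file="paper.pdf")

# ...or chain onto a message, mirroring add_image.
msg = Message.user("What are the key findings?").add_file(
    "paper.pdf", media_type="application/pdf"
)
print(len(msg.files))  # the new .files property collects File parts
```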
lm_deluge/tracker.py CHANGED
@@ -67,7 +67,7 @@ class StatusTracker:
  def set_limiting_factor(self, factor):
  self.limiting_factor = factor

- def check_capacity(self, num_tokens: int):
+ def check_capacity(self, num_tokens: int, retry: bool = False):
  request_available = self.available_request_capacity >= 1
  tokens_available = self.available_token_capacity >= num_tokens
  concurrent_request_available = (
@@ -76,8 +76,10 @@ class StatusTracker:
  if request_available and tokens_available and concurrent_request_available:
  self.available_request_capacity -= 1
  self.available_token_capacity -= num_tokens
- self.num_tasks_started += 1
- self.num_tasks_in_progress += 1
+ if not retry:
+ # Only count new tasks, not retries
+ self.num_tasks_started += 1
+ self.num_tasks_in_progress += 1
  self.set_limiting_factor(None)
  return True
  else:
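The `retry` flag keeps retried requests from inflating the started/in-progress counters a second time. A toy stand-in (not the library's `StatusTracker`, whose constructor is not shown in this diff) illustrating only the counting change:

```python
class ToyTracker:
    def __init__(self) -> None:
        self.num_tasks_started = 0
        self.num_tasks_in_progress = 0

    def check_capacity(self, num_tokens: int, retry: bool = False) -> bool:
        # rate-limit bookkeeping elided; only the counting change is illustrated
        if not retry:
            self.num_tasks_started += 1
            self.num_tasks_in_progress += 1
        return True


t = ToyTracker()
t.check_capacity(100)              # new request: counted once
t.check_capacity(100, retry=True)  # retried request: not counted again
assert t.num_tasks_started == 1
```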
lm_deluge-0.0.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.13
+ Version: 0.0.14
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -30,6 +30,7 @@ Dynamic: license-file
  `lm-deluge` is a lightweight helper library for maxing out your rate limits with LLM providers. It provides the following:

  - **Unified client** – Send prompts to all relevant models with a single client.
+ - **Files and Images** - Include images easily for multimodal models, and PDF files for models that support them (OpenAI and Anthropic).
  - **Massive concurrency with throttling** – Set `max_tokens_per_minute` and `max_requests_per_minute` and let it fly. The client will process as many requests as possible while respecting rate limits and retrying failures.
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
@@ -41,6 +42,8 @@ Dynamic: license-file

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!

+ **Update 06/02/2025:** I lied, it supports (very basic) streaming now via client.stream(...). It will print tokens as they arrive, then return an APIResponse at the end. More sophisticated streaming may or may not be implemented later, don't count on it.
+
  ## Installation

  ```bash
lm_deluge-0.0.14.dist-info/RECORD CHANGED
@@ -1,26 +1,28 @@
- lm_deluge/__init__.py,sha256=XR_EuBvJM4LggqfWdsrdQij1-UIGAFwyvHW9Rp8tnQA,280
+ lm_deluge/__init__.py,sha256=mAztMuxINmh7dGbYnT8tsmw1eryQAvd0jpY8yHzd0EE,315
  lm_deluge/agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lm_deluge/batches.py,sha256=dI5G9uvmoDU9hMohrkEhlIDyJPsmsVwZPwxx6qETxxk,17728
  lm_deluge/cache.py,sha256=VB1kv8rM2t5XWPR60uhszFcxLDnVKOe1oA5hYjVDjIo,4375
- lm_deluge/client.py,sha256=nkYO_wsGgUkFfqfb_8JrDzcU39RL9FfplKEK6zrncAo,20564
- lm_deluge/config.py,sha256=E47daVMvqMicoY2CDcgUnN5nVGDLAQejR358B-pRHZk,923
+ lm_deluge/client.py,sha256=kMHA3VlCRk_Ly1CiJ6rRz2GxttxhVuw6WEQtdMVrK-4,19806
+ lm_deluge/config.py,sha256=H1tQyJDNHGFuwxqQNL5Z-CjWAC0luHSBA3iY_pxmACM,932
  lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
  lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
+ lm_deluge/file.py,sha256=9l-zWKoHPnPhTL_CZNbxyoKwbLxlXHkRU2bz43qxaV4,5311
  lm_deluge/gemini_limits.py,sha256=V9mpS9JtXYz7AY6OuKyQp5TuIMRH1BVv9YrSNmGmHNA,1569
  lm_deluge/image.py,sha256=hFbRajqEVQbkirAfOxsTPkeq-27Zl-so4AWBFeUbpBI,7161
  lm_deluge/models.py,sha256=gW9ZhKYjwC-ZF-SzWqagFUE_7Mqerdtt_T5NxGo040E,46583
- lm_deluge/prompt.py,sha256=dKaV4gI9yLB0w0Ukdz14kGl34yMm5JNm6Sc-24WQPcg,32202
+ lm_deluge/prompt.py,sha256=KOuJFwpRKuz2F5WLniZzjOTW05I--mzYyMglr-s47F8,34601
  lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
  lm_deluge/tool.py,sha256=C2zwU9-7fldfYT0TZDoVVGGSC6dN_It9GSxnfkN6Z_w,9822
- lm_deluge/tracker.py,sha256=Un2uthRNZk3dl2fODvvR6CCyFW3IKWfR0GjvpB_dxoM,9095
+ lm_deluge/tracker.py,sha256=4QQ0-H01KQp8x8KccidBIJWA5zfSQyA0kgTynvSG0gk,9202
  lm_deluge/usage.py,sha256=oS-rmF3ZJ1RMtR7WI6BB2uVOAjJg0scvGF3zZRahWVg,4449
  lm_deluge/api_requests/__init__.py,sha256=_aSpD6CJL9g6OpLPoChXiHjl4MH_OlGcKgfZaW8cgLM,71
  lm_deluge/api_requests/anthropic.py,sha256=itKPu1cqCYcrr4fkLarlvSYr6tqLEAGVLGXEG05QXWM,8345
- lm_deluge/api_requests/base.py,sha256=ixI326EtRadoVCbmvIddzzzIp6E_zPfPOIfDEnucZrc,18060
+ lm_deluge/api_requests/base.py,sha256=THgCceZ_z9YjA_E9WWME5f2tIRSOOI2OAQCAWVlV-Xg,12448
  lm_deluge/api_requests/bedrock.py,sha256=yh4-zMrjlQfmxoBbrc2WYJ8gEqVkTP_-tMR7-XbTAtQ,11753
  lm_deluge/api_requests/common.py,sha256=pcOpODL4heoaNLjbA6_ogkrOAbUSKY3F37D2EyMLW10,359
  lm_deluge/api_requests/mistral.py,sha256=PkuoKbOJAB6DOK_NvzbxpWPAktfvonf69QjC0tVCYuE,5366
- lm_deluge/api_requests/openai.py,sha256=fj-ioXeK6-OGl9VIFpVy6XJRYOvf6TgMv7eu5mkC8RE,16482
+ lm_deluge/api_requests/openai.py,sha256=HUn83Y_Roo3pCUTBnrQhL9skW_PJ4OvS5gr5rIg58dU,19366
+ lm_deluge/api_requests/response.py,sha256=X6AHXv-4dWHLKkPv7J0MSesweunqxIqJED6UY6ypdzE,5770
  lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
  lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
  lm_deluge/api_requests/deprecated/deepseek.py,sha256=FEApI93VAWDwuaqTooIyKMgONYqRhdUmiAPBRme-IYs,4582
@@ -35,8 +37,8 @@ lm_deluge/util/json.py,sha256=_4Oar2Cmz2L1DK3EtPLPDxD6rsYHxjROmV8ZpmMjQ-4,5822
  lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11768
  lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
  lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
- lm_deluge-0.0.13.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
- lm_deluge-0.0.13.dist-info/METADATA,sha256=GEkP9_w0VcPOGEKad9Yh24WOhiW4TQvC2pX4wK1x0jk,11549
- lm_deluge-0.0.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lm_deluge-0.0.13.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
- lm_deluge-0.0.13.dist-info/RECORD,,
+ lm_deluge-0.0.14.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+ lm_deluge-0.0.14.dist-info/METADATA,sha256=iK9UuTpf235TbQQ6CkrLX725loOMSdwTscZJQgEHeoo,11942
+ lm_deluge-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lm_deluge-0.0.14.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+ lm_deluge-0.0.14.dist-info/RECORD,,