lm-deluge 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/api_requests/anthropic.py +16 -14
- lm_deluge/api_requests/base.py +10 -2
- lm_deluge/api_requests/bedrock.py +18 -20
- lm_deluge/api_requests/gemini.py +4 -3
- lm_deluge/api_requests/mistral.py +2 -0
- lm_deluge/api_requests/openai.py +96 -83
- lm_deluge/api_requests/response.py +4 -2
- lm_deluge/batches.py +106 -77
- lm_deluge/client.py +24 -11
- lm_deluge/image.py +6 -2
- lm_deluge/models.py +61 -59
- lm_deluge/prompt.py +104 -56
- lm_deluge/request_context.py +2 -0
- lm_deluge/tool.py +115 -26
- {lm_deluge-0.0.21.dist-info → lm_deluge-0.0.23.dist-info}/METADATA +1 -1
- {lm_deluge-0.0.21.dist-info → lm_deluge-0.0.23.dist-info}/RECORD +19 -19
- {lm_deluge-0.0.21.dist-info → lm_deluge-0.0.23.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.21.dist-info → lm_deluge-0.0.23.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.21.dist-info → lm_deluge-0.0.23.dist-info}/top_level.txt +0 -0
lm_deluge/batches.py  CHANGED

@@ -3,7 +3,7 @@ import json
 import time
 import asyncio
 import aiohttp
-import
+import tempfile
 from lm_deluge.prompt import CachePattern, Conversation, prompts_to_conversations
 from lm_deluge.config import SamplingParams
 from lm_deluge.models import APIModel
@@ -16,6 +16,7 @@ from rich.spinner import Spinner
 from rich.table import Table
 from rich.text import Text
 from lm_deluge.models import registry
+from lm_deluge.request_context import RequestContext


 def _create_batch_status_display(
@@ -79,11 +80,8 @@ def _create_batch_status_display(
     return grid


-async def submit_batch_oa(batch_requests: list[dict]):
-    """
-    pd.DataFrame(batch_requests).to_json(
-        "requests_temp.jsonl", orient="records", lines=True
-    )
+async def submit_batch_oa(file_path: str):
+    """Upload a JSONL file and create one OpenAI batch."""

     # upload the file
     api_key = os.environ.get("OPENAI_API_KEY", None)
@@ -99,21 +97,22 @@ async def submit_batch_oa(batch_requests: list[dict]):
         url = "https://api.openai.com/v1/files"
         data = aiohttp.FormData()
         data.add_field("purpose", "batch")
-
-
-
-
-
-
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )

-
-
-
-
+            async with session.post(url, data=data, headers=headers) as response:
+                if response.status != 200:
+                    text = await response.text()
+                    raise ValueError(f"Error uploading file: {text}")

-
-
-
+                print("File uploaded successfully")
+                response_data = await response.json()
+                file_id = response_data["id"]

         # Create batch
         url = "https://api.openai.com/v1/batches"
@@ -131,46 +130,82 @@ async def submit_batch_oa(batch_requests: list[dict]):
             response_data = await response.json()
            batch_id = response_data["id"]
            print("Batch job started successfully: id = ", batch_id)
-
+
+    os.remove(file_path)
+    return batch_id
+
+
+async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
+    """Upload a JSONL file and create one Anthropic batch."""
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{registry[model].api_base}/messages/batches"
+        data = aiohttp.FormData()
+        with open(file_path, "rb") as f:
+            data.add_field(
+                "file",
+                f,
+                filename=os.path.basename(file_path),
+                content_type="application/json",
+            )
+
+            async with session.post(url, data=data, headers=headers) as response:
+                if response.status != 200:
+                    text = await response.text()
+                    raise ValueError(f"Error creating batch: {text}")
+
+                batch_data = await response.json()
+                batch_id = batch_data["id"]
+                print(f"Anthropic batch job started successfully: id = {batch_id}")
+
+    os.remove(file_path)
+    return batch_id


 async def submit_batches_oa(
     model: str,
     sampling_params: SamplingParams,
     prompts: Sequence[str | list[dict] | Conversation],
+    batch_size: int = 50_000,
 ):
-
+    """Write OpenAI batch requests to a file and submit."""
+    BATCH_SIZE = batch_size
+
     prompts = prompts_to_conversations(prompts)
     if any(p is None for p in prompts):
         raise ValueError("All prompts must be valid.")
-    ids = [i for i, _ in enumerate(prompts)]

-    # create file with requests to send to batch api
-    batch_requests = []
     model_obj = APIModel.from_registry(model)
-    for id, prompt in zip(ids, prompts):
-        assert isinstance(prompt, Conversation)
-        batch_requests.append(
-            {
-                "custom_id": str(id),
-                "method": "POST",
-                "url": "/v1/chat/completions",
-                "body": _build_oa_chat_request(model_obj, prompt, [], sampling_params),
-            }
-        )

-    # since the api only accepts up to 50,000 requests per batch job, we chunk into 50k chunks
-    BATCH_SIZE = 50_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
     tasks = []
-
-
+
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                context = RequestContext(
+                    task_id=idx,
+                    model_name=model,
+                    prompt=prompt,
+                    sampling_params=sampling_params,
+                )
+                request = {
+                    "custom_id": str(idx),
+                    "method": "POST",
+                    "url": "/v1/chat/completions",
+                    "body": _build_oa_chat_request(model_obj, context),
+                }
+                json.dump(request, f)
+                f.write("\n")
+
+            file_path = f.name
+
+        tasks.append(asyncio.create_task(submit_batch_oa(file_path)))
+
     batch_ids = await asyncio.gather(*tasks)

-    print(f"Submitted {len(
+    print(f"Submitted {len(tasks)} batch jobs.")

     return batch_ids

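Taken together, the hunks above switch the OpenAI path from building one in-memory request list to streaming requests into temporary JSONL files, one file per batch_size prompts, with each file uploaded and registered as its own batch by submit_batch_oa (which now takes a file path and deletes it after a successful upload). A minimal usage sketch follows; the SamplingParams() default construction and the "gpt-4o-mini" registry key are assumptions, not shown in this diff.

import asyncio

from lm_deluge.batches import submit_batches_oa
from lm_deluge.config import SamplingParams


async def main():
    prompts = [f"Summarize item {i}" for i in range(120_000)]
    # with batch_size=50_000 this writes three temp .jsonl files and
    # submits three OpenAI batch jobs concurrently
    batch_ids = await submit_batches_oa(
        "gpt-4o-mini",     # assumed registry key
        SamplingParams(),  # assumed to be constructible with defaults
        prompts,
        batch_size=50_000,
    )
    print(batch_ids)


asyncio.run(main())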
@@ -181,6 +216,7 @@ async def submit_batches_anthropic(
     prompts: Sequence[str | list[dict] | Conversation],
     *,
     cache: CachePattern | None = None,
+    batch_size=100_000,
 ):
     """Submit a batch job to Anthropic's Message Batches API.

@@ -196,47 +232,40 @@ async def submit_batches_anthropic(

     # Convert prompts to Conversations
     prompts = prompts_to_conversations(prompts)
-    # Create batch requests
-    request_headers = None
-    batch_requests = []
-    for i, prompt in enumerate(prompts):
-        assert isinstance(prompt, Conversation)
-        # Build request body
-        request_body, request_headers = _build_anthropic_request(
-            APIModel.from_registry(model), prompt, [], sampling_params, cache
-        )

-
-
-    # Chunk into batches of 100k requests (Anthropic's limit)
-    BATCH_SIZE = 100_000
-    batches = [
-        batch_requests[i : i + BATCH_SIZE]
-        for i in range(0, len(batch_requests), BATCH_SIZE)
-    ]
-    batch_ids = []
+    request_headers = None
+    BATCH_SIZE = batch_size
     batch_tasks = []
-    async with aiohttp.ClientSession() as session:
-        for batch in batches:
-            url = f"{registry[model].api_base}/messages/batches"
-            data = {"requests": batch}

-
-
-
-
-
+    for start in range(0, len(prompts), BATCH_SIZE):
+        batch_prompts = prompts[start : start + BATCH_SIZE]
+        with tempfile.NamedTemporaryFile(mode="w+", suffix=".jsonl", delete=False) as f:
+            for idx, prompt in enumerate(batch_prompts, start=start):
+                assert isinstance(prompt, Conversation)
+                context = RequestContext(
+                    task_id=idx,
+                    model_name=model,
+                    prompt=prompt,
+                    sampling_params=sampling_params,
+                    cache=cache,
+                )
+                request_body, request_headers = _build_anthropic_request(
+                    APIModel.from_registry(model), context
+                )
+                json.dump({"custom_id": str(idx), "params": request_body}, f)
+                f.write("\n")

-
-        batch_id = batch_data["id"]
-        print(f"Anthropic batch job started successfully: id = {batch_id}")
-        return batch_id
+            file_path = f.name

-
+        batch_tasks.append(
+            asyncio.create_task(
+                _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+            )
+        )

-
+    batch_ids = await asyncio.gather(*batch_tasks)

-    print(f"Submitted {len(
+    print(f"Submitted {len(batch_tasks)} batch jobs.")
     return batch_ids


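The Anthropic path mirrors this: each prompt becomes a {"custom_id": ..., "params": <request body>} line in a temporary JSONL file, chunked by batch_size (default 100,000), and each file is uploaded by _submit_anthropic_batch with the headers returned by _build_anthropic_request. A sketch of the call, under the same assumption about SamplingParams and with "claude-3.5-haiku" as an assumed registry key:

import asyncio

from lm_deluge.batches import submit_batches_anthropic
from lm_deluge.config import SamplingParams


async def main():
    prompts = [f"Label ticket {i}" for i in range(250_000)]
    # 250k prompts at the default batch_size of 100_000 -> three JSONL files,
    # three Message Batches jobs submitted concurrently
    batch_ids = await submit_batches_anthropic(
        "claude-3.5-haiku",  # assumed registry key
        SamplingParams(),    # assumed default-constructible
        prompts,
        cache=None,          # or a CachePattern to enable prompt caching
        batch_size=100_000,
    )
    print(batch_ids)


asyncio.run(main())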
lm_deluge/client.py  CHANGED

@@ -22,11 +22,8 @@ from .models import APIModel, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker

-# from .cache import LevelDBCache, SqliteCache
-

 # TODO: get completions as they finish, not all at once at the end.
-# relatedly, would be nice to cache them as they finish too.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class LLMClient(BaseModel):
     """
@@ -60,6 +57,7 @@ class LLMClient(BaseModel):
     reasoning_effort: Literal["low", "medium", "high", None] = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    force_local_mcp: bool = False

     # NEW! Builder methods
     def with_model(self, model: str):
@@ -113,6 +111,7 @@ class LLMClient(BaseModel):
         if isinstance(self.model_names, str):
             self.model_names = [self.model_names]
         if any(m not in registry for m in self.model_names):
+            print("got model names:", self.model_names)
             raise ValueError("all model_names must be in registry")
         if isinstance(self.sampling_params, SamplingParams):
             self.sampling_params = [self.sampling_params for _ in self.model_names]
@@ -368,6 +367,7 @@ class LLMClient(BaseModel):
                     cache=cache,
                     use_responses_api=use_responses_api,
                     extra_headers=self.extra_headers,
+                    force_local_mcp=self.force_local_mcp,
                 )
             except StopIteration:
                 prompts_not_finished = False
@@ -389,8 +389,6 @@ class LLMClient(BaseModel):
                     results[ctx.task_id] = response
                 except Exception as e:
                     # Create an error response for validation errors and other exceptions
-                    from .api_requests.response import APIResponse
-
                     error_response = APIResponse(
                         id=ctx.task_id,
                         model_internal=ctx.model_name,
@@ -421,7 +419,8 @@ class LLMClient(BaseModel):

             # Sleep - original logic
             await asyncio.sleep(seconds_to_sleep_each_loop + tracker.seconds_to_pause)
-
+
+        tracker.log_final_status()

         if return_completions_only:
             return [r.completion if r is not None else None for r in results]
@@ -468,7 +467,7 @@ class LLMClient(BaseModel):
         self,
         conversation: str | Conversation,
         *,
-        tools: list[Tool | dict] | None = None,
+        tools: list[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -482,6 +481,16 @@ class LLMClient(BaseModel):
         if isinstance(conversation, str):
             conversation = Conversation.user(conversation)

+        # Expand MCPServer objects to their constituent tools for tool execution
+        expanded_tools: list[Tool] = []
+        if tools:
+            for tool in tools:
+                if isinstance(tool, Tool):
+                    expanded_tools.append(tool)
+                elif isinstance(tool, MCPServer):
+                    mcp_tools = await tool.to_tools()
+                    expanded_tools.extend(mcp_tools)
+
         last_response: APIResponse | None = None

         for _ in range(max_rounds):
@@ -504,9 +513,9 @@ class LLMClient(BaseModel):

             for call in tool_calls:
                 tool_obj = None
-                if
-                    for t in
-                        if
+                if expanded_tools:
+                    for t in expanded_tools:
+                        if t.name == call.name:
                             tool_obj = t
                             break

@@ -553,6 +562,7 @@ class LLMClient(BaseModel):
         *,
         tools: list[Tool] | None = None,
         cache: CachePattern | None = None,
+        batch_size: int = 50_000,
     ):
         """Submit a batch job asynchronously, automatically detecting the provider based on model.

@@ -572,13 +582,16 @@ class LLMClient(BaseModel):
         api_spec = registry[model].api_spec

         if api_spec == "openai":
-            return await submit_batches_oa(
+            return await submit_batches_oa(
+                model, self.sampling_params[0], prompts, batch_size=batch_size
+            )
         elif api_spec == "anthropic":
             return await submit_batches_anthropic(
                 model,
                 self.sampling_params[0],
                 prompts,
                 cache=cache,
+                batch_size=batch_size,
             )
         else:
             raise ValueError(f"Batch processing not supported for API spec: {api_spec}")
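The client changes add a force_local_mcp flag, pass batch_size through to the batch submitters, and let the agent loop accept MCPServer entries in its tools list, flattening them into plain Tool objects before matching tool calls by name. A standalone sketch of that expansion step; the lm_deluge.tool import path and the handling of bare dict tool specs are assumptions:

from lm_deluge.tool import Tool, MCPServer  # import path assumed


async def expand_tools(tools: list[Tool | dict | MCPServer] | None) -> list[Tool]:
    expanded: list[Tool] = []
    for tool in tools or []:
        if isinstance(tool, Tool):
            expanded.append(tool)
        elif isinstance(tool, MCPServer):
            # the diff awaits MCPServer.to_tools(), so the server is queried
            # asynchronously for its tool definitions
            expanded.extend(await tool.to_tools())
        # bare dict tool specs are not executable locally; the diff's loop
        # skips them as well, so they are skipped here
    return expanded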
lm_deluge/image.py  CHANGED

@@ -10,7 +10,7 @@ from typing import Literal
 import requests
 from PIL import Image as PILImage  # type: ignore

-MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"]
+MediaType = Literal["image/jpeg", "image/png", "image/gif", "image/webp"] | str


 @dataclass(slots=True)
@@ -23,6 +23,9 @@ class Image:
     _fingerprint_cache: str | None = field(init=False, default=None)
     _size_cache: tuple[int, int] | None = field(init=False, default=None)

+    def __repr__(self):
+        return f"Image(data=[{type(self.data)}], media_type={self.media_type}, detail={self.detail})"
+
     @classmethod
     def from_pdf(
         cls,
@@ -69,10 +72,11 @@ class Image:
         elif isinstance(self.data, Path) and self.data.exists():
             return Path(self.data).read_bytes()
         elif isinstance(self.data, str) and self.data.startswith("data:"):
+            # print("base64 path selected")
             header, encoded = self.data.split(",", 1)
             return base64.b64decode(encoded)
         else:
-            raise ValueError("unreadable image format")
+            raise ValueError(f"unreadable image format. type: {type(self.data)}")

     def _mime(self) -> str:
         if self.media_type:
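With MediaType widened to Literal[...] | str, an Image can carry a media type outside the four original literals, and the new __repr__ reports the type of data instead of dumping the payload. A small sketch; the constructor argument names are inferred from the dataclass fields referenced above (data, media_type, detail) and the "auto" detail value is assumed:

from lm_deluge.image import Image

img = Image(
    data="data:image/avif;base64,AAAAHGZ0eXBhdmlm...",  # truncated payload, illustration only
    media_type="image/avif",  # not one of the four literals; allowed by the widened alias
    detail="auto",            # assumed value
)
print(img)  # the new __repr__ prints type(data) rather than the raw bytes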
lm_deluge/models.py  CHANGED

@@ -42,7 +42,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-70b": {
-        "id": "llama-3.3-
+        "id": "llama-3.3-70b",
         "name": "Llama-3.3-70B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -56,7 +56,7 @@ BUILTIN_MODELS = {
         "reasoning_model": False,
     },
     "llama-3.3-8b": {
-        "id": "llama-3.3-
+        "id": "llama-3.3-8b",
         "name": "Llama-3.3-8B-Instruct",
         "api_base": "https://api.llama.com/compat/v1",
         "api_key_env_var": "META_API_KEY",
@@ -670,62 +670,62 @@ BUILTIN_MODELS = {
     # "requests_per_minute": 120,
     # "tokens_per_minute": None,
     # },
-    "gemini-2.5-pro-vertex": {
-        "id": "gemini-2.5-pro",
-        "name": "gemini-2.5-pro-preview-05-06",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 1.25,
-        "output_cost": 10.0,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.5-flash-vertex": {
-        "id": "gemini-2.5-flash",
-        "name": "gemini-2.5-flash-preview-05-20",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.15,
-        "output_cost": 0.6,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": True,
-    },
-    "gemini-2.0-flash-vertex": {
-        "id": "gemini-2.0-flash",
-        "name": "gemini-2.0-flash",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.10,
-        "output_cost": 0.40,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
-    "gemini-2.0-flash-lite-vertex": {
-        "id": "gemini-2.0-flash-lite",
-        "name": "gemini-2.0-flash-lite",
-        "api_base": "",
-        "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
-        "supports_json": True,
-        "supports_logprobs": False,
-        "api_spec": "vertex_gemini",
-        "input_cost": 0.075,
-        "output_cost": 0.30,
-        "requests_per_minute": 20,
-        "tokens_per_minute": 100_000,
-        "reasoning_model": False,
-    },
+    # "gemini-2.5-pro-vertex": {
+    #     "id": "gemini-2.5-pro",
+    #     "name": "gemini-2.5-pro-preview-05-06",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 1.25,
+    #     "output_cost": 10.0,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.5-flash-vertex": {
+    #     "id": "gemini-2.5-flash",
+    #     "name": "gemini-2.5-flash-preview-05-20",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.15,
+    #     "output_cost": 0.6,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": True,
+    # },
+    # "gemini-2.0-flash-vertex": {
+    #     "id": "gemini-2.0-flash",
+    #     "name": "gemini-2.0-flash",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.10,
+    #     "output_cost": 0.40,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
+    # "gemini-2.0-flash-lite-vertex": {
+    #     "id": "gemini-2.0-flash-lite",
+    #     "name": "gemini-2.0-flash-lite",
+    #     "api_base": "",
+    #     "api_key_env_var": "GOOGLE_APPLICATION_CREDENTIALS",
+    #     "supports_json": True,
+    #     "supports_logprobs": False,
+    #     "api_spec": "vertex_gemini",
+    #     "input_cost": 0.075,
+    #     "output_cost": 0.30,
+    #     "requests_per_minute": 20,
+    #     "tokens_per_minute": 100_000,
+    #     "reasoning_model": False,
+    # },
     # ███████████ █████ █████
     # ░░███░░░░░███ ░░███ ░░███
     # ░███ ░███ ██████ ███████ ████████ ██████ ██████ ░███ █████
@@ -1138,7 +1138,7 @@ BUILTIN_MODELS = {
         "output_cost": 0.7,
     },
     "mixtral-8x22b": {
-        "id": "
+        "id": "mixtral-8x22b",
         "name": "open-mixtral-8x22b",
         "api_base": "https://api.mistral.ai/v1",
         "api_key_env_var": "MISTRAL_API_KEY",
@@ -1243,3 +1243,5 @@ def register_model(**kwargs) -> APIModel:
 # Populate registry with builtin models
 for cfg in BUILTIN_MODELS.values():
     register_model(**cfg)
+
+# print("Valid models:", registry.keys())