lm-deluge 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1 @@
-from .base import create_api_request
 
-__all__ = ["create_api_request"]
@@ -1,35 +1,39 @@
-from aiohttp import ClientResponse
 import json
 import os
-from typing import Callable
+
+from aiohttp import ClientResponse
 
 from lm_deluge.prompt import (
+    CachePattern,
     Conversation,
     Message,
     Text,
-    ToolCall,
     Thinking,
-    CachePattern,
+    ToolCall,
 )
-from lm_deluge.tool import Tool
+from lm_deluge.request_context import RequestContext
+from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
-from .base import APIRequestBase, APIResponse
 
-from ..tracker import StatusTracker
 from ..config import SamplingParams
 from ..models import APIModel
-from ..computer_use.anthropic_tools import get_anthropic_cu_tools
+from .base import APIRequestBase, APIResponse
+
+
+def _add_beta(headers: dict, beta: str):
+    if "anthropic-beta" in headers and headers["anthropic-beta"]:
+        if beta not in headers["anthropic-beta"]:
+            headers["anthropic-beta"] += f",{beta}"
+    else:
+        headers["anthropic-beta"] = beta
 
 
 def _build_anthropic_request(
     model: APIModel,
     prompt: Conversation,
-    tools: list[Tool] | None,
+    tools: list[Tool | dict | MCPServer] | None,
     sampling_params: SamplingParams,
     cache_pattern: CachePattern | None = None,
-    computer_use: bool = False,
-    display_width: int = 1024,
-    display_height: int = 768,
 ):
     system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
     request_header = {
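The new _add_beta helper accumulates Anthropic beta flags in a single comma-separated header value, skipping flags that are already present. A minimal sketch of its behavior, using only the function shown above (note that the membership test is a substring check on the header value, which is sufficient for the flag names used here):

    headers: dict = {}
    _add_beta(headers, "computer-use-2025-01-24")
    _add_beta(headers, "mcp-client-2025-04-04")
    _add_beta(headers, "mcp-client-2025-04-04")  # already present, so no change
    assert headers == {"anthropic-beta": "computer-use-2025-01-24,mcp-client-2025-04-04"}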
@@ -38,10 +42,6 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
-    # Add beta header for Computer Use
-    if computer_use:
-        request_header["anthropic-beta"] = "computer-use-2025-01-24"
-
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -69,89 +69,61 @@ def _build_anthropic_request(
         print("ignoring reasoning_effort for non-reasoning model")
     if system_message is not None:
         request_json["system"] = system_message
-    if tools or computer_use:
+    if tools:
+        mcp_servers = []
         tool_definitions = []
-        if tools:
-            tool_definitions.extend([tool.dump_for("anthropic") for tool in tools])
-        # Add Computer Use tools
-        if computer_use:
-            cu_tools = get_anthropic_cu_tools(
-                model=model.id,
-                display_width=display_width,  # todo: set from ComputerUseParams
-                display_height=display_height,
-            )
-            tool_definitions.extend(cu_tools)
+        for tool in tools:
+            if isinstance(tool, Tool):
+                tool_definitions.append(tool.dump_for("anthropic"))
+            elif isinstance(tool, dict):
+                tool_definitions.append(tool)
+                # add betas if needed
+                if tool["type"] in [
+                    "computer_20241022",
+                    "text_editor_20241022",
+                    "bash_20241022",
+                ]:
+                    _add_beta(request_header, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20250124":
+                    _add_beta(request_header, "computer-use-2025-01-24")
+                elif tool["type"] == "code_execution_20250522":
+                    _add_beta(request_header, "code-execution-2025-05-22")
+            elif isinstance(tool, MCPServer):
+                _add_beta(request_header, "mcp-client-2025-04-04")
+                mcp_servers.append(tool.for_anthropic())
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
             tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}
 
         request_json["tools"] = tool_definitions
+        if len(mcp_servers) > 0:
+            request_json["mcp_servers"] = mcp_servers
 
     return request_json, request_header
 
 
 class AnthropicRequest(APIRequestBase):
-    def __init__(
-        self,
-        task_id: int,
-        # should always be 'role', 'content' keys.
-        # internal logic should handle translating to specific API format
-        model_name: str,  # must correspond to registry
-        prompt: Conversation,
-        attempts_left: int,
-        status_tracker: StatusTracker,
-        results_arr: list,
-        request_timeout: int = 30,
-        sampling_params: SamplingParams = SamplingParams(),
-        callback: Callable | None = None,
-        # for retries
-        all_model_names: list[str] | None = None,
-        all_sampling_params: list[SamplingParams] | None = None,
-        tools: list | None = None,
-        cache: CachePattern | None = None,
-        # Computer Use support
-        computer_use: bool = False,
-        display_width: int = 1024,
-        display_height: int = 768,
-    ):
-        super().__init__(
-            task_id=task_id,
-            model_name=model_name,
-            prompt=prompt,
-            attempts_left=attempts_left,
-            status_tracker=status_tracker,
-            results_arr=results_arr,
-            request_timeout=request_timeout,
-            sampling_params=sampling_params,
-            callback=callback,
-            all_model_names=all_model_names,
-            all_sampling_params=all_sampling_params,
-            tools=tools,
-            cache=cache,
-        )
-        self.computer_use = computer_use
-        self.display_width = display_width
-        self.display_height = display_height
-        self.model = APIModel.from_registry(model_name)
+    def __init__(self, context: RequestContext):
+        super().__init__(context=context)
+
+        self.model = APIModel.from_registry(self.context.model_name)
         self.url = f"{self.model.api_base}/messages"
 
         # Lock images as bytes if caching is enabled
-        if cache is not None:
-            prompt.lock_images_as_bytes()
+        if self.context.cache is not None:
+            self.context.prompt.lock_images_as_bytes()
 
         self.request_json, self.request_header = _build_anthropic_request(
             self.model,
-            prompt,
-            tools,
-            sampling_params,
-            cache,
-            computer_use,
-            display_width,
-            display_height,
+            self.context.prompt,
+            self.context.tools,
+            self.context.sampling_params,
+            self.context.cache,
         )
 
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+        data = None
         is_error = False
         error_message = None
         thinking = None
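With this change, the tools argument may mix three kinds of entries: lm_deluge Tool objects (serialized via dump_for("anthropic")), raw Anthropic tool dicts (which also switch on the matching beta header), and MCPServer instances (serialized via for_anthropic() and collected into mcp_servers). A sketch of such a call, assuming model, prompt, sampling_params, my_tool, and my_server are already in scope, and that the dict follows Anthropic's documented computer-use tool shape:

    tools = [
        my_tool,  # a Tool; dumped with my_tool.dump_for("anthropic")
        {         # a raw tool dict; triggers the computer-use-2025-01-24 beta
            "type": "computer_20250124",
            "name": "computer",
            "display_width_px": 1024,
            "display_height_px": 768,
        },
        my_server,  # an MCPServer; triggers mcp-client-2025-04-04 and lands in mcp_servers
    ]
    request_json, request_header = _build_anthropic_request(
        model, prompt, tools, sampling_params
    )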
@@ -160,6 +132,7 @@ class AnthropicRequest(APIRequestBase):
         status_code = http_response.status
         mimetype = http_response.headers.get("Content-Type", None)
         rate_limits = {}
+        assert self.context.status_tracker
         for header in [
             "anthropic-ratelimit-requests-limit",
             "anthropic-ratelimit-requests-remaining",
@@ -215,20 +188,21 @@ class AnthropicRequest(APIRequestBase):
                 or "overloaded" in error_message.lower()
             ):
                 error_message += " (Rate limit error, triggering cooldown.)"
-                self.status_tracker.rate_limit_exceeded()
+                self.context.status_tracker.rate_limit_exceeded()
             if "context length" in error_message:
                 error_message += " (Context length exceeded, set retries to 0.)"
-                self.attempts_left = 0
+                self.context.attempts_left = 0
 
         return APIResponse(
-            id=self.task_id,
+            id=self.context.task_id,
             status_code=status_code,
             is_error=is_error,
             error_message=error_message,
-            prompt=self.prompt,
+            prompt=self.context.prompt,
             content=content,
             thinking=thinking,
-            model_internal=self.model_name,
-            sampling_params=self.sampling_params,
+            model_internal=self.context.model_name,
+            sampling_params=self.context.sampling_params,
             usage=usage,
+            raw_response=data,
         )
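The RequestContext class itself lives in lm_deluge/request_context.py and is not part of this diff. From the fields the code above and below reads off it, its shape is roughly the following; this is a sketch inferred from usage, not the actual definition, and the defaults are guesses:

    from dataclasses import dataclass
    from typing import Any, Callable

    @dataclass
    class RequestContext:  # hypothetical reconstruction
        task_id: int
        model_name: str               # must correspond to the model registry
        prompt: Any                   # a Conversation
        sampling_params: Any          # a SamplingParams
        attempts_left: int = 5
        request_timeout: int = 30
        status_tracker: Any | None = None  # a StatusTracker; optional per the asserts
        callback: Callable | None = None
        tools: list | None = None
        cache: Any | None = None           # a CachePattern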
@@ -1,18 +1,12 @@
 import asyncio
-import random
 import traceback
 from abc import ABC, abstractmethod
-from typing import Callable
 
 import aiohttp
 from aiohttp import ClientResponse
 
-from lm_deluge.prompt import CachePattern, Conversation
-
-from ..config import SamplingParams
 from ..errors import raise_if_modal_exception
-from ..models import APIModel
-from ..tracker import StatusTracker
+from ..request_context import RequestContext
 from .response import APIResponse
 
 
@@ -28,40 +22,13 @@ class APIRequestBase(ABC):
 
     def __init__(
         self,
-        task_id: int,
-        # should always be 'role', 'content' keys.
-        # internal logic should handle translating to specific API format
-        model_name: str,  # must correspond to registry
-        prompt: Conversation,
-        attempts_left: int,
-        status_tracker: StatusTracker,
-        # needed in order to retry with a different model and not throw the output away
-        results_arr: list["APIRequestBase"],
-        request_timeout: int = 30,
-        sampling_params: SamplingParams = SamplingParams(),
-        callback: Callable | None = None,
-        all_model_names: list[str] | None = None,
-        all_sampling_params: list[SamplingParams] | None = None,
-        tools: list | None = None,
-        cache: CachePattern | None = None,
+        context: RequestContext,
     ):
-        if all_model_names is None:
-            raise ValueError("all_model_names must be provided.")
-        self.task_id = task_id
-        self.model_name = model_name
+        # If context is provided, use it; otherwise construct one from individual parameters
+        self.context = context
+
+        # Everything is now accessed through self.context - no copying!
         self.system_prompt = None
-        self.prompt = prompt
-        self.attempts_left = attempts_left
-        self.status_tracker = status_tracker
-        self.request_timeout = request_timeout
-        self.sampling_params = sampling_params
-        self.callback = callback
-        self.num_tokens = prompt.count_tokens(sampling_params.max_new_tokens)
-        self.results_arr = results_arr
-        self.all_model_names = all_model_names
-        self.all_sampling_params = all_sampling_params
-        self.tools = tools
-        self.cache: CachePattern | None = cache
         self.result = []  # list of APIResponse objects from each attempt
 
         # these should be set in the __init__ of the subclass
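Subclasses now take the context as their only constructor argument, so building and sending a request reduces to assembling one object. Hypothetical usage inside a coroutine, with a placeholder model name and the sketched RequestContext from above; execute_once appears in the next hunk:

    context = RequestContext(
        task_id=0,
        model_name="some-registry-model",  # placeholder for a registry entry
        prompt=conversation,
        sampling_params=SamplingParams(),
    )
    request = AnthropicRequest(context)
    response = await request.execute_once()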
@@ -71,101 +38,25 @@ class APIRequestBase(ABC):
         self.region = None
 
     def increment_pbar(self):
-        self.status_tracker.increment_pbar()
+        if self.context.status_tracker:
+            self.context.status_tracker.increment_pbar()
 
     def call_callback(self):
-        if self.callback is not None:
+        if self.context.callback is not None:
             # the APIResponse in self.result includes all the information
-            self.callback(self.result[-1], self.status_tracker)
+            self.context.callback(self.result[-1], self.context.status_tracker)
 
     def handle_success(self, data):
         self.call_callback()
-        self.status_tracker.task_succeeded(self.task_id)
-
-    def handle_error(self, create_new_request=False, give_up_if_no_other_models=False):
-        """
-        If create_new_request is True, will create a new API request (so that it
-        has a chance of being sent to a different model). If false, will retry
-        the same request.
-        """
-        last_result: APIResponse = self.result[-1]
-        error_to_print = f"Error task {self.task_id}. "
-        error_to_print += (
-            f"Model: {last_result.model_internal} Code: {last_result.status_code}, "
-        )
-        if self.region is not None:
-            error_to_print += f"Region: {self.region}, "
-        error_to_print += f"Message: {last_result.error_message}."
-        print(error_to_print)
-        if self.attempts_left > 0:
-            self.attempts_left -= 1
-            if not create_new_request:
-                assert self.status_tracker.retry_queue
-                self.status_tracker.retry_queue.put_nowait(self)
-                return
-            else:
-                # make sure we have another model to send it to besides the current one
-                if self.all_model_names is None or len(self.all_model_names) < 2:
-                    if give_up_if_no_other_models:
-                        print(
-                            f"No other models to try for task {self.task_id}. Giving up."
-                        )
-                        self.status_tracker.task_failed(self.task_id)
-                    else:
-                        print(
-                            f"No other models to try for task {self.task_id}. Retrying with same model."
-                        )
-                        assert self.status_tracker.retry_queue
-                        self.status_tracker.retry_queue.put_nowait(self)
-                else:
-                    # two things to change: model_name and sampling_params
-                    new_model_name = self.model_name
-                    new_model_idx = 0
-                    while new_model_name == self.model_name:
-                        new_model_idx = random.randint(0, len(self.all_model_names) - 1)
-                        new_model_name = self.all_model_names[new_model_idx]
-
-                    if isinstance(self.all_sampling_params, list):
-                        new_sampling_params = self.all_sampling_params[new_model_idx]
-                    elif isinstance(self.all_sampling_params, SamplingParams):
-                        new_sampling_params = self.all_sampling_params
-                    elif self.all_sampling_params is None:
-                        new_sampling_params = self.sampling_params
-                    else:
-                        new_sampling_params = self.sampling_params
+        if self.context.status_tracker:
+            self.context.status_tracker.task_succeeded(self.context.task_id)
 
-                    print("Creating new request with model", new_model_name)
-                    new_request = create_api_request(
-                        task_id=self.task_id,
-                        model_name=new_model_name,
-                        prompt=self.prompt,
-                        attempts_left=self.attempts_left,
-                        status_tracker=self.status_tracker,
-                        results_arr=self.results_arr,
-                        request_timeout=self.request_timeout,
-                        sampling_params=new_sampling_params,
-                        callback=self.callback,
-                        all_model_names=self.all_model_names,
-                        all_sampling_params=self.all_sampling_params,
-                        tools=self.tools,
-                        cache=self.cache,
-                        computer_use=getattr(self, "computer_use", False),
-                        display_width=getattr(self, "display_width", 1024),
-                        display_height=getattr(self, "display_height", 768),
-                    )
-                    # PROBLEM: new request is never put into results array, so we can't get the result.
-                    assert self.status_tracker.retry_queue
-                    self.status_tracker.retry_queue.put_nowait(self)
-                    # SOLUTION: just need to make sure it's deduplicated by task_id later.
-                    self.results_arr.append(new_request)
-        else:
-            print(f"Task {self.task_id} out of tries.")
-            self.status_tracker.task_failed(self.task_id)
-
-    async def call_api(self):
+    async def execute_once(self) -> APIResponse:
+        """Send the HTTP request once and return the parsed APIResponse."""
+        assert self.context.status_tracker
         try:
-            self.status_tracker.total_requests += 1
-            timeout = aiohttp.ClientTimeout(total=self.request_timeout)
+            self.context.status_tracker.total_requests += 1
+            timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
             async with aiohttp.ClientSession(timeout=timeout) as session:
                 assert self.url is not None, "URL is not set"
                 async with session.post(
@@ -174,133 +65,56 @@ class APIRequestBase(ABC):
                     json=self.request_json,
                 ) as http_response:
                     response: APIResponse = await self.handle_response(http_response)
-
-                    self.result.append(response)
-                    if response.is_error:
-                        self.handle_error(
-                            create_new_request=response.retry_with_different_model or False,
-                            give_up_if_no_other_models=response.give_up_if_no_other_models
-                            or False,
-                        )
-                    else:
-                        self.handle_success(response)
+                    return response
 
         except asyncio.TimeoutError:
-            self.result.append(
-                APIResponse(
-                    id=self.task_id,
-                    model_internal=self.model_name,
-                    prompt=self.prompt,
-                    sampling_params=self.sampling_params,
-                    status_code=None,
-                    is_error=True,
-                    error_message="Request timed out (terminated by client).",
-                    content=None,
-                    usage=None,
-                )
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Request timed out (terminated by client).",
+                content=None,
+                usage=None,
             )
-            self.handle_error(create_new_request=False)
 
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
             print(tb)
-            self.result.append(
-                APIResponse(
-                    id=self.task_id,
-                    model_internal=self.model_name,
-                    prompt=self.prompt,
-                    sampling_params=self.sampling_params,
-                    status_code=None,
-                    is_error=True,
-                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
-                    content=None,
-                    usage=None,
-                )
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                content=None,
+                usage=None,
             )
-            # maybe consider making True?
-            self.handle_error(create_new_request=False)
 
     @abstractmethod
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         raise NotImplementedError
 
 
-def create_api_request(
-    task_id: int,
-    model_name: str,
-    prompt: Conversation,
-    attempts_left: int,
-    status_tracker: StatusTracker,
-    results_arr: list["APIRequestBase"],
-    request_timeout: int = 30,
-    sampling_params: SamplingParams = SamplingParams(),
-    callback: Callable | None = None,
-    all_model_names: list[str] | None = None,
-    all_sampling_params: list[SamplingParams] | None = None,
-    tools: list | None = None,
-    cache: CachePattern | None = None,
-    computer_use: bool = False,
-    display_width: int = 1024,
-    display_height: int = 768,
-    use_responses_api: bool = False,
-) -> APIRequestBase:
-    from .common import CLASSES  # circular import so made it lazy, does this work?
-
-    model_obj = APIModel.from_registry(model_name)
-
-    # Choose API spec based on use_responses_api flag and model support
-    api_spec = model_obj.api_spec
-    if use_responses_api and model_obj.supports_responses and api_spec == "openai":
-        api_spec = "openai-responses"
-
-    request_class = CLASSES.get(api_spec, None)
-    if request_class is None:
-        raise ValueError(f"Unsupported API spec: {api_spec}")
-    kwargs = {}
-    # Add computer_use to kwargs if the request class supports it
-    model_obj = APIModel.from_registry(model_name)
-    if computer_use and api_spec in ["anthropic", "bedrock", "openai-responses"]:
-        kwargs.update(
-            {
-                "computer_use": computer_use,
-                "display_width": display_width,
-                "display_height": display_height,
-            }
-        )
-
-    return request_class(
-        task_id=task_id,
-        model_name=model_name,
-        prompt=prompt,
-        attempts_left=attempts_left,
-        status_tracker=status_tracker,
-        results_arr=results_arr,
-        request_timeout=request_timeout,
-        sampling_params=sampling_params,
-        callback=callback,
-        all_model_names=all_model_names,
-        all_sampling_params=all_sampling_params,
-        tools=tools,
-        cache=cache,
-        **kwargs,
-    )
-
-
 def deduplicate_responses(results: list[APIRequestBase]) -> list[APIResponse]:
     deduplicated = {}
     for request in results:
-        if request.task_id not in deduplicated:
-            deduplicated[request.task_id] = request.result[-1]
+        if request.context.task_id not in deduplicated:
+            deduplicated[request.context.task_id] = request.result[-1]
         else:
-            current_response: APIResponse = deduplicated[request.task_id]
+            current_response: APIResponse = deduplicated[request.context.task_id]
             # only replace if the current request has no completion and the new one does
             if (
                 request.result[-1].completion is not None
                 and current_response.completion is None
             ):
-                deduplicated[request.task_id] = request.result[-1]
+                deduplicated[request.context.task_id] = request.result[-1]
 
-    output = [deduplicated[request.task_id] for request in results]
+    output = [deduplicated[request.context.task_id] for request in results]
 
     return output
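deduplicate_responses still collapses retries that left several requests sharing one task_id, preferring whichever attempt produced a completion. A sketch, assuming req_a and req_b are finished requests for the same task:

    # req_a errored (its result[-1].completion is None); req_b completed.
    responses = deduplicate_responses([req_a, req_b])
    # Both positions map to req_b's final APIResponse: the completed attempt
    # wins, and output order follows the input list.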