lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their public registry. It is provided for informational purposes only.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/client.py
CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+from dataclasses import dataclass
 from typing import (
     Any,
     AsyncGenerator,
@@ -37,6 +38,14 @@ from .request_context import RequestContext
 from .tracker import StatusTracker


+@dataclass
+class AgentLoopResponse:
+    """Wrapper for agent loop results to distinguish from single request results."""
+
+    conversation: Conversation
+    final_response: APIResponse
+
+
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -44,10 +53,15 @@ class _LLMClient(BaseModel):
     Keeps all validation, serialization, and existing functionality.
     """

-    _REASONING_SUFFIXES: ClassVar[
+    _REASONING_SUFFIXES: ClassVar[
+        dict[str, Literal["low", "medium", "high", "xhigh", "minimal", "none"]]
+    ] = {
         "-low": "low",
         "-medium": "medium",
         "-high": "high",
+        "-xhigh": "xhigh",
+        "-minimal": "minimal",
+        "-none": "none",
     }

     model_names: str | list[str] = ["gpt-4.1-mini"]
@@ -66,11 +80,15 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
-    temperature: float = 0
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
+    global_effort: Literal["low", "medium", "high"] | None = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     force_local_mcp: bool = False
@@ -84,10 +102,17 @@ class _LLMClient(BaseModel):
     # Internal state for async task handling
     _next_task_id: int = PrivateAttr(default=0)
     _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
-    _results: dict[int, APIResponse] = PrivateAttr(
+    _results: dict[int, APIResponse | AgentLoopResponse] = PrivateAttr(
+        default_factory=dict
+    )
     _tracker: StatusTracker | None = PrivateAttr(default=None)
     _capacity_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)

+    # usage
+    def print_usage(self):
+        if self._tracker:
+            self._tracker.log_usage()
+
     # Progress management for queueing API
     def open(self, total: int | None = None, show_progress: bool = True):
         self._tracker = StatusTracker(
@@ -149,9 +174,14 @@ class _LLMClient(BaseModel):

     def _normalize_model_names(
         self, models: list[str]
-    ) -> tuple[
+    ) -> tuple[
+        list[str],
+        list[Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None],
+    ]:
         normalized: list[str] = []
-        efforts: list[
+        efforts: list[
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+        ] = []

         for name in models:
             base_name = self._preprocess_openrouter_model(name)
@@ -164,7 +194,10 @@ class _LLMClient(BaseModel):
         return normalized, efforts

     def _align_sampling_params(
-        self,
+        self,
+        per_model_efforts: list[
+            Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+        ],
     ) -> None:
         if len(per_model_efforts) < len(self.model_names):
             per_model_efforts = per_model_efforts + [None] * (
@@ -187,6 +220,8 @@ class _LLMClient(BaseModel):
                 json_mode=self.json_mode,
                 max_new_tokens=self.max_new_tokens,
                 reasoning_effort=self.reasoning_effort,
+                global_effort=self.global_effort or "high",
+                thinking_budget=self.thinking_budget,
                 logprobs=self.logprobs,
                 top_logprobs=self.top_logprobs,
             )
@@ -242,6 +277,7 @@ class _LLMClient(BaseModel):
             self.max_tokens_per_minute = max_tokens_per_minute
         if max_concurrent_requests:
             self.max_concurrent_requests = max_concurrent_requests
+        return self

     def _get_tracker(self) -> StatusTracker:
         if self._tracker is None:
@@ -253,6 +289,28 @@ class _LLMClient(BaseModel):
     def models(self):
         return self.model_names  # why? idk

+    @staticmethod
+    def _preprocess_tinker_model(model_name: str) -> str:
+        if model_name.startswith("tinker://"):
+            model_id = model_name
+            if model_id not in registry:
+                register_model(
+                    id=model_name,
+                    name=model_name,
+                    api_base="https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1",
+                    api_key_env_var="TINKER_API_KEY",
+                    api_spec="openai",
+                    supports_json=True,
+                    supports_logprobs=False,
+                    supports_responses=False,
+                    input_cost=0,  # Unknown costs for arbitrary tinker models
+                    cached_input_cost=0,
+                    cache_write_cost=0,
+                    output_cost=0,
+                )
+
+        return model_name
+
     @staticmethod
     def _preprocess_openrouter_model(model_name: str) -> str:
         """Process openrouter: prefix and register model if needed."""
@@ -279,7 +337,8 @@ class _LLMClient(BaseModel):
             )

             return model_id
-
+        else:
+            return model_name

     @model_validator(mode="before")
     @classmethod
@@ -292,6 +351,9 @@ class _LLMClient(BaseModel):
             # First, handle OpenRouter prefix
             model_name = cls._preprocess_openrouter_model(model_names)

+            # next handle tinker prefix
+            model_name = cls._preprocess_tinker_model(model_name)
+
             # Then handle reasoning effort suffix (e.g., "gpt-5-high")
             model_name, effort = cls._strip_reasoning_suffix_if_registered(model_name)
             if effort and data.get("reasoning_effort") is None:
@@ -316,11 +378,13 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                    json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
                     reasoning_effort=data.get("reasoning_effort", None),
+                    global_effort=data.get("global_effort") or "high",
+                    thinking_budget=data.get("thinking_budget", None),
                     logprobs=data.get("logprobs", False),
                     top_logprobs=data.get("top_logprobs", None),
                 )
@@ -332,7 +396,9 @@ class _LLMClient(BaseModel):
     @classmethod
     def _strip_reasoning_suffix_if_registered(
         cls, model_name: str
-    ) -> tuple[
+    ) -> tuple[
+        str, Literal["low", "medium", "high", "xhigh", "minimal", "none"] | None
+    ]:
         """Remove reasoning suffix only when the trimmed model already exists."""
         for suffix, effort in cls._REASONING_SUFFIXES.items():
             if model_name.endswith(suffix) and len(model_name) > len(suffix):
@@ -364,6 +430,15 @@ class _LLMClient(BaseModel):
             assert (
                 self.use_responses_api
             ), "background mode only allowed for responses api"
+
+        # codex models require responses api
+        for model_name in self.model_names:
+            if "codex" in model_name.lower() and not self.use_responses_api:
+                raise ValueError(
+                    f"Model '{model_name}' requires use_responses_api=True. "
+                    "Codex models are only available via the Responses API."
+                )
+
         # Auto-generate name if not provided
         if self.name is None:
             if len(self.model_names) == 1:
@@ -542,7 +617,8 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
+        output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[str | None]: ...
@@ -554,7 +630,8 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
+        output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
     ) -> list[APIResponse]: ...
@@ -565,7 +642,8 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> list[APIResponse] | list[str | None] | dict[str, int]:
@@ -594,6 +672,7 @@ class _LLMClient(BaseModel):
             task_id = self.start_nowait(
                 prompt,
                 tools=tools,
+                output_schema=output_schema,
                 cache=cache,
                 service_tier=service_tier,
             )
@@ -638,7 +717,8 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress=True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
     ):
         return asyncio.run(
@@ -647,6 +727,7 @@ class _LLMClient(BaseModel):
                 return_completions_only=return_completions_only,
                 show_progress=show_progress,
                 tools=tools,
+                output_schema=output_schema,
                 cache=cache,
             )
         )
@@ -669,7 +750,8 @@ class _LLMClient(BaseModel):
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> int:
@@ -688,6 +770,7 @@ class _LLMClient(BaseModel):
             request_timeout=self.request_timeout,
             status_tracker=tracker,
             tools=tools,
+            output_schema=output_schema,
             cache=cache,
             use_responses_api=self.use_responses_api,
             background=self.background,
@@ -702,25 +785,30 @@ class _LLMClient(BaseModel):

     async def start(
         self,
-        prompt:
+        prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
     ) -> APIResponse:
         task_id = self.start_nowait(
-            prompt,
+            prompt,
+            tools=tools,
+            output_schema=output_schema,
+            cache=cache,
+            service_tier=service_tier,
         )
         return await self.wait_for(task_id)

     async def wait_for(self, task_id: int) -> APIResponse:
         task = self._tasks.get(task_id)
         if task:
-
-            res = self._results.get(task_id)
-            if res:
-                return res
+            result = await task
         else:
+            result = self._results.get(task_id)
+
+        if result is None:
             return APIResponse(
                 id=-1,
                 model_internal="",
@@ -731,6 +819,11 @@ class _LLMClient(BaseModel):
                 error_message="Task not found",
             )

+        assert isinstance(
+            result, APIResponse
+        ), f"Expected APIResponse, got {type(result)}. Use wait_for_agent_loop for agent loop tasks."
+        return result
+
     async def wait_for_all(
         self, task_ids: Sequence[int] | None = None
     ) -> list[APIResponse]:
@@ -766,6 +859,9 @@ class _LLMClient(BaseModel):
                 tid = tasks_map.pop(task)
                 task_result = self._results.get(tid, await task)
                 assert task_result
+                assert isinstance(
+                    task_result, APIResponse
+                ), f"Expected APIResponse, got {type(task_result)}. as_completed() only works with single requests, not agent loops."
                 yield tid, task_result

         while tasks_map:
@@ -776,16 +872,19 @@ class _LLMClient(BaseModel):
                 tid = tasks_map.pop(task)
                 task_result = self._results.get(tid, await task)
                 assert task_result
+                assert isinstance(
+                    task_result, APIResponse
+                ), f"Expected APIResponse, got {type(task_result)}. as_completed() only works with single requests, not agent loops."
                 yield tid, task_result

     async def stream(
         self,
-        prompt:
-        tools:
+        prompt: Prompt,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
     ):
         model, sampling_params = self._select_model()
-
-
+        prompt = prompts_to_conversations([prompt])[0]
+        assert isinstance(prompt, Conversation)
         async for item in stream_chat(
             model, prompt, sampling_params, tools, None, self.extra_headers
         ):
@@ -797,23 +896,15 @@ class _LLMClient(BaseModel):
             return self.postprocess(item)
         return item

-    async def
+    async def _run_agent_loop_internal(
         self,
-
+        task_id: int,
+        conversation: Conversation,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
-
-
-        """Run a simple agent loop until no more tool calls are returned.
-
-        The provided ``conversation`` will be mutated and returned alongside the
-        final ``APIResponse`` from the model. ``tools`` may include ``Tool``
-        instances or built‑in tool dictionaries.
-        """
-
-        if isinstance(conversation, str):
-            conversation = Conversation.user(conversation)
+    ) -> AgentLoopResponse:
+        """Internal method to run agent loop and return wrapped result."""

         # Expand MCPServer objects to their constituent tools for tool execution
         expanded_tools: list[Tool] = []
@@ -861,18 +952,86 @@ class _LLMClient(BaseModel):
                     if not isinstance(result, (str, dict, list)):
                         result = str(result)

-                conversation.with_tool_result(call.id, result)  # type: ignore
+                conversation = conversation.with_tool_result(call.id, result)  # type: ignore

         if response is None:
             raise RuntimeError("model did not return a response")

-
+        result = AgentLoopResponse(conversation=conversation, final_response=response)
+        self._results[task_id] = result
+        return result
+
+    def start_agent_loop_nowait(
+        self,
+        conversation: Prompt,
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+    ) -> int:
+        """Start an agent loop without waiting for it to complete.
+
+        Returns a task_id that can be used with wait_for_agent_loop().
+        """
+        if not isinstance(conversation, Conversation):
+            conversation = prompts_to_conversations([conversation])[0]
+        assert isinstance(conversation, Conversation)
+
+        task_id = self._next_task_id
+        self._next_task_id += 1
+
+        task = asyncio.create_task(
+            self._run_agent_loop_internal(
+                task_id, conversation, tools=tools, max_rounds=max_rounds
+            )
+        )
+        self._tasks[task_id] = task
+        return task_id
+
+    async def wait_for_agent_loop(
+        self, task_id: int
+    ) -> tuple[Conversation, APIResponse]:
+        """Wait for an agent loop task to complete.
+
+        Returns the conversation and final response from the agent loop.
+        """
+        task = self._tasks.get(task_id)
+        if task:
+            result = await task
+        else:
+            result = self._results.get(task_id)
+
+        if result is None:
+            raise RuntimeError(f"Agent loop task {task_id} not found")
+
+        assert isinstance(
+            result, AgentLoopResponse
+        ), f"Expected AgentLoopResponse, got {type(result)}"
+        return result.conversation, result.final_response
+
+    async def run_agent_loop(
+        self,
+        conversation: Prompt,
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        show_progress: bool = False,
+    ) -> tuple[Conversation, APIResponse]:
+        """Run a simple agent loop until no more tool calls are returned.
+
+        The provided ``conversation`` will be mutated and returned alongside the
+        final ``APIResponse`` from the model. ``tools`` may include ``Tool``
+        instances or built‑in tool dictionaries.
+        """
+        task_id = self.start_agent_loop_nowait(
+            conversation, tools=tools, max_rounds=max_rounds
+        )
+        return await self.wait_for_agent_loop(task_id)

     def run_agent_loop_sync(
         self,
-        conversation:
+        conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -887,6 +1046,92 @@ class _LLMClient(BaseModel):
             )
         )

+    async def process_agent_loops_async(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Process multiple agent loops concurrently.
+
+        Each prompt becomes an independent agent loop that can make multiple LLM
+        calls and execute tools until completion. The agent loops run concurrently,
+        limited by ``max_concurrent_agents``, while the underlying LLM requests
+        are still governed by ``max_concurrent_requests``.
+
+        Args:
+            prompts: Sequence of prompts, each becoming a separate agent loop.
+            tools: Tools available to all agent loops.
+            max_rounds: Maximum rounds per agent loop (default 5).
+            max_concurrent_agents: Maximum number of agent loops running
+                concurrently (default 10). This is separate from the LLM request
+                concurrency limit.
+            show_progress: Whether to show progress bar for LLM requests.
+
+        Returns:
+            List of (Conversation, APIResponse) tuples in the same order as
+            the input prompts.
+        """
+        # Convert prompts to Conversations
+        conversations = prompts_to_conversations(list(prompts))
+
+        # Ensure tracker exists for underlying LLM requests
+        if self._tracker is None:
+            self.open(total=0, show_progress=show_progress)
+            tracker_preopened = False
+        else:
+            tracker_preopened = True
+
+        # Semaphore to limit concurrent agent loops
+        agent_semaphore = asyncio.Semaphore(max_concurrent_agents)
+
+        async def run_single_loop(
+            idx: int, conv: Conversation
+        ) -> tuple[int, Conversation, APIResponse]:
+            """Run a single agent loop with semaphore protection."""
+            async with agent_semaphore:
+                task_id = self._next_task_id
+                self._next_task_id += 1
+                result = await self._run_agent_loop_internal(
+                    task_id, conv, tools=tools, max_rounds=max_rounds
+                )
+                return idx, result.conversation, result.final_response
+
+        # Launch all agent loops concurrently (semaphore limits actual concurrency)
+        tasks = [run_single_loop(idx, conv) for idx, conv in enumerate(conversations)]
+        completed = await asyncio.gather(*tasks)
+
+        # Close tracker if we opened it
+        if not tracker_preopened:
+            self.close()
+
+        # Sort by original index and extract results
+        completed_sorted = sorted(completed, key=lambda x: x[0])
+        return [(conv, resp) for _, conv, resp in completed_sorted]
+
+    def process_agent_loops_sync(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Synchronous wrapper for :meth:`process_agent_loops_async`."""
+        return asyncio.run(
+            self.process_agent_loops_async(
+                prompts,
+                tools=tools,
+                max_rounds=max_rounds,
+                max_concurrent_agents=max_concurrent_agents,
+                show_progress=show_progress,
+            )
+        )
+
     async def submit_batch_job(
         self,
         prompts: Prompt | Sequence[Prompt],
@@ -953,11 +1198,15 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -982,11 +1231,15 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
    logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1010,11 +1263,15 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
-    reasoning_effort: Literal[
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1055,6 +1312,8 @@ def LLMClient(
         json_mode=json_mode,
         max_new_tokens=max_new_tokens,
         reasoning_effort=reasoning_effort,
+        global_effort=global_effort,
+        thinking_budget=thinking_budget,
         logprobs=logprobs,
         top_logprobs=top_logprobs,
         force_local_mcp=force_local_mcp,
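Taken together, the client.py changes raise the default temperature to 1.0, add `global_effort` and `thinking_budget` knobs, recognize the new reasoning-effort suffixes (`-xhigh`, `-minimal`, `-none`), thread an `output_schema` parameter through the request path, and introduce a queueable agent-loop API (`start_agent_loop_nowait`, `wait_for_agent_loop`, `run_agent_loop`, `process_agent_loops_async/sync`). The sketch below is inferred only from the signatures visible in this diff; the package-root import, the model id, the placeholder prompts, and the `tools=None` shortcut are illustrative assumptions rather than documented usage.

```python
# Sketch only, based on the method signatures added in this diff.
# "gpt-5-high" follows the diff's own example comment: the "-high" suffix
# sets reasoning_effort when the trimmed model name is already registered.
from lm_deluge import LLMClient  # assumed import path

client = LLMClient("gpt-5-high", thinking_budget=4_096)

# Single agent loop: runs tool calls until the model stops requesting them
# (or max_rounds is hit) and returns the Conversation plus the final APIResponse.
conversation, final_response = client.run_agent_loop_sync(
    "Look up the weather and summarize it.",  # placeholder prompt
    tools=None,   # Tool / dict / MCPServer instances would go here
    max_rounds=5,
)

# Many agent loops, run concurrently; results come back in input order.
results = client.process_agent_loops_sync(
    ["task A", "task B", "task C"],  # placeholder prompts
    max_rounds=5,
    max_concurrent_agents=10,
)
for conv, resp in results:
    print(resp)

client.print_usage()  # new helper: logs usage from the internal tracker
```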
lm_deluge/config.py
CHANGED
@@ -4,13 +4,25 @@ from pydantic import BaseModel


 class SamplingParams(BaseModel):
-    temperature: float =
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
-    max_new_tokens: int =
-
+    max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
+    reasoning_effort: Literal[
+        "low", "medium", "high", "xhigh", "minimal", "none", None
+    ] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
+    strict_tools: bool = True
+    # Gemini 3 only - controls multimodal vision processing fidelity
+    media_resolution: (
+        Literal[
+            "media_resolution_low", "media_resolution_medium", "media_resolution_high"
+        ]
+        | None
+    ) = None

     def to_vllm(self):
         try:
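For reference, a sketch of constructing the expanded SamplingParams from this diff. The import path is inferred from the file location, and the specific values are illustrative, not recommended settings.

```python
# Sketch only: the expanded SamplingParams fields shown in this diff.
from lm_deluge.config import SamplingParams  # assumed import path

params = SamplingParams(
    temperature=1.0,            # new default, noted as "more typical for new models"
    max_new_tokens=2_048,       # new default
    reasoning_effort="xhigh",   # "xhigh", "minimal", "none" are newly allowed values
    global_effort="high",       # new field (the diff notes it is for opus-4.5)
    thinking_budget=8_192,      # new field: numeric thinking budget (illustrative value)
    strict_tools=True,          # new field
    media_resolution="media_resolution_high",  # new field, Gemini 3 only
)
print(params.model_dump())
```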