lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,20 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool
+
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,6 +25,16 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -12,6 +12,11 @@ from lm_deluge.prompt import (
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
+from lm_deluge.util.schema import (
+    prepare_output_schema,
+    transform_schema_for_anthropic,
+)
+from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -58,38 +63,102 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }
 
+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-        …
-        sampling_params.reasoning_effort
-    )
-        …
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            effort = sampling_params.reasoning_effort
+            if effort == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=context.model_name)
+                effort = "high"
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message
 
-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        …
+        request_json.pop("top_p")
+
+    # print(request_json)
+    # Handle structured outputs (output_format)
+    if context.output_schema:
+        if model.supports_json:
+            base_schema = prepare_output_schema(context.output_schema)
+
+            # Apply Anthropic-specific transformations (move unsupported constraints to description)
+            transformed_schema = transform_schema_for_anthropic(base_schema)
+
+            _add_beta(base_headers, "structured-outputs-2025-11-13")
+            request_json["output_format"] = {
+                "type": "json_schema",
+                "schema": transformed_schema,
+            }
+        else:
+            print(
+                f"WARNING: Model {model.name} does not support structured outputs. Ignoring output_schema."
+            )
+    elif sampling_params.json_mode:
+        # Anthropic doesn't support basic json_mode without a schema
+        print(
+            "WARNING: Anthropic does not support basic json_mode without a schema. "
+            "Use output_schema parameter for structured JSON outputs."
+        )
+
+    # Add beta header for strict tools when enabled
+    if tools and sampling_params.strict_tools and model.supports_json:
+        _add_beta(base_headers, "structured-outputs-2025-11-13")
 
     if tools:
         mcp_servers = []
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                …
+                # Only use strict mode if model supports structured outputs
+                use_strict = sampling_params.strict_tools and model.supports_json
+                tool_definitions.append(tool.dump_for("anthropic", strict=use_strict))
             elif isinstance(tool, dict) and "url" in tool:
                 _add_beta(base_headers, "mcp-client-2025-04-04")
                 mcp_servers.append(tool)
@@ -102,6 +171,9 @@ def _build_anthropic_request(
                     "bash_20241022",
                 ]:
                     _add_beta(base_headers, "computer-use-2024-10-22")
+                elif tool["type"] == "computer_20251124":
+                    # Claude Opus 4.5 - newest computer use with zoom support
+                    _add_beta(base_headers, "computer-use-2025-11-24")
                 elif tool["type"] == "computer_20250124":
                     _add_beta(base_headers, "computer-use-2025-01-24")
                 elif tool["type"] == "code_execution_20250522":
@@ -169,6 +241,9 @@ class AnthropicRequest(APIRequestBase):
             data = await http_response.json()
             response_content = data["content"]
 
+            # print("=== CONTENT ===")
+            # print(response_content)
+
             # Parse response into Message with parts
             parts = []
             for item in response_content:
lm_deluge/api_requests/base.py
CHANGED
@@ -90,9 +90,32 @@ class APIRequestBase(ABC):
         start -> poll -> result style of request.
         """
         assert self.context.status_tracker, "no status tracker"
-
+        poll_interval = 5.0
+        attempt_start = time.monotonic()
+        deadline = attempt_start + self.context.request_timeout
+        response_id: str | None = None
+        last_status: str | None = None
+
         async with aiohttp.ClientSession() as session:
-
+
+            async def cancel_response(reason: str) -> None:
+                nonlocal response_id
+                if not response_id:
+                    return
+                cancel_url = f"{self.url}/{response_id}/cancel"
+                try:
+                    async with session.post(
+                        url=cancel_url,
+                        headers=self.request_header,
+                    ) as cancel_response:
+                        cancel_response.raise_for_status()
+                        print(f"Background req {response_id} cancelled: {reason}")
+                except (
+                    Exception
+                ) as cancel_err:  # pragma: no cover - best effort logging
+                    print(
+                        f"Failed to cancel background req {response_id}: {cancel_err}"
+                    )
 
             try:
                 self.context.status_tracker.total_requests += 1
@@ -109,14 +132,11 @@ class APIRequestBase(ABC):
                 last_status = data["status"]
 
                 while True:
-                    …
-                    ) as http_response:
-                        http_response.raise_for_status()
-                    …
+                    now = time.monotonic()
+                    remaining = deadline - now
+                    if remaining <= 0:
+                        elapsed = now - attempt_start
+                        await cancel_response(f"timed out after {elapsed:.1f}s")
                         return APIResponse(
                             id=self.context.task_id,
                             model_internal=self.context.model_name,
@@ -128,8 +148,9 @@ class APIRequestBase(ABC):
                             content=None,
                             usage=None,
                         )
+
                     # poll for the response
-                    await asyncio.sleep(
+                    await asyncio.sleep(min(poll_interval, max(remaining, 0)))
                     async with session.get(
                         url=f"{self.url}/{response_id}",
                         headers=self.request_header,
@@ -146,6 +167,8 @@ class APIRequestBase(ABC):
                             return await self.handle_response(http_response)
 
             except Exception as e:
+                if response_id:
+                    await cancel_response(f"errored: {type(e).__name__}")
                 raise_if_modal_exception(e)
                 tb = traceback.format_exc()
                 print(tb)
@@ -199,6 +222,19 @@ class APIRequestBase(ABC):
                 usage=None,
             )
 
+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
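The background-request handler above now bounds the whole start -> poll -> result cycle by a monotonic deadline and cancels the server-side request when it trips. A minimal, self-contained sketch of that pattern (the `check_status` and `cancel` callables and the status name stand in for the real GET/POST calls, which are not shown here):

import asyncio
import time


async def poll_until_done(check_status, cancel, request_timeout: float, poll_interval: float = 5.0):
    """Poll until a terminal status or the deadline, whichever comes first."""
    attempt_start = time.monotonic()
    deadline = attempt_start + request_timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # Best-effort cancel so the provider stops working on the request.
            await cancel(f"timed out after {time.monotonic() - attempt_start:.1f}s")
            return None
        # Never sleep past the deadline.
        await asyncio.sleep(min(poll_interval, max(remaining, 0)))
        status = await check_status()
        if status != "in_progress":  # status name is illustrative
            return status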
lm_deluge/api_requests/bedrock.py
CHANGED
@@ -106,7 +106,8 @@ async def _build_anthropic_bedrock_request(
         tool_definitions = []
         for tool in tools:
             if isinstance(tool, Tool):
-                …
+                # Bedrock doesn't have the strict-mode betas Anthropic exposes yet
+                tool_definitions.append(tool.dump_for("anthropic", strict=False))
             elif isinstance(tool, dict):
                 tool_definitions.append(tool)
         # add betas if needed
@@ -124,7 +125,9 @@ async def _build_anthropic_bedrock_request(
                 # Convert to individual tools locally (like OpenAI does)
                 individual_tools = await tool.to_tools()
                 for individual_tool in individual_tools:
-                    tool_definitions.append(
+                    tool_definitions.append(
+                        individual_tool.dump_for("anthropic", strict=False)
+                    )
 
         # Add cache control to last tool if tools_only caching is specified
         if cache_pattern == "tools_only" and tool_definitions:
@@ -194,11 +197,11 @@ async def _build_openai_bedrock_request(
         request_tools = []
         for tool in tools:
             if isinstance(tool, Tool):
-                request_tools.append(tool.dump_for("openai-completions"))
+                request_tools.append(tool.dump_for("openai-completions", strict=False))
             elif isinstance(tool, MCPServer):
                 as_tools = await tool.to_tools()
                 request_tools.extend(
-                    [t.dump_for("openai-completions") for t in as_tools]
+                    [t.dump_for("openai-completions", strict=False) for t in as_tools]
                )
        request_json["tools"] = request_tools
 
lm_deluge/api_requests/gemini.py
CHANGED
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any
 
 from aiohttp import ClientResponse
 
@@ -23,6 +22,21 @@ async def _build_gemini_request(
 ) -> dict:
     system_message, messages = prompt.to_gemini()
 
+    # For Gemini 3, inject dummy signatures when missing for function calls
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        dummy_sig = "context_engineering_is_the_way_to_go"
+        for msg in messages:
+            if "parts" in msg:
+                for part in msg["parts"]:
+                    # For function calls, inject dummy signature if missing
+                    if "functionCall" in part and "thoughtSignature" not in part:
+                        part["thoughtSignature"] = dummy_sig
+                        maybe_warn(
+                            "WARN_GEMINI3_MISSING_SIGNATURE",
+                            part_type="function call",
+                        )
+
     request_json = {
         "contents": messages,
         "generationConfig": {
@@ -37,20 +51,69 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}
 
     # Handle reasoning models (thinking)
-    …
-    if …
-    …
-        thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
         else:
-            …
-            if …
-            …
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[effort_key]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+
+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            effort_key = sampling_params.reasoning_effort
+            if effort_key == "xhigh":
+                maybe_warn("WARN_XHIGH_TO_HIGH", model_name=model.name)
+                effort_key = "high"
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert effort_key in level_map
+            budget = level_map[effort_key]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
+        else:
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
     else:
@@ -59,13 +122,60 @@ async def _build_gemini_request(
 
     # Add tools if provided
     if tools:
-        …
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
         request_json["generationConfig"]["responseMimeType"] = "application/json"
 
+    # Handle media_resolution for Gemini 3 (requires v1alpha)
+    if sampling_params.media_resolution is not None:
+        is_gemini_3 = "gemini-3" in model.name.lower()
+        if is_gemini_3:
+            # Add global media resolution to generationConfig
+            request_json["generationConfig"]["mediaResolution"] = {
+                "level": sampling_params.media_resolution
+            }
+        else:
+            # Warn if trying to use media_resolution on non-Gemini-3 models
+            maybe_warn(
+                "WARN_MEDIA_RESOLUTION_UNSUPPORTED",
+                model_name=model.name,
+            )
+
     return request_json
 
 
@@ -103,7 +213,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )
 
@@ -137,10 +247,19 @@ class GeminiRequest(APIRequestBase):
             candidate = data["candidates"][0]
             if "content" in candidate and "parts" in candidate["content"]:
                 for part in candidate["content"]["parts"]:
+                    # Extract thought signature if present
+                    thought_sig = part.get("thoughtSignature")
+
                     if "text" in part:
                         parts.append(Text(part["text"]))
                     elif "thought" in part:
-                        …
+                        # Thought with optional signature
+                        parts.append(
+                            Thinking(
+                                content=part["thought"],
+                                thought_signature=thought_sig,
+                            )
+                        )
                     elif "functionCall" in part:
                         func_call = part["functionCall"]
                         # Generate a unique ID since Gemini doesn't provide one
@@ -152,6 +271,7 @@ class GeminiRequest(APIRequestBase):
                                 id=tool_id,
                                 name=func_call["name"],
                                 arguments=func_call.get("args", {}),
+                                thought_signature=thought_sig,
                             )
                         )
 
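To make the Gemini 3 branch above concrete: a sketch of the `generationConfig` it produces when `reasoning_effort="medium"` and a `media_resolution` is set. The level map comes straight from the diff; the `mediaResolution` value string is illustrative, since the allowed enum values are not shown here:

# "medium" currently maps to thinkingLevel "high" (see level_map in the diff above).
level_map = {"none": "low", "minimal": "low", "low": "low", "medium": "high", "high": "high"}

request_json = {
    "contents": [],
    "generationConfig": {
        "thinkingConfig": {"thinkingLevel": level_map["medium"]},
        # Only attached for gemini-3 models; the value shown here is a placeholder.
        "mediaResolution": {"level": "MEDIA_RESOLUTION_HIGH"},
    },
}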