lm-deluge 0.0.79__tar.gz → 0.0.81__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.79/src/lm_deluge.egg-info → lm_deluge-0.0.81}/PKG-INFO +2 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/README.md +1 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/pyproject.toml +1 -1
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/__init__.py +1 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/anthropic.py +44 -16
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/base.py +13 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/gemini.py +54 -41
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/openai.py +3 -2
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/client.py +16 -16
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/config.py +3 -1
- lm_deluge-0.0.81/src/lm_deluge/llm_tools/__init__.py +25 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/anthropic.py +15 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/__init__.py +0 -7
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/score.py +2 -2
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/translate.py +5 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/prompt.py +105 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/request_context.py +2 -2
- lm_deluge-0.0.79/src/lm_deluge/tool.py → lm_deluge-0.0.81/src/lm_deluge/tool/__init__.py +531 -314
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/__init__.py +29 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/batch_tool.py +156 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/filesystem.py +1 -1
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/memory.py +190 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/otc/parse.py +188 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/sandbox.py +251 -61
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/todos.py +1 -1
- lm_deluge-0.0.81/src/lm_deluge/tool/prefab/tool_search.py +169 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/warnings.py +2 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81/src/lm_deluge.egg-info}/PKG-INFO +2 -3
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/SOURCES.txt +23 -14
- lm_deluge-0.0.81/tests/test_batch_tool.py +98 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_filesystem.py +8 -6
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_filesystem_live.py +1 -1
- lm_deluge-0.0.81/tests/test_otc.py +117 -0
- lm_deluge-0.0.81/tests/test_tool_search.py +86 -0
- lm_deluge-0.0.79/src/lm_deluge/presets/cerebras.py +0 -17
- lm_deluge-0.0.79/src/lm_deluge/presets/meta.py +0 -13
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/LICENSE +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/setup.cfg +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/__init__.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/google.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/classify.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/extract.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/locate.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/ocr.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/tool/prefab}/subagents.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_file_upload.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_mock_openai.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_native_mcp_server.py +0 -0
- {lm_deluge-0.0.79 → lm_deluge-0.0.81}/tests/test_openrouter_generic.py +0 -0
{lm_deluge-0.0.79/src/lm_deluge.egg-info → lm_deluge-0.0.81}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.79
+Version: 0.0.81
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -296,11 +296,10 @@ We support structured outputs via `json_mode` parameter provided to `SamplingParams`

 ## Built‑in tools

-The `lm_deluge.
+The `lm_deluge.pipelines` module exposes a few helper functions that combine LLMClient with prompt and output parsing to accomplish tasks:

 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
-- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.

 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/README.md
@@ -267,11 +267,10 @@ We support structured outputs via `json_mode` parameter provided to `SamplingParams`

 ## Built‑in tools

-The `lm_deluge.
+The `lm_deluge.pipelines` module exposes a few helper functions that combine LLMClient with prompt and output parsing to accomplish tasks:

 - `extract` – structure text or images into a Pydantic model based on a schema.
 - `translate` – translate a list of strings to English.
 - `score_llm` – simple yes/no style scoring with optional log probability output.
-- `FilesystemManager` – expose a sandboxed read/write filesystem tool (with optional regex search and `apply_patch` support) that agents can call without touching the host machine.

 Experimental embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) clients are also provided.
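The same helper functions survive the move from `lm_deluge.llm_tools` to `lm_deluge.pipelines`. A minimal usage sketch follows; the `LLMClient("...")` construction and the prompt-template string are assumptions for illustration, while the `translate` and `score_llm` signatures come from this diff.

```python
# Illustrative sketch only: LLMClient("gpt-4.1-mini") and the template are assumptions.
from lm_deluge import LLMClient
from lm_deluge.pipelines import score_llm, translate  # previously lm_deluge.llm_tools

client = LLMClient("gpt-4.1-mini")  # assumed: a model name from the registry

# translate: returns the list with non-English entries translated to English
english = translate(["Bonjour le monde", "already English"], client)

# score_llm: yes/no scoring; return_probabilities=True would use logprobs instead
labels = score_llm(
    "Is the following text about food? {}",
    [("I love ramen",), ("Stocks fell today",)],
    scoring_model=client,
    return_probabilities=False,
)
print(english, labels)
```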

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/__init__.py
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool, ToolParams
+from .tool import Tool

 try:
     from .mock_openai import (  # noqa
@@ -25,7 +25,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
    "File",
 ]

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/anthropic.py
@@ -16,6 +16,7 @@ from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_anthropic,
 )
+from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from .base import APIRequestBase, APIResponse
@@ -62,20 +63,45 @@ def _build_anthropic_request(
         "max_tokens": sampling_params.max_new_tokens,
     }

+    if model.id == "claude-4.5-opus" and sampling_params.global_effort:
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
+        _add_beta(base_headers, "effort-2025-11-24")
+
     # handle thinking
-    if model.reasoning_model
-        sampling_params.reasoning_effort
-    )
+    if model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if sampling_params.thinking_budget is not None:
+            budget = sampling_params.thinking_budget
+        elif sampling_params.reasoning_effort is not None:
+            # translate reasoning effort of low, medium, high to budget tokens
+            budget = {
+                "none": 0,
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }.get(sampling_params.reasoning_effort)
+            assert isinstance(budget, int)
+        else:
+            budget = 0
+
+        if budget > 0:
+            request_json["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": budget,
+            }
+            if "top_p" in request_json:
+                request_json["top_p"] = max(request_json["top_p"], 0.95)
+            request_json["temperature"] = 1.0
+            request_json["max_tokens"] += budget
+        else:
+            request_json["thinking"] = {"type": "disabled"}
+
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
@@ -83,11 +109,13 @@ def _build_anthropic_request(
     if system_message is not None:
         request_json["system"] = system_message

-    # handle temp + top_p for opus 4.1/sonnet 4.5
+    # handle temp + top_p for opus 4.1/sonnet 4.5.
+    # TODO: make clearer / more user-friendly so there can be NotGiven
+    # and user can control which one they want to use
     if "4-1" in model.name or "4-5" in model.name:
-        request_json.pop("top_p")
+        request_json.pop("top_p")

+    # print(request_json)
     # Handle structured outputs (output_format)
     if context.output_schema:
         if model.supports_json:
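The new Anthropic path resolves an explicit `thinking_budget` first and otherwise maps `reasoning_effort` to a token budget; a positive budget enables extended thinking, forces `temperature` to 1.0, lifts `top_p` to at least 0.95, and adds the budget on top of `max_tokens`. Below is a standalone sketch of that selection logic, not the library's code:

```python
# Standalone sketch of the budget selection above: an explicit thinking_budget wins,
# otherwise reasoning_effort maps to a token budget.
EFFORT_TO_BUDGET = {"none": 0, "minimal": 256, "low": 1024, "medium": 4096, "high": 16384}


def pick_thinking_budget(thinking_budget: int | None, reasoning_effort: str | None) -> int:
    if thinking_budget is not None:
        return thinking_budget
    if reasoning_effort is not None:
        return EFFORT_TO_BUDGET[reasoning_effort]
    return 0


# A positive budget enables thinking; a zero budget leaves thinking disabled.
assert pick_thinking_budget(None, "medium") == 4096
assert pick_thinking_budget(2048, "high") == 2048  # explicit budget takes precedence
assert pick_thinking_budget(None, None) == 0
```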

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/base.py
@@ -222,6 +222,19 @@ class APIRequestBase(ABC):
                 usage=None,
             )

+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
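With this change a dropped connection surfaces as a normal error `APIResponse` instead of an unhandled `aiohttp.ServerDisconnectedError`. A hedged sketch of how calling code might filter such responses (only the `id`, `is_error`, and `error_message` fields from the constructor above are relied on; the rest is illustrative):

```python
# Hedged sketch: responses are APIResponse objects as constructed above.
def usable_completions(responses):
    good = []
    for resp in responses:
        if resp.is_error:
            # "Server disconnected." now arrives as an error response rather than
            # as an unhandled aiohttp.ServerDisconnectedError.
            print(f"request {resp.id} failed: {resp.error_message}")
            continue
        good.append(resp)
    return good
```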

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/gemini.py
@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Any

 from aiohttp import ClientResponse
@@ -52,47 +51,61 @@ async def _build_gemini_request(
         request_json["systemInstruction"] = {"parts": [{"text": system_message}]}

     # Handle reasoning models (thinking)
+    is_gemini_3 = "gemini-3" in model.name.lower()
+    if is_gemini_3:
+        # gemini3 MUST think
+        if not sampling_params.reasoning_effort:
+            maybe_warn("WARN_GEMINI3_NO_REASONING")
+            effort = "low"
+        else:
+            level_map = {
+                "none": "low",
+                "minimal": "low",
+                "low": "low",
+                "medium": "high",  # change when supported
+                "high": "high",
+            }
+            effort = level_map[sampling_params.reasoning_effort]
+        thinking_config = {"thinkingLevel": effort}
+        request_json["generationConfig"]["thinkingConfig"] = thinking_config

+    elif model.reasoning_model:
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.reasoning_effort is not None
+        ):
+            maybe_warn("WARN_THINKING_BUDGET_AND_REASONING_EFFORT")
+
+        if (
+            sampling_params.thinking_budget is not None
+            and sampling_params.thinking_budget > 0
+        ):
+            thinking_config = {
+                "includeThoughts": True,
+                "thinkingBudget": sampling_params.thinking_budget,
+            }
+        elif sampling_params.thinking_budget == -1:
+            # dynamic thinking
+            thinking_config = {"includeThoughts": True, "thinkingBudget": -1}
+        elif sampling_params.reasoning_effort not in [None, "none"]:
+            level_map = {
+                "minimal": 256,
+                "low": 1024,
+                "medium": 4096,
+                "high": 16384,
+            }
+            assert sampling_params.reasoning_effort in level_map
+            budget = level_map[sampling_params.reasoning_effort]
+            if "flash-lite" in model.id:
+                budget = max(budget, 512)
+            thinking_config = {"includeThoughts": True, "thinkingBudget": budget}
+        elif "2.5-pro" in model.id:
+            # 2.5 pro must think.
+            thinking_config = {"includeThoughts": True, "thinkingBudget": 128}
         else:
-            #
-            # Explicitly disable thoughts when no effort is requested
-            thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
-        else:
-            thinking_config = {"includeThoughts": True}
-            if (
-                effort in {"minimal", "low", "medium", "high"}
-                and "flash" in model.id
-            ):
-                budget = {
-                    "minimal": 256,
-                    "low": 1024,
-                    "medium": 4096,
-                    "high": 16384,
-                }[effort]
-                thinking_config["thinkingBudget"] = budget
+            # no thoughts head empty
+            thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
+
         request_json["generationConfig"]["thinkingConfig"] = thinking_config

     else:
@@ -160,7 +173,7 @@ class GeminiRequest(APIRequestBase):
         self.request_json = await _build_gemini_request(
             self.model,
             self.context.prompt,
-            self.context.tools,
+            self.context.tools,  # type: ignore
             self.context.sampling_params,
         )
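For Gemini, the builder now distinguishes Gemini 3 (which must think and takes a `thinkingLevel`) from older reasoning models (which take a `thinkingBudget`). The function below re-derives the `thinkingConfig` selection as a standalone sketch; it is illustrative, not the library's implementation, and it only covers the reasoning-model branch shown above.

```python
# Standalone re-derivation of the thinkingConfig choice for reasoning models.
def gemini_thinking_config(
    model_name: str,
    model_id: str,
    reasoning_effort: str | None,
    thinking_budget: int | None,
) -> dict:
    if "gemini-3" in model_name.lower():
        # Gemini 3 must think: effort collapses to a thinkingLevel of "low" or "high".
        level_map = {"none": "low", "minimal": "low", "low": "low", "medium": "high", "high": "high"}
        return {"thinkingLevel": level_map.get(reasoning_effort or "", "low")}
    if thinking_budget is not None and thinking_budget > 0:
        return {"includeThoughts": True, "thinkingBudget": thinking_budget}
    if thinking_budget == -1:  # dynamic thinking
        return {"includeThoughts": True, "thinkingBudget": -1}
    if reasoning_effort not in (None, "none"):
        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[reasoning_effort]
        if "flash-lite" in model_id:
            budget = max(budget, 512)
        return {"includeThoughts": True, "thinkingBudget": budget}
    if "2.5-pro" in model_id:
        return {"includeThoughts": True, "thinkingBudget": 128}  # 2.5 Pro must think
    return {"includeThoughts": False, "thinkingBudget": 0}


print(gemini_thinking_config("gemini-3-pro", "gemini-3-pro", None, None))           # {'thinkingLevel': 'low'}
print(gemini_thinking_config("gemini-2.5-flash", "gemini-2.5-flash", "low", None))  # thinkingBudget 1024
```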

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/api_requests/openai.py
@@ -2,17 +2,18 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence

 import aiohttp
 from aiohttp import ClientResponse

 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
-from lm_deluge.warnings import maybe_warn
 from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_openai,
 )
+from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel
@@ -610,7 +611,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/client.py
@@ -79,7 +79,7 @@ class _LLMClient(BaseModel):
     background: bool = False
     # sampling params - if provided, and sampling_params is not,
     # these override the defaults
-    temperature: float = 0
+    temperature: float = 1.0
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 512
@@ -337,7 +337,7 @@ class _LLMClient(BaseModel):
         if "sampling_params" not in data or len(data.get("sampling_params", [])) == 0:
             data["sampling_params"] = [
                 SamplingParams(
-                    temperature=data.get("temperature", 0
+                    temperature=data.get("temperature", 1.0),
                     top_p=data.get("top_p", 1.0),
                     json_mode=data.get("json_mode", False),
                     max_new_tokens=data.get("max_new_tokens", 512),
@@ -572,7 +572,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -585,7 +585,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -597,7 +597,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -672,7 +672,7 @@ class _LLMClient(BaseModel):
         *,
         return_completions_only: bool = False,
         show_progress=True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
     ):
@@ -705,7 +705,7 @@ class _LLMClient(BaseModel):
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -742,7 +742,7 @@ class _LLMClient(BaseModel):
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -835,7 +835,7 @@ class _LLMClient(BaseModel):
     async def stream(
         self,
         prompt: Prompt,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
     ):
         model, sampling_params = self._select_model()
         prompt = prompts_to_conversations([prompt])[0]
@@ -856,7 +856,7 @@ class _LLMClient(BaseModel):
         task_id: int,
         conversation: Conversation,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> AgentLoopResponse:
         """Internal method to run agent loop and return wrapped result."""
@@ -920,7 +920,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> int:
         """Start an agent loop without waiting for it to complete.
@@ -967,7 +967,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -986,7 +986,7 @@ class _LLMClient(BaseModel):
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -1067,7 +1067,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1096,7 +1096,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
@@ -1124,7 +1124,7 @@ def LLMClient(
     extra_headers: dict[str, str] | None = None,
     use_responses_api: bool = False,
     background: bool = False,
-    temperature: float = 0
+    temperature: float = 1.0,
     top_p: float = 1.0,
     json_mode: bool = False,
     max_new_tokens: int = 512,
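Across these client signatures, `tools` is now typed as `Sequence[Tool | dict | MCPServer] | None`, so one sequence may mix `Tool` objects, raw dict tool definitions, and MCP servers, and the default `temperature` moves from 0 to 1.0. A hedged sketch (the dict keys and any constructor arguments are placeholders, not confirmed by this diff):

```python
# Placeholder example: the raw dict format and Tool/MCPServer constructors are not shown
# in this diff; the point is that one sequence type now admits all three.
from typing import Sequence

from lm_deluge.tool import MCPServer, Tool

weather_tool: dict = {
    "name": "get_weather",
    "description": "Return current weather for a city.",
    "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
}

tools: Sequence[Tool | dict | MCPServer] = [weather_tool]
# Tool and MCPServer instances could be appended to the same sequence before passing
# it to a client call such as client.stream(prompt, tools=tools), as typed above.
```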

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/config.py
@@ -4,11 +4,13 @@ from pydantic import BaseModel


 class SamplingParams(BaseModel):
-    temperature: float =
+    temperature: float = 1.0  # more typical for new models
     top_p: float = 1.0
     json_mode: bool = False
     max_new_tokens: int = 2_048
+    global_effort: Literal["low", "medium", "high"] = "high"  # for opus-4.5
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     strict_tools: bool = True
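`SamplingParams` gains `global_effort` and `thinking_budget`, and its default `temperature` moves to 1.0. An illustrative construction using only fields shown above (assuming the class is importable from the package root, as in `__init__.py`):

```python
# Illustrative only; field names come from the SamplingParams definition above.
from lm_deluge import SamplingParams

# Explicit token budget for extended thinking (used by the Anthropic/Gemini builders).
thinking_params = SamplingParams(max_new_tokens=2048, thinking_budget=2048)

# Or an effort level; global_effort is only consulted for claude-4.5-opus.
effort_params = SamplingParams(reasoning_effort="medium", global_effort="high")

# Setting both thinking_budget and reasoning_effort triggers
# WARN_THINKING_BUDGET_AND_REASONING_EFFORT in the request builders.
```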

lm_deluge-0.0.81/src/lm_deluge/llm_tools/__init__.py
@@ -0,0 +1,25 @@
+# Backward compatibility - re-export from new locations
+# Pipelines (workflow functions)
+from ..pipelines import extract, extract_async, score_llm, translate, translate_async
+
+# Prefab tools (Tool managers)
+from ..tool.prefab import (
+    SubAgentManager,
+    TodoItem,
+    TodoManager,
+    TodoPriority,
+    TodoStatus,
+)
+
+__all__ = [
+    "extract",
+    "extract_async",
+    "TodoItem",
+    "TodoManager",
+    "TodoPriority",
+    "TodoStatus",
+    "translate",
+    "translate_async",
+    "score_llm",
+    "SubAgentManager",
+]
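The new `lm_deluge.llm_tools` package is a backward-compatibility shim, so old imports keep resolving to the same objects as the new paths. A quick sketch:

```python
# Old and new import paths resolve to the same objects via the shim above.
from lm_deluge.llm_tools import TodoManager, extract, score_llm  # old path, still works
from lm_deluge.pipelines import extract as extract_new            # new home for workflows
from lm_deluge.tool.prefab import TodoManager as TodoManagerNew   # new home for prefab tools

assert extract is extract_new
assert TodoManager is TodoManagerNew
```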

{lm_deluge-0.0.79 → lm_deluge-0.0.81}/src/lm_deluge/models/anthropic.py
@@ -10,6 +10,19 @@ ANTHROPIC_MODELS = {
     # ░███
     # █████
     #
+    "claude-4.5-opus": {
+        "id": "claude-4.5-opus",
+        "name": "claude-opus-4-5-20251101",
+        "api_base": "https://api.anthropic.com/v1",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+        "input_cost": 5.0,
+        "cached_input_cost": 0.50,
+        "cache_write_cost": 6.25,
+        "output_cost": 25.0,
+        "reasoning_model": True,
+    },
     "claude-4.5-haiku": {
         "id": "claude-4.5-haiku",
         "name": "claude-haiku-4-5-20251001",
@@ -21,6 +34,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.10,
         "cache_write_cost": 1.25,
         "output_cost": 3.0,
+        "reasoning_model": True,
     },
     "claude-4.5-sonnet": {
         "id": "claude-4.5-sonnet",
@@ -33,6 +47,7 @@ ANTHROPIC_MODELS = {
         "cached_input_cost": 0.30,
         "cache_write_cost": 3.75,
         "output_cost": 15.0,
+        "reasoning_model": True,
     },
     "claude-4.1-opus": {
         "id": "claude-4.1-opus",
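The registry gains a `claude-4.5-opus` entry (pointing at `claude-opus-4-5-20251101` and flagged as a reasoning model), and the 4.5 Haiku/Sonnet entries are now flagged `reasoning_model` as well. A hedged sketch reading the new entry straight from the registry module:

```python
# Sketch: inspecting the new entry in the registry dict shown above.
from lm_deluge.models.anthropic import ANTHROPIC_MODELS

opus = ANTHROPIC_MODELS["claude-4.5-opus"]
print(opus["name"])             # claude-opus-4-5-20251101
print(opus["reasoning_model"])  # True
print(opus["input_cost"], opus["output_cost"])  # 5.0 25.0
```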

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/__init__.py
RENAMED
@@ -1,18 +1,11 @@
 from .extract import extract, extract_async
 from .score import score_llm
-from .subagents import SubAgentManager
-from .todos import TodoItem, TodoManager, TodoPriority, TodoStatus
 from .translate import translate, translate_async

 __all__ = [
     "extract",
     "extract_async",
-    "TodoItem",
-    "TodoManager",
-    "TodoPriority",
-    "TodoStatus",
     "translate",
     "translate_async",
     "score_llm",
-    "SubAgentManager",
 ]

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/score.py
RENAMED
@@ -1,4 +1,4 @@
-from ..client import
+from ..client import _LLMClient, APIResponse
 from ..util.logprobs import extract_prob

 # def extract_prob_yes(logprobs: list[dict]):
@@ -24,7 +24,7 @@ from ..util.logprobs import extract_prob
 def score_llm(
     scoring_prompt_template: str,
     inputs: list[tuple | list | dict],  # to format the template
-    scoring_model:
+    scoring_model: _LLMClient,
     return_probabilities: bool,
     yes_token: str = "yes",
 ) -> list[bool | None] | list[float | None]:

{lm_deluge-0.0.79/src/lm_deluge/llm_tools → lm_deluge-0.0.81/src/lm_deluge/pipelines}/translate.py
RENAMED
@@ -1,5 +1,5 @@
 import asyncio
-from ..client import
+from ..client import _LLMClient

 translation_prompt = (
     "Translate the following text (enclosed in ```) into English. "
@@ -20,7 +20,9 @@ def is_english(text: str, low_memory: bool = True):
     return True


-async def translate_async(
+async def translate_async(
+    texts: list[str], client: _LLMClient, low_memory: bool = True
+):
     to_translate_idxs = [
         i for i, text in enumerate(texts) if not is_english(text, low_memory=low_memory)
     ]
@@ -40,5 +42,5 @@ async def translate_async(texts: list[str], client: LLMClient, low_memory: bool
     return texts


-def translate(texts: list[str], client:
+def translate(texts: list[str], client: _LLMClient, low_memory: bool = True):
     return asyncio.run(translate_async(texts, client, low_memory))
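`translate_async` keeps its `(texts, client, low_memory)` shape but now expects the private `_LLMClient` type. An illustrative async usage sketch (the client construction is an assumption, not shown in this diff):

```python
# Illustrative sketch; the LLMClient construction is assumed.
import asyncio

from lm_deluge import LLMClient
from lm_deluge.pipelines import translate_async


async def main() -> None:
    client = LLMClient("gpt-4.1-mini")  # assumed registry model name
    texts = ["Hola mundo", "Hello world"]
    translated = await translate_async(texts, client)
    print(translated)  # non-English entries come back translated to English


asyncio.run(main())
```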