lm-deluge 0.0.82__tar.gz → 0.0.84__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.82/src/lm_deluge.egg-info → lm_deluge-0.0.84}/PKG-INFO +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/pyproject.toml +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/anthropic.py +9 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/gemini.py +34 -2
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/openai.py +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/client.py +100 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/__init__.py +3 -1
- lm_deluge-0.0.84/src/lm_deluge/models/arcee.py +16 -0
- lm_deluge-0.0.84/src/lm_deluge/models/deepseek.py +59 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/google.py +14 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/kimi.py +2 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/openrouter.py +10 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/together.py +11 -0
- lm_deluge-0.0.84/src/lm_deluge/models/zai.py +1 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/prompt.py +39 -11
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/__init__.py +11 -4
- lm_deluge-0.0.84/src/lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/base.py +27 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/cua/trycua.py +989 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/__init__.py +8 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge-0.0.84/src/lm_deluge/tool/prefab/web_search.py +195 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/warnings.py +1 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/SOURCES.txt +22 -18
- lm_deluge-0.0.82/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge-0.0.82/src/lm_deluge/llm_tools/__init__.py +0 -25
- lm_deluge-0.0.82/src/lm_deluge/models/deepseek.py +0 -27
- lm_deluge-0.0.82/src/lm_deluge/tool/prefab/filesystem.py +0 -821
- lm_deluge-0.0.82/src/lm_deluge/tool/prefab/memory.py +0 -190
- lm_deluge-0.0.82/tests/test_batch_tool.py +0 -98
- lm_deluge-0.0.82/tests/test_builtin_tools.py +0 -58
- lm_deluge-0.0.82/tests/test_file_upload.py +0 -627
- lm_deluge-0.0.82/tests/test_filesystem.py +0 -121
- lm_deluge-0.0.82/tests/test_filesystem_live.py +0 -82
- lm_deluge-0.0.82/tests/test_mock_openai.py +0 -582
- lm_deluge-0.0.82/tests/test_native_mcp_server.py +0 -66
- lm_deluge-0.0.82/tests/test_openrouter_generic.py +0 -238
- lm_deluge-0.0.82/tests/test_otc.py +0 -117
- lm_deluge-0.0.82/tests/test_tool_search.py +0 -86
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/LICENSE +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/README.md +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/setup.cfg +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/mock_openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/anthropic.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/minimax.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/models/openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/classify.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/extract.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/locate.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/ocr.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/score.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/pipelines/translate.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/bash.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/anthropic/editor.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.82/src/lm_deluge/built_in_tools → lm_deluge-0.0.84/src/lm_deluge/tool/builtin}/openai.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/executor.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/sandbox.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/subagents.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/todos.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/schema.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.82 → lm_deluge-0.0.84}/src/lm_deluge.egg-info/top_level.txt +0 -0
src/lm_deluge/api_requests/anthropic.py

@@ -101,11 +101,14 @@ def _build_anthropic_request(
             request_json["max_tokens"] += budget
         else:
             request_json["thinking"] = {"type": "disabled"}
+            if "kimi" in model.id and "thinking" in model.id:
+                maybe_warn("WARN_KIMI_THINKING_NO_REASONING")
 
     else:
         request_json["thinking"] = {"type": "disabled"}
         if sampling_params.reasoning_effort:
             print("ignoring reasoning_effort for non-reasoning model")
+
     if system_message is not None:
         request_json["system"] = system_message
 
@@ -164,6 +167,9 @@ def _build_anthropic_request(
                 "bash_20241022",
             ]:
                 _add_beta(base_headers, "computer-use-2024-10-22")
+            elif tool["type"] == "computer_20251124":
+                # Claude Opus 4.5 - newest computer use with zoom support
+                _add_beta(base_headers, "computer-use-2025-11-24")
            elif tool["type"] == "computer_20250124":
                 _add_beta(base_headers, "computer-use-2025-01-24")
             elif tool["type"] == "code_execution_20250522":
@@ -231,6 +237,9 @@ class AnthropicRequest(APIRequestBase):
         data = await http_response.json()
         response_content = data["content"]
 
+        # print("=== CONTENT ===")
+        # print(response_content)
+
         # Parse response into Message with parts
         parts = []
         for item in response_content:
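A hedged sketch of how the new tool type is passed so the request picks up the matching beta header. The fields other than "type" follow Anthropic's published computer-use tool shape and are assumptions here; the diff itself only wires the header mapping.

```python
# Sketch only: fields besides "type" are assumed from Anthropic's computer-use
# tool schema; this diff only adds the beta-header mapping for the new type.
computer_tool = {
    "type": "computer_20251124",   # Claude Opus 4.5 computer use (zoom support)
    "name": "computer",
    "display_width_px": 1280,
    "display_height_px": 800,
}

# When this dict appears in `tools`, _build_anthropic_request now adds the
# "computer-use-2025-11-24" beta header, alongside the existing
# computer_20250124 / computer_20241022 mappings.
```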

src/lm_deluge/api_requests/gemini.py

@@ -114,8 +114,40 @@ async def _build_gemini_request(
 
     # Add tools if provided
     if tools:
-
-
+        request_tools = []
+        function_declarations = []
+
+        for tool in tools:
+            if isinstance(tool, dict) and tool.get("type") == "gemini_computer_use":
+                # Gemini computer use tool - add as separate tool entry
+                env_map = {
+                    "browser": "ENVIRONMENT_BROWSER",
+                    "android": "ENVIRONMENT_ANDROID",
+                }
+                env = env_map.get(
+                    tool.get("environment", "browser"), "ENVIRONMENT_BROWSER"
+                )
+                cu_tool: dict = {
+                    "computerUse": {
+                        "environment": env,
+                    }
+                }
+                excluded = tool.get("excluded_predefined_functions")
+                if excluded:
+                    cu_tool["computerUse"]["excludedPredefinedFunctions"] = excluded
+                request_tools.append(cu_tool)
+            elif hasattr(tool, "dump_for"):
+                # Regular Tool object
+                function_declarations.append(tool.dump_for("google"))
+            elif isinstance(tool, dict):
+                # Raw dict tool - assume it's a function declaration
+                function_declarations.append(tool)
+
+        if function_declarations:
+            request_tools.append({"functionDeclarations": function_declarations})
+
+        if request_tools:
+            request_json["tools"] = request_tools
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
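A minimal sketch of the tool list the new branch accepts and the Gemini tools payload it produces; the specific values are illustrative, not defined by the diff.

```python
# Sketch only: shows the dict shape the new gemini_computer_use branch consumes
# and the request_json["tools"] entry it builds; values are illustrative.
tools = [
    {
        "type": "gemini_computer_use",
        "environment": "browser",                        # or "android"
        "excluded_predefined_functions": ["drag_and_drop"],  # hypothetical exclusion
    },
    # Regular Tool objects and raw function-declaration dicts still work alongside it
    # and are collected into a separate {"functionDeclarations": [...]} entry.
]

# Resulting request_json["tools"]:
# [
#     {"computerUse": {"environment": "ENVIRONMENT_BROWSER",
#                      "excludedPredefinedFunctions": ["drag_and_drop"]}},
# ]
```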

src/lm_deluge/api_requests/openai.py

@@ -367,7 +367,7 @@ async def _build_oa_responses_request(
         elif isinstance(tool, dict):
             # if computer use, make sure model supports it
             if tool["type"] == "computer_use_preview":
-                if model.name != "
+                if model.name != "computer-use-preview":
                     raise ValueError(f"model {model.id} does not support computer use")
                 # have to use truncation
                 request_json["truncation"] = "auto"

src/lm_deluge/client.py

@@ -84,6 +84,8 @@ class _LLMClient(BaseModel):
     json_mode: bool = False
     max_new_tokens: int = 512
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None
+    global_effort: Literal["low", "medium", "high"] | None = None
+    thinking_budget: int | None = None
     logprobs: bool = False
     top_logprobs: int | None = None
     force_local_mcp: bool = False
@@ -212,6 +214,8 @@ class _LLMClient(BaseModel):
             json_mode=self.json_mode,
             max_new_tokens=self.max_new_tokens,
             reasoning_effort=self.reasoning_effort,
+            global_effort=self.global_effort or "high",
+            thinking_budget=self.thinking_budget,
             logprobs=self.logprobs,
             top_logprobs=self.top_logprobs,
         )
@@ -347,6 +351,8 @@ class _LLMClient(BaseModel):
             json_mode=data.get("json_mode", False),
             max_new_tokens=data.get("max_new_tokens", 512),
             reasoning_effort=data.get("reasoning_effort", None),
+            global_effort=data.get("global_effort") or "high",
+            thinking_budget=data.get("thinking_budget", None),
             logprobs=data.get("logprobs", False),
             top_logprobs=data.get("top_logprobs", None),
         )
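The two new sampling fields also flow through the public `LLMClient` factory (see the signature hunks further below), so they can be set at construction time. A minimal sketch, assuming the usual pattern of passing a registered model id to the factory; the id here is hypothetical.

```python
# Sketch only: the model id is illustrative, and how global_effort/thinking_budget
# are interpreted is provider-dependent.
from lm_deluge import LLMClient

client = LLMClient(
    "claude-4.5-sonnet",        # hypothetical registered model id
    max_new_tokens=2048,
    reasoning_effort="medium",  # per-request effort, as before
    global_effort="high",       # new field; defaults to "high" when unset
    thinking_budget=8192,       # new field; explicit thinking-token budget
)
responses = client.process_prompts_sync(["Summarize the changes in this release."])
```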

@@ -1006,6 +1012,92 @@ class _LLMClient(BaseModel):
             )
         )
 
+    async def process_agent_loops_async(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Process multiple agent loops concurrently.
+
+        Each prompt becomes an independent agent loop that can make multiple LLM
+        calls and execute tools until completion. The agent loops run concurrently,
+        limited by ``max_concurrent_agents``, while the underlying LLM requests
+        are still governed by ``max_concurrent_requests``.
+
+        Args:
+            prompts: Sequence of prompts, each becoming a separate agent loop.
+            tools: Tools available to all agent loops.
+            max_rounds: Maximum rounds per agent loop (default 5).
+            max_concurrent_agents: Maximum number of agent loops running
+                concurrently (default 10). This is separate from the LLM request
+                concurrency limit.
+            show_progress: Whether to show progress bar for LLM requests.
+
+        Returns:
+            List of (Conversation, APIResponse) tuples in the same order as
+            the input prompts.
+        """
+        # Convert prompts to Conversations
+        conversations = prompts_to_conversations(list(prompts))
+
+        # Ensure tracker exists for underlying LLM requests
+        if self._tracker is None:
+            self.open(total=0, show_progress=show_progress)
+            tracker_preopened = False
+        else:
+            tracker_preopened = True
+
+        # Semaphore to limit concurrent agent loops
+        agent_semaphore = asyncio.Semaphore(max_concurrent_agents)
+
+        async def run_single_loop(
+            idx: int, conv: Conversation
+        ) -> tuple[int, Conversation, APIResponse]:
+            """Run a single agent loop with semaphore protection."""
+            async with agent_semaphore:
+                task_id = self._next_task_id
+                self._next_task_id += 1
+                result = await self._run_agent_loop_internal(
+                    task_id, conv, tools=tools, max_rounds=max_rounds
+                )
+                return idx, result.conversation, result.final_response
+
+        # Launch all agent loops concurrently (semaphore limits actual concurrency)
+        tasks = [run_single_loop(idx, conv) for idx, conv in enumerate(conversations)]
+        completed = await asyncio.gather(*tasks)
+
+        # Close tracker if we opened it
+        if not tracker_preopened:
+            self.close()
+
+        # Sort by original index and extract results
+        completed_sorted = sorted(completed, key=lambda x: x[0])
+        return [(conv, resp) for _, conv, resp in completed_sorted]
+
+    def process_agent_loops_sync(
+        self,
+        prompts: Sequence[Prompt],
+        *,
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
+        max_rounds: int = 5,
+        max_concurrent_agents: int = 10,
+        show_progress: bool = True,
+    ) -> list[tuple[Conversation, APIResponse]]:
+        """Synchronous wrapper for :meth:`process_agent_loops_async`."""
+        return asyncio.run(
+            self.process_agent_loops_async(
+                prompts,
+                tools=tools,
+                max_rounds=max_rounds,
+                max_concurrent_agents=max_concurrent_agents,
+                show_progress=show_progress,
+            )
+        )
+
     async def submit_batch_job(
         self,
         prompts: Prompt | Sequence[Prompt],
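A minimal sketch of driving the new agent-loop API. It assumes the client is constructed the usual way (by registered model id), that `Tool.from_function` wraps a plain Python callable, and that the response exposes a `completion` attribute as elsewhere in the library; the model id and the weather tool are made up for illustration.

```python
# Sketch only: the model id and get_weather tool are illustrative.
from lm_deluge import LLMClient
from lm_deluge.tool import Tool


def get_weather(city: str) -> str:
    """Return a canned weather report for a city."""
    return f"It is sunny in {city}."


client = LLMClient("gpt-4.1-mini", max_concurrent_requests=20)
results = client.process_agent_loops_sync(
    ["What's the weather in Paris?", "What's the weather in Tokyo?"],
    tools=[Tool.from_function(get_weather)],
    max_rounds=5,               # cap on LLM round-trips per loop
    max_concurrent_agents=10,   # loops in flight; requests still obey client limits
)
for conversation, final_response in results:
    print(final_response.completion if final_response else None)
```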

@@ -1077,6 +1169,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1106,6 +1200,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1134,6 +1230,8 @@ def LLMClient(
     json_mode: bool = False,
     max_new_tokens: int = 512,
     reasoning_effort: Literal["low", "medium", "high", "minimal", "none", None] = None,
+    global_effort: Literal["low", "medium", "high"] | None = None,
+    thinking_budget: int | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
@@ -1174,6 +1272,8 @@ def LLMClient(
         json_mode=json_mode,
         max_new_tokens=max_new_tokens,
         reasoning_effort=reasoning_effort,
+        global_effort=global_effort,
+        thinking_budget=thinking_budget,
         logprobs=logprobs,
         top_logprobs=top_logprobs,
         force_local_mcp=force_local_mcp,

src/lm_deluge/models/__init__.py

@@ -4,9 +4,10 @@ import random
 from dataclasses import dataclass, field
 
 from ..request_context import RequestContext
+from .anthropic import ANTHROPIC_MODELS
 
 # Import and register all provider models
-from .
+from .arcee import ARCEE_MODELS
 from .bedrock import BEDROCK_MODELS
 from .cerebras import CEREBRAS_MODELS
 from .cohere import COHERE_MODELS
@@ -128,6 +129,7 @@ def register_model(
 # Register all models from all providers
 for model_dict in [
     ANTHROPIC_MODELS,
+    ARCEE_MODELS,
     BEDROCK_MODELS,
     COHERE_MODELS,
     DEEPSEEK_MODELS,
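With the new provider modules registered, the added ids resolve through the same registry as the existing models. A minimal sketch, assuming the usual pattern of passing a registered id to the client factory and that the relevant API keys are set.

```python
# Sketch only: assumes ARCEE_API_KEY / DEEPSEEK_API_KEY are set in the environment.
from lm_deluge import LLMClient

arcee = LLMClient("trinity-mini")          # new Arcee entry (OpenAI-spec endpoint)
deepseek = LLMClient("deepseek-reasoner")  # re-added DeepSeek reasoner entry

resps = deepseek.process_prompts_sync(["Why is the sky blue?"])
print(resps[0].completion)
```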

src/lm_deluge/models/arcee.py (new file)

@@ -0,0 +1,16 @@
+ARCEE_MODELS = {
+    "trinity-mini": {
+        "id": "trinity-mini",
+        "name": "trinity-mini",
+        "api_base": "https://api.arcee.ai/api/v1",
+        "api_key_env_var": "ARCEE_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "supports_responses": False,
+        "api_spec": "openai",
+        "input_cost": 0.045,
+        "cached_input_cost": 0.045,
+        "output_cost": 0.15,
+        "reasoning_model": True,
+    }
+}

src/lm_deluge/models/deepseek.py (new file)

@@ -0,0 +1,59 @@
+DEEPSEEK_MODELS = {
+    # ______ _
+    # (______) | |
+    # _ _ _____ _____ ____ ___ _____ _____| | _
+    # | | | | ___ | ___ | _ \ /___) ___ | ___ | |_/ )
+    # | |__/ /| ____| ____| |_| |___ | ____| ____| _ (
+    # |_____/ |_____)_____) __/(___/|_____)_____)_| \_)
+    # |_|
+    "deepseek-chat": {
+        "id": "deepseek-chat",
+        "name": "deepseek-chat",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-r1": {
+        "id": "deepseek-r1",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-reasoner": {
+        "id": "deepseek-reasoner",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-reasoner-anthropic-compat": {
+        "id": "deepseek-reasoner-anthropic-compat",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/anthropic",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "anthropic",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+    "deepseek-speciale": {
+        "id": "deepseek-speciale",
+        "name": "deepseek-reasoner",
+        "api_base": "https://api.deepseek.com/v3.2_speciale_expires_on_20251215/v1",
+        "api_key_env_var": "DEEPSEEK_API_KEY",
+        "api_spec": "openai",
+        "input_cost": 0.28,
+        "cached_input_cost": 0.028,
+        "output_cost": 0.42,
+    },
+}

src/lm_deluge/models/google.py

@@ -153,4 +153,18 @@ GOOGLE_MODELS = {
         # Note: >200k tokens pricing is $4/$18 per million
         "reasoning_model": True,
     },
+    # Gemini 2.5 Computer Use model
+    "gemini-2.5-computer-use": {
+        "id": "gemini-2.5-computer-use",
+        "name": "gemini-2.5-computer-use-preview-10-2025",
+        "api_base": "https://generativelanguage.googleapis.com/v1beta",
+        "api_key_env_var": "GEMINI_API_KEY",
+        "supports_json": True,
+        "supports_logprobs": False,
+        "api_spec": "gemini",
+        "input_cost": 1.25,  # same as gemini-2.5-pro for now
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
+        "reasoning_model": True,
+    },
 }

src/lm_deluge/models/kimi.py

@@ -22,6 +22,7 @@ KIMI_MODELS = {
         "api_key_env_var": "MOONSHOT_API_KEY",
         "supports_json": True,
         "api_spec": "anthropic",
+        "reasoning_model": True,
     },
     "kimi-k2-thinking-turbo": {
         "id": "kimi-k2-thinking-turbo",
@@ -30,5 +31,6 @@ KIMI_MODELS = {
         "api_key_env_var": "MOONSHOT_API_KEY",
         "supports_json": True,
         "api_spec": "anthropic",
+        "reasoning_model": True,
     },
 }

src/lm_deluge/models/openrouter.py

@@ -71,4 +71,14 @@ OPENROUTER_MODELS = {
         "input_cost": 0.2,
         "output_cost": 35,
     },
+    "trinity-mini-openrouter": {
+        "id": "trinity-mini-openrouter",
+        "name": "arcee-ai/trinity-mini:free",
+        "api_base": "https://openrouter.ai/api/v1",
+        "api_key_env_var": "OPENROUTER_API_KEY",
+        "supports_json": True,
+        "api_spec": "openai",
+        "input_cost": 0.045,
+        "output_cost": 0.15,
+    },
 }

src/lm_deluge/models/together.py

@@ -93,4 +93,15 @@ TOGETHER_MODELS = {
         "output_cost": 0.59,
         "reasoning_model": True,
     },
+    "trinity-mini-together": {
+        "id": "trinity-mini-together",
+        "name": "arcee-ai/trinity-mini",
+        "api_base": "https://api.together.xyz/v1",
+        "api_key_env_var": "TOGETHER_API_KEY",
+        "supports_json": False,
+        "api_spec": "openai",
+        "input_cost": 0.18,
+        "output_cost": 0.59,
+        "reasoning_model": True,
+    },
 }

src/lm_deluge/models/zai.py (new file)

@@ -0,0 +1 @@
+ZAI_MODELS = {}

src/lm_deluge/prompt.py

@@ -203,6 +203,8 @@ class ToolResult:
             "call_id": self.tool_call_id,
         }
         if self.built_in_type == "computer_call":
+            # OpenAI expects "computer_call_output" for the result type
+            result["type"] = "computer_call_output"
             result["output"] = output_data.get("output", {})
             if "acknowledged_safety_checks" in output_data:
                 result["acknowledged_safety_checks"] = output_data[
@@ -235,15 +237,41 @@ class ToolResult:
         raise ValueError("unsupported self.result type")
 
     def gemini(self) -> dict:
-
-
-
-            "functionResponse": {
-                "name": self.tool_call_id,  # Gemini uses name field for ID
-                "response": {"result": self.result},
-            }
+        # Build the function response
+        func_response: dict = {
+            "name": self.tool_call_id,  # Gemini uses name field for ID
         }
 
+        # Handle different result types
+        if isinstance(self.result, str):
+            func_response["response"] = {"result": self.result}
+        elif isinstance(self.result, dict):
+            # Check for Gemini computer use format with inline screenshot
+            if self.built_in_type == "gemini_computer_use":
+                # Gemini CU expects response dict with optional inline_data parts
+                func_response["response"] = self.result.get("response", {})
+                # Include inline data (screenshot) if present
+                if "inline_data" in self.result:
+                    func_response["parts"] = [
+                        {
+                            "inlineData": {
+                                "mimeType": self.result["inline_data"].get(
+                                    "mime_type", "image/png"
+                                ),
+                                "data": self.result["inline_data"]["data"],
+                            }
+                        }
+                    ]
+            else:
+                func_response["response"] = self.result
+        elif isinstance(self.result, list):
+            # Handle content blocks (images, etc.) - not yet implemented
+            raise ValueError("can't handle content blocks for gemini yet")
+        else:
+            func_response["response"] = {"result": str(self.result)}
+
+        return {"functionResponse": func_response}
+
     def mistral(self) -> dict:
         return {
             "type": "tool_result",
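A small sketch of what the reworked ToolResult.gemini() produces for a Gemini computer-use result carrying an inline screenshot. The constructor keyword names are assumed from the fields the diff references (tool_call_id, result, built_in_type); the values are illustrative.

```python
# Sketch only: constructor kwargs are assumed from the attributes used above.
from lm_deluge.prompt import ToolResult

tr = ToolResult(
    tool_call_id="click_at",
    result={
        "response": {"url": "https://example.com"},
        "inline_data": {"mime_type": "image/png", "data": "<base64 screenshot>"},
    },
    built_in_type="gemini_computer_use",
)

print(tr.gemini())
# {'functionResponse': {'name': 'click_at',
#                       'response': {'url': 'https://example.com'},
#                       'parts': [{'inlineData': {'mimeType': 'image/png',
#                                                 'data': '<base64 screenshot>'}}]}}
```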

@@ -1367,14 +1395,14 @@ class Conversation:
             # For assistant messages, extract computer calls as separate items
             text_parts = []
             for p in m.parts:
-                if isinstance(p, ToolCall) and p.
+                if isinstance(p, ToolCall) and p.built_in_type == "computer_call":
                     # Computer calls become separate items in the input array
-
+                    # p.arguments already contains the full action dict with "type"
                     input_items.append(
                         {
                             "type": "computer_call",
                             "call_id": p.id,
-                            "action":
+                            "action": p.arguments,
                         }
                     )
                 elif isinstance(p, Text):
@@ -1752,7 +1780,7 @@ class Conversation:
 Prompt: TypeAlias = str | list[dict] | Message | Conversation
 
 
-def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[
+def prompts_to_conversations(prompts: Sequence[Prompt]) -> Sequence[Conversation]:
     converted = []
     for prompt in prompts:
         if isinstance(prompt, Conversation):

src/lm_deluge/tool/__init__.py

@@ -614,6 +614,7 @@ class Tool(BaseModel):
         cls,
         func: Callable,
         *,
+        name: str | None = None,
         include_output_schema_in_description: bool = False,
     ) -> "Tool":
         """
@@ -629,6 +630,8 @@ class Tool(BaseModel):
 
         Args:
             func: The function to create a tool from.
+            name: Optional name override for the tool. If not provided,
+                uses the function's __name__.
             include_output_schema_in_description: If True, append the return type
                 and any complex type definitions to the tool description. This can
                 help the model understand what the tool returns. Default is False.
@@ -646,6 +649,10 @@ class Tool(BaseModel):
             # tool.output_schema contains schema for list[dict]
             # tool.call(query="test", validate_output=True) validates return value
 
+            # With custom name:
+            tool = Tool.from_function(search, name="search_database")
+            # tool.name is "search_database"
+
             # With output schema in description:
             tool = Tool.from_function(search, include_output_schema_in_description=True)
             # Description becomes:
@@ -653,11 +660,11 @@ class Tool(BaseModel):
             #
             # Returns: list[dict]"
         """
-        # Get function name
-
+        # Get function name (use override if provided)
+        tool_name = name if name is not None else func.__name__
 
         # Get docstring for description
-        description = func.__doc__ or f"Call the {
+        description = func.__doc__ or f"Call the {tool_name} function"
         description = description.strip()
 
         # Use TypeAdapter for robust schema generation
@@ -705,7 +712,7 @@ class Tool(BaseModel):
             description = f"{description}\n\n{output_info}"
 
         tool = cls(
-            name=
+            name=tool_name,
             description=description,
             parameters=parameters,
             required=required,
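In practice the new name override looks like this; a minimal sketch in which the fetch_rows function is made up for illustration, while from_function(name=...) and tool.call(...) follow the docstring above.

```python
# Sketch only: fetch_rows is an illustrative function.
from lm_deluge.tool import Tool


def fetch_rows(table: str, limit: int = 10) -> list[dict]:
    """Fetch up to `limit` rows from `table`."""
    return [{"table": table, "row": i} for i in range(limit)]


tool = Tool.from_function(fetch_rows, name="query_table")
print(tool.name)                          # "query_table", not "fetch_rows"
print(tool.call(table="users", limit=2))  # invokes fetch_rows under the hood
```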