lm-deluge 0.0.80__py3-none-any.whl → 0.0.82__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +2 -1
- lm_deluge/api_requests/base.py +13 -0
- lm_deluge/api_requests/gemini.py +1 -1
- lm_deluge/api_requests/openai.py +3 -2
- lm_deluge/client.py +16 -11
- lm_deluge/llm_tools/__init__.py +12 -5
- lm_deluge/pipelines/__init__.py +11 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +105 -0
- lm_deluge/request_context.py +2 -2
- lm_deluge/{tool.py → tool/__init__.py} +531 -314
- lm_deluge/tool/prefab/__init__.py +29 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/{llm_tools → tool/prefab}/filesystem.py +1 -1
- lm_deluge/tool/prefab/memory.py +190 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/{llm_tools → tool/prefab}/sandbox.py +251 -61
- lm_deluge/{llm_tools → tool/prefab}/todos.py +1 -1
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tracker.py +16 -13
- {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/METADATA +2 -3
- {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/RECORD +34 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{llm_tools → tool/prefab}/subagents.py +0 -0
- {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool
 
 try:
     from .mock_openai import (  # noqa
@@ -25,7 +25,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
-    "ToolParams",
     "File",
 ]
 
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -64,7 +64,7 @@ def _build_anthropic_request(
     }
 
     if model.id == "claude-4.5-opus" and sampling_params.global_effort:
-        request_json["
+        request_json["output_config"] = {"effort": sampling_params.global_effort}
         _add_beta(base_headers, "effort-2025-11-24")
 
     # handle thinking
@@ -115,6 +115,7 @@ def _build_anthropic_request(
     if "4-1" in model.name or "4-5" in model.name:
         request_json.pop("top_p")
 
+    # print(request_json)
     # Handle structured outputs (output_format)
     if context.output_schema:
         if model.supports_json:
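The claude-4.5-opus branch now forwards `sampling_params.global_effort` to Anthropic's `output_config` and opts into the `effort-2025-11-24` beta. A minimal sketch of setting that field from user code, assuming `SamplingParams` exposes `global_effort` as a constructor argument and that string levels such as `"high"` are accepted (neither is confirmed by this diff):

```python
from lm_deluge import SamplingParams

# Assumed constructor field and value: only the attribute name `global_effort`
# appears in the hunk above, where it is sent as output_config={"effort": ...}
# for claude-4.5-opus together with the "effort-2025-11-24" beta header.
params = SamplingParams(global_effort="high")
```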
lm_deluge/api_requests/base.py
CHANGED
@@ -222,6 +222,19 @@ class APIRequestBase(ABC):
                 usage=None,
             )
 
+        except aiohttp.ServerDisconnectedError:
+            return APIResponse(
+                id=self.context.task_id,
+                model_internal=self.context.model_name,
+                prompt=self.context.prompt,
+                sampling_params=self.context.sampling_params,
+                status_code=None,
+                is_error=True,
+                error_message="Server disconnected.",
+                content=None,
+                usage=None,
+            )
+
         except Exception as e:
             raise_if_modal_exception(e)
             tb = traceback.format_exc()
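With this change a dropped connection surfaces as a regular error `APIResponse` (`status_code=None`, `error_message="Server disconnected."`) instead of an unhandled aiohttp exception. A sketch of caller-side filtering, using only the fields visible in the hunk above:

```python
def split_results(responses):
    """Separate successful APIResponse objects from error ones.

    Field names (`is_error`, `error_message`) are taken from the APIResponse
    construction shown in the hunk above; everything else is illustrative.
    """
    ok, failed = [], []
    for resp in responses:
        if resp is not None and not resp.is_error:
            ok.append(resp)
        else:
            failed.append(resp)  # includes the new "Server disconnected." responses
    return ok, failed
```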
lm_deluge/api_requests/gemini.py
CHANGED
lm_deluge/api_requests/openai.py
CHANGED
@@ -2,17 +2,18 @@ import json
 import os
 import traceback as tb
 from types import SimpleNamespace
+from typing import Sequence
 
 import aiohttp
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
-from lm_deluge.warnings import maybe_warn
 from lm_deluge.util.schema import (
     prepare_output_schema,
     transform_schema_for_openai,
 )
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -610,7 +611,7 @@ async def stream_chat(
     model_name: str,  # must correspond to registry
     prompt: Conversation,
     sampling_params: SamplingParams = SamplingParams(),
-    tools:
+    tools: Sequence[Tool | dict | MCPServer] | None = None,
     cache: CachePattern | None = None,
     extra_headers: dict[str, str] | None = None,
 ):
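Besides the import reshuffle, `stream_chat`'s `tools` parameter is now annotated as `Sequence[Tool | dict | MCPServer] | None`, so any sequence type (list, tuple, ...) is accepted. A small illustration of the `Sequence` part only; the concrete schema lm_deluge expects for dict tool specs is not shown in this diff, so treat the keys as placeholders:

```python
from typing import Sequence

# Illustrative tool spec as a plain dict; key names here are assumptions.
weather_tool: dict = {
    "name": "get_weather",
    "description": "Look up current weather for a city.",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
}

# Any Sequence is accepted under the new annotation, e.g. a tuple:
tools: Sequence[dict] = (weather_tool,)
```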
lm_deluge/client.py
CHANGED
@@ -103,6 +103,11 @@ class _LLMClient(BaseModel):
     _tracker: StatusTracker | None = PrivateAttr(default=None)
     _capacity_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)
 
+    # usage
+    def print_usage(self):
+        if self._tracker:
+            self._tracker.log_usage()
+
     # Progress management for queueing API
     def open(self, total: int | None = None, show_progress: bool = True):
         self._tracker = StatusTracker(
@@ -572,7 +577,7 @@
         *,
         return_completions_only: Literal[True],
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -585,7 +590,7 @@
         *,
         return_completions_only: Literal[False] = ...,
         show_progress: bool = ...,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = ...,
         output_schema: type[BaseModel] | dict | None = ...,
         cache: CachePattern | None = ...,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -597,7 +602,7 @@
         *,
         return_completions_only: bool = False,
         show_progress: bool = True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -672,7 +677,7 @@
         *,
         return_completions_only: bool = False,
         show_progress=True,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
     ):
@@ -705,7 +710,7 @@
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -742,7 +747,7 @@
         self,
         prompt: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         output_schema: type[BaseModel] | dict | None = None,
         cache: CachePattern | None = None,
         service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -835,7 +840,7 @@
     async def stream(
         self,
         prompt: Prompt,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
     ):
         model, sampling_params = self._select_model()
         prompt = prompts_to_conversations([prompt])[0]
@@ -856,7 +861,7 @@
         task_id: int,
         conversation: Conversation,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> AgentLoopResponse:
         """Internal method to run agent loop and return wrapped result."""
@@ -920,7 +925,7 @@
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
     ) -> int:
         """Start an agent loop without waiting for it to complete.
@@ -967,7 +972,7 @@
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
@@ -986,7 +991,7 @@
         self,
         conversation: Prompt,
         *,
-        tools:
+        tools: Sequence[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
         show_progress: bool = False,
     ) -> tuple[Conversation, APIResponse]:
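The client also gains a small `print_usage()` helper that defers to the tracker's `log_usage()`, and every overload above adopts the same `Sequence[Tool | dict | MCPServer]` typing for `tools`. A minimal sketch of the helper, assuming an `LLMClient` can be constructed with default arguments (not shown in this diff):

```python
from lm_deluge import LLMClient

client = LLMClient()  # constructor arguments omitted; defaults assumed for the sketch

# ... run some prompts through the client here ...

# New in 0.0.82: prints accumulated usage via the internal StatusTracker.
# It is a no-op until a tracker exists, i.e. before any requests have run.
client.print_usage()
```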
lm_deluge/llm_tools/__init__.py
CHANGED
@@ -1,8 +1,15 @@
-
-
-from
-
-
+# Backward compatibility - re-export from new locations
+# Pipelines (workflow functions)
+from ..pipelines import extract, extract_async, score_llm, translate, translate_async
+
+# Prefab tools (Tool managers)
+from ..tool.prefab import (
+    SubAgentManager,
+    TodoItem,
+    TodoManager,
+    TodoPriority,
+    TodoStatus,
+)
 
 __all__ = [
     "extract",
lm_deluge/{llm_tools → pipelines}/score.py
CHANGED
@@ -1,4 +1,4 @@
-from ..client import
+from ..client import _LLMClient, APIResponse
 from ..util.logprobs import extract_prob
 
 # def extract_prob_yes(logprobs: list[dict]):
@@ -24,7 +24,7 @@ from ..util.logprobs import extract_prob
 def score_llm(
     scoring_prompt_template: str,
     inputs: list[tuple | list | dict],  # to format the template
-    scoring_model:
+    scoring_model: _LLMClient,
     return_probabilities: bool,
     yes_token: str = "yes",
 ) -> list[bool | None] | list[float | None]:
lm_deluge/{llm_tools → pipelines}/translate.py
CHANGED
@@ -1,5 +1,5 @@
 import asyncio
-from ..client import
+from ..client import _LLMClient
 
 translation_prompt = (
     "Translate the following text (enclosed in ```) into English. "
@@ -20,7 +20,9 @@ def is_english(text: str, low_memory: bool = True):
     return True
 
 
-async def translate_async(
+async def translate_async(
+    texts: list[str], client: _LLMClient, low_memory: bool = True
+):
     to_translate_idxs = [
         i for i, text in enumerate(texts) if not is_english(text, low_memory=low_memory)
     ]
@@ -40,5 +42,5 @@ async def translate_async(texts: list[str], client: LLMClient, low_memory: bool
     return texts
 
 
-def translate(texts: list[str], client:
+def translate(texts: list[str], client: _LLMClient, low_memory: bool = True):
     return asyncio.run(translate_async(texts, client, low_memory))
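In practice the re-exports mean existing imports from `lm_deluge.llm_tools` keep working, while the same objects are now also importable from their new homes:

```python
# Old import path still resolves via the compatibility shim above...
from lm_deluge.llm_tools import TodoManager, translate

# ...and new code can import directly from the reorganized packages.
from lm_deluge.pipelines import translate as translate_direct
from lm_deluge.tool.prefab import TodoManager as TodoManagerDirect

# Both paths refer to the same objects, since llm_tools simply re-exports them.
assert translate is translate_direct
assert TodoManager is TodoManagerDirect
```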
lm_deluge/prompt.py
CHANGED
@@ -1598,6 +1598,111 @@ class Conversation:
 
         return {"messages": serialized}
 
+    def print(self, max_text_length: int = 500, indent: int = 2) -> None:
+        """Pretty-print the conversation to stdout.
+
+        Args:
+            max_text_length: Truncate text content longer than this (default 500 chars)
+            indent: JSON indentation for tool calls/results (default 2)
+        """
+        ROLE_COLORS = {
+            "system": "\033[95m",  # magenta
+            "user": "\033[94m",  # blue
+            "assistant": "\033[92m",  # green
+            "tool": "\033[93m",  # yellow
+        }
+        RESET = "\033[0m"
+        DIM = "\033[2m"
+        BOLD = "\033[1m"
+
+        def truncate(text: str, max_len: int) -> str:
+            if len(text) <= max_len:
+                return text
+            return (
+                text[:max_len] + f"{DIM}... [{len(text) - max_len} more chars]{RESET}"
+            )
+
+        def format_json(obj: dict | list, ind: int) -> str:
+            return json.dumps(obj, indent=ind, ensure_ascii=False)
+
+        print(f"\n{BOLD}{'=' * 60}{RESET}")
+        print(f"{BOLD}Conversation ({len(self.messages)} messages){RESET}")
+        print(f"{BOLD}{'=' * 60}{RESET}\n")
+
+        for i, msg in enumerate(self.messages):
+            role_color = ROLE_COLORS.get(msg.role, "")
+            print(f"{role_color}{BOLD}[{msg.role.upper()}]{RESET}")
+
+            for part in msg.parts:
+                if isinstance(part, Text):
+                    text = truncate(part.text, max_text_length)
+                    # Indent multiline text
+                    lines = text.split("\n")
+                    if len(lines) > 1:
+                        print(" " + "\n ".join(lines))
+                    else:
+                        print(f" {text}")
+
+                elif isinstance(part, Image):
+                    w, h = part.size
+                    print(f" {DIM}<Image ({w}x{h})>{RESET}")
+
+                elif isinstance(part, File):
+                    size = part.size
+                    filename = getattr(part, "filename", None)
+                    if filename:
+                        print(f" {DIM}<File: {filename} ({size} bytes)>{RESET}")
+                    else:
+                        print(f" {DIM}<File ({size} bytes)>{RESET}")
+
+                elif isinstance(part, ToolCall):
+                    print(
+                        f" {DIM}Tool Call:{RESET} {BOLD}{part.name}{RESET} (id: {part.id})"
+                    )
+                    if part.arguments:
+                        args_json = format_json(part.arguments, indent)
+                        # Indent the JSON
+                        indented = "\n".join(
+                            " " + line for line in args_json.split("\n")
+                        )
+                        print(indented)
+
+                elif isinstance(part, ToolResult):
+                    print(f" {DIM}Tool Result:{RESET} (call_id: {part.tool_call_id})")
+                    if isinstance(part.result, str):
+                        result_text = truncate(part.result, max_text_length)
+                        lines = result_text.split("\n")
+                        for line in lines:
+                            print(f" {line}")
+                    elif isinstance(part.result, dict):
+                        result_json = format_json(part.result, indent)
+                        indented = "\n".join(
+                            " " + line for line in result_json.split("\n")
+                        )
+                        print(indented)
+                    elif isinstance(part.result, list):
+                        print(f" {DIM}<{len(part.result)} content blocks>{RESET}")
+                        for block in part.result:
+                            if isinstance(block, Text):
+                                block_text = truncate(block.text, max_text_length // 2)
+                                print(f" [text] {block_text}")
+                            elif isinstance(block, Image):
+                                bw, bh = block.size
+                                print(f" {DIM}<Image ({bw}x{bh})>{RESET}")
+
+                elif isinstance(part, Thinking):
+                    print(f" {DIM}Thinking:{RESET}")
+                    thought = truncate(part.content, max_text_length)
+                    lines = thought.split("\n")
+                    for line in lines:
+                        print(f" {DIM}{line}{RESET}")
+
+            # Separator between messages
+            if i < len(self.messages) - 1:
+                print(f"\n{'-' * 40}\n")
+
+        print(f"\n{BOLD}{'=' * 60}{RESET}\n")
+
     @classmethod
     def from_log(cls, payload: dict) -> "Conversation":
         """Re-hydrate a Conversation previously produced by `to_log()`."""
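The new `Conversation.print()` helper renders roles with ANSI colors, truncates long text, shows image/file placeholders, and pretty-prints tool-call arguments and results. A minimal sketch of calling it on an existing conversation (how the conversation is built is outside this hunk):

```python
from lm_deluge import Conversation


def dump_conversation(conv: Conversation) -> None:
    # Truncate long text parts to 200 characters and indent tool-call/result
    # JSON by 4 spaces; both parameters come from the signature added above.
    conv.print(max_text_length=200, indent=4)
```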
lm_deluge/request_context.py
CHANGED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from functools import cached_property
-from typing import Any, Callable, TYPE_CHECKING
+from typing import Any, Callable, Sequence, TYPE_CHECKING
 
 from .config import SamplingParams
 from .prompt import CachePattern, Conversation
@@ -34,7 +34,7 @@ class RequestContext:
     callback: Callable | None = None
 
     # Optional features
-    tools:
+    tools: Sequence[Any] | None = None
     output_schema: "type[BaseModel] | dict | None" = None
     cache: CachePattern | None = None
     use_responses_api: bool = False