lm-deluge 0.0.80__py3-none-any.whl → 0.0.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +2 -1
  3. lm_deluge/api_requests/base.py +13 -0
  4. lm_deluge/api_requests/gemini.py +1 -1
  5. lm_deluge/api_requests/openai.py +3 -2
  6. lm_deluge/client.py +16 -11
  7. lm_deluge/llm_tools/__init__.py +12 -5
  8. lm_deluge/pipelines/__init__.py +11 -0
  9. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  10. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  11. lm_deluge/prompt.py +105 -0
  12. lm_deluge/request_context.py +2 -2
  13. lm_deluge/{tool.py → tool/__init__.py} +531 -314
  14. lm_deluge/tool/prefab/__init__.py +29 -0
  15. lm_deluge/tool/prefab/batch_tool.py +156 -0
  16. lm_deluge/{llm_tools → tool/prefab}/filesystem.py +1 -1
  17. lm_deluge/tool/prefab/memory.py +190 -0
  18. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  19. lm_deluge/tool/prefab/otc/executor.py +281 -0
  20. lm_deluge/tool/prefab/otc/parse.py +188 -0
  21. lm_deluge/{llm_tools → tool/prefab}/sandbox.py +251 -61
  22. lm_deluge/{llm_tools → tool/prefab}/todos.py +1 -1
  23. lm_deluge/tool/prefab/tool_search.py +169 -0
  24. lm_deluge/tracker.py +16 -13
  25. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/METADATA +2 -3
  26. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/RECORD +34 -28
  27. lm_deluge/presets/cerebras.py +0 -17
  28. lm_deluge/presets/meta.py +0 -13
  29. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  30. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  31. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  32. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  33. /lm_deluge/{llm_tools → tool/prefab}/subagents.py +0 -0
  34. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/WHEEL +0 -0
  35. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/licenses/LICENSE +0 -0
  36. {lm_deluge-0.0.80.dist-info → lm_deluge-0.0.82.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,7 @@
  from .client import APIResponse, LLMClient, SamplingParams
  from .file import File
  from .prompt import Conversation, Message
- from .tool import Tool, ToolParams
+ from .tool import Tool

  try:
      from .mock_openai import ( # noqa
@@ -25,7 +25,6 @@ __all__ = [
      "Conversation",
      "Message",
      "Tool",
-     "ToolParams",
      "File",
  ]

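Note on the export change above: ToolParams is no longer re-exported from the package root in 0.0.82, while Tool still is. A minimal sketch of the import adjustment (whether ToolParams remains importable from elsewhere is not shown in this diff, so treat that as unknown):

    # 0.0.80:
    #   from lm_deluge import Tool, ToolParams
    # 0.0.82: ToolParams is gone from the top-level namespace
    from lm_deluge import Tool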
lm_deluge/api_requests/anthropic.py CHANGED
@@ -64,7 +64,7 @@ def _build_anthropic_request(
      }

      if model.id == "claude-4.5-opus" and sampling_params.global_effort:
-         request_json["effort"] = sampling_params.global_effort
+         request_json["output_config"] = {"effort": sampling_params.global_effort}
          _add_beta(base_headers, "effort-2025-11-24")

      # handle thinking
@@ -115,6 +115,7 @@ def _build_anthropic_request(
      if "4-1" in model.name or "4-5" in model.name:
          request_json.pop("top_p")

+     # print(request_json)
      # Handle structured outputs (output_format)
      if context.output_schema:
          if model.supports_json:
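For context, the first hunk moves the effort setting for claude-4.5-opus from a top-level "effort" field into an "output_config" object, while still attaching the "effort-2025-11-24" beta via _add_beta. A rough sketch of the resulting request body; the surrounding field values are illustrative, not taken from the package:

    request_json = {
        "model": "claude-4.5-opus",               # illustrative
        "max_tokens": 4096,                       # illustrative
        # 0.0.80 sent: "effort": "medium"
        "output_config": {"effort": "medium"},    # 0.0.82
    }
    # the matching beta header is still added, e.g. anthropic-beta: effort-2025-11-24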
lm_deluge/api_requests/base.py CHANGED
@@ -222,6 +222,19 @@ class APIRequestBase(ABC):
                  usage=None,
              )

+         except aiohttp.ServerDisconnectedError:
+             return APIResponse(
+                 id=self.context.task_id,
+                 model_internal=self.context.model_name,
+                 prompt=self.context.prompt,
+                 sampling_params=self.context.sampling_params,
+                 status_code=None,
+                 is_error=True,
+                 error_message="Server disconnected.",
+                 content=None,
+                 usage=None,
+             )
+
          except Exception as e:
              raise_if_modal_exception(e)
              tb = traceback.format_exc()
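The new branch means a dropped connection no longer escapes as aiohttp.ServerDisconnectedError; it is converted into a failed APIResponse like other handled errors. A hypothetical caller-side helper using only the fields shown in the hunk:

    from lm_deluge import APIResponse

    def is_server_disconnect(response: APIResponse) -> bool:
        # hypothetical check: disconnects now surface as an error response
        # with this message rather than as a raised aiohttp exception
        return bool(response.is_error and response.error_message == "Server disconnected.")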
lm_deluge/api_requests/gemini.py CHANGED
@@ -173,7 +173,7 @@ class GeminiRequest(APIRequestBase):
          self.request_json = await _build_gemini_request(
              self.model,
              self.context.prompt,
-             self.context.tools,
+             self.context.tools, # type: ignore
              self.context.sampling_params,
          )

lm_deluge/api_requests/openai.py CHANGED
@@ -2,17 +2,18 @@ import json
  import os
  import traceback as tb
  from types import SimpleNamespace
+ from typing import Sequence

  import aiohttp
  from aiohttp import ClientResponse

  from lm_deluge.request_context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
- from lm_deluge.warnings import maybe_warn
  from lm_deluge.util.schema import (
      prepare_output_schema,
      transform_schema_for_openai,
  )
+ from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
@@ -610,7 +611,7 @@ async def stream_chat(
      model_name: str, # must correspond to registry
      prompt: Conversation,
      sampling_params: SamplingParams = SamplingParams(),
-     tools: list | None = None,
+     tools: Sequence[Tool | dict | MCPServer] | None = None,
      cache: CachePattern | None = None,
      extra_headers: dict[str, str] | None = None,
  ):
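stream_chat (like the client methods below) now accepts any Sequence of Tool, dict, or MCPServer instead of requiring a list, so tuples satisfy the annotation too. A small sketch; the dict tool shape is purely illustrative and not taken from the package:

    from lm_deluge.tool import MCPServer, Tool  # import path as in the hunk above

    weather_tool = {
        "name": "get_weather",
        "description": "Look up the current weather for a city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    }
    tools: tuple[Tool | dict | MCPServer, ...] = (weather_tool,)  # a tuple now type-checks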
lm_deluge/client.py CHANGED
@@ -103,6 +103,11 @@ class _LLMClient(BaseModel):
      _tracker: StatusTracker | None = PrivateAttr(default=None)
      _capacity_lock: asyncio.Lock = PrivateAttr(default_factory=asyncio.Lock)

+     # usage
+     def print_usage(self):
+         if self._tracker:
+             self._tracker.log_usage()
+
      # Progress management for queueing API
      def open(self, total: int | None = None, show_progress: bool = True):
          self._tracker = StatusTracker(
@@ -572,7 +577,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: Literal[True],
          show_progress: bool = ...,
-         tools: list[Tool | dict | MCPServer] | None = ...,
+         tools: Sequence[Tool | dict | MCPServer] | None = ...,
          output_schema: type[BaseModel] | dict | None = ...,
          cache: CachePattern | None = ...,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -585,7 +590,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: Literal[False] = ...,
          show_progress: bool = ...,
-         tools: list[Tool | dict | MCPServer] | None = ...,
+         tools: Sequence[Tool | dict | MCPServer] | None = ...,
          output_schema: type[BaseModel] | dict | None = ...,
          cache: CachePattern | None = ...,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = ...,
@@ -597,7 +602,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: bool = False,
          show_progress: bool = True,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -672,7 +677,7 @@ class _LLMClient(BaseModel):
          *,
          return_completions_only: bool = False,
          show_progress=True,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
      ):
@@ -705,7 +710,7 @@ class _LLMClient(BaseModel):
          self,
          prompt: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -742,7 +747,7 @@ class _LLMClient(BaseModel):
          self,
          prompt: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          output_schema: type[BaseModel] | dict | None = None,
          cache: CachePattern | None = None,
          service_tier: Literal["auto", "default", "flex", "priority"] | None = None,
@@ -835,7 +840,7 @@ class _LLMClient(BaseModel):
      async def stream(
          self,
          prompt: Prompt,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
      ):
          model, sampling_params = self._select_model()
          prompt = prompts_to_conversations([prompt])[0]
@@ -856,7 +861,7 @@ class _LLMClient(BaseModel):
          task_id: int,
          conversation: Conversation,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
      ) -> AgentLoopResponse:
          """Internal method to run agent loop and return wrapped result."""
@@ -920,7 +925,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
      ) -> int:
          """Start an agent loop without waiting for it to complete.
@@ -967,7 +972,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
          show_progress: bool = False,
      ) -> tuple[Conversation, APIResponse]:
@@ -986,7 +991,7 @@ class _LLMClient(BaseModel):
          self,
          conversation: Prompt,
          *,
-         tools: list[Tool | dict | MCPServer] | None = None,
+         tools: Sequence[Tool | dict | MCPServer] | None = None,
          max_rounds: int = 5,
          show_progress: bool = False,
      ) -> tuple[Conversation, APIResponse]:
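Two user-visible changes in client.py: a new print_usage() helper that logs the StatusTracker's accumulated usage, and Sequence-typed tools parameters across the overloads, the sync/async entry points, stream, and the agent-loop methods. A hypothetical usage sketch; the constructor form and the process_prompts_sync name are assumptions, only print_usage comes from this diff:

    from lm_deluge import LLMClient

    client = LLMClient("gpt-4.1-mini")        # assumed constructor form
    results = client.process_prompts_sync(    # assumed entry-point name
        ["What is 2 + 2?"],
        show_progress=False,
    )
    client.print_usage()                      # new in 0.0.82: logs usage if a tracker exists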
lm_deluge/llm_tools/__init__.py CHANGED
@@ -1,8 +1,15 @@
- from .extract import extract, extract_async
- from .score import score_llm
- from .subagents import SubAgentManager
- from .todos import TodoItem, TodoManager, TodoPriority, TodoStatus
- from .translate import translate, translate_async
+ # Backward compatibility - re-export from new locations
+ # Pipelines (workflow functions)
+ from ..pipelines import extract, extract_async, score_llm, translate, translate_async
+
+ # Prefab tools (Tool managers)
+ from ..tool.prefab import (
+     SubAgentManager,
+     TodoItem,
+     TodoManager,
+     TodoPriority,
+     TodoStatus,
+ )

  __all__ = [
      "extract",
lm_deluge/pipelines/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .extract import extract, extract_async
+ from .score import score_llm
+ from .translate import translate, translate_async
+
+ __all__ = [
+     "extract",
+     "extract_async",
+     "translate",
+     "translate_async",
+     "score_llm",
+ ]
lm_deluge/{llm_tools → pipelines}/score.py RENAMED
@@ -1,4 +1,4 @@
- from ..client import LLMClient, APIResponse
+ from ..client import _LLMClient, APIResponse
  from ..util.logprobs import extract_prob

  # def extract_prob_yes(logprobs: list[dict]):
@@ -24,7 +24,7 @@ from ..util.logprobs import extract_prob
  def score_llm(
      scoring_prompt_template: str,
      inputs: list[tuple | list | dict], # to format the template
-     scoring_model: LLMClient,
+     scoring_model: _LLMClient,
      return_probabilities: bool,
      yes_token: str = "yes",
  ) -> list[bool | None] | list[float | None]:
lm_deluge/{llm_tools → pipelines}/translate.py RENAMED
@@ -1,5 +1,5 @@
  import asyncio
- from ..client import LLMClient
+ from ..client import _LLMClient

  translation_prompt = (
      "Translate the following text (enclosed in ```) into English. "
@@ -20,7 +20,9 @@ def is_english(text: str, low_memory: bool = True):
      return True


- async def translate_async(texts: list[str], client: LLMClient, low_memory: bool = True):
+ async def translate_async(
+     texts: list[str], client: _LLMClient, low_memory: bool = True
+ ):
      to_translate_idxs = [
          i for i, text in enumerate(texts) if not is_english(text, low_memory=low_memory)
      ]
@@ -40,5 +42,5 @@ async def translate_async(texts: list[str], client: LLMClient, low_memory: bool
      return texts


- def translate(texts: list[str], client: LLMClient, low_memory: bool = True):
+ def translate(texts: list[str], client: _LLMClient, low_memory: bool = True):
      return asyncio.run(translate_async(texts, client, low_memory))
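The translate helpers keep their behavior (only items that fail the is_english check are sent for translation); the annotation change from LLMClient to _LLMClient does not change how they are called. A hypothetical usage sketch; the client constructor form is an assumption:

    from lm_deluge import LLMClient
    from lm_deluge.pipelines import translate

    client = LLMClient("gpt-4.1-mini")                 # assumed constructor form
    texts = ["Hello there", "Bonjour tout le monde"]
    translated = translate(texts, client)              # only the non-English item is translated
    # async variant: await translate_async(texts, client, low_memory=True)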
lm_deluge/prompt.py CHANGED
@@ -1598,6 +1598,111 @@ class Conversation:

          return {"messages": serialized}

+     def print(self, max_text_length: int = 500, indent: int = 2) -> None:
+         """Pretty-print the conversation to stdout.
+
+         Args:
+             max_text_length: Truncate text content longer than this (default 500 chars)
+             indent: JSON indentation for tool calls/results (default 2)
+         """
+         ROLE_COLORS = {
+             "system": "\033[95m", # magenta
+             "user": "\033[94m", # blue
+             "assistant": "\033[92m", # green
+             "tool": "\033[93m", # yellow
+         }
+         RESET = "\033[0m"
+         DIM = "\033[2m"
+         BOLD = "\033[1m"
+
+         def truncate(text: str, max_len: int) -> str:
+             if len(text) <= max_len:
+                 return text
+             return (
+                 text[:max_len] + f"{DIM}... [{len(text) - max_len} more chars]{RESET}"
+             )
+
+         def format_json(obj: dict | list, ind: int) -> str:
+             return json.dumps(obj, indent=ind, ensure_ascii=False)
+
+         print(f"\n{BOLD}{'=' * 60}{RESET}")
+         print(f"{BOLD}Conversation ({len(self.messages)} messages){RESET}")
+         print(f"{BOLD}{'=' * 60}{RESET}\n")
+
+         for i, msg in enumerate(self.messages):
+             role_color = ROLE_COLORS.get(msg.role, "")
+             print(f"{role_color}{BOLD}[{msg.role.upper()}]{RESET}")
+
+             for part in msg.parts:
+                 if isinstance(part, Text):
+                     text = truncate(part.text, max_text_length)
+                     # Indent multiline text
+                     lines = text.split("\n")
+                     if len(lines) > 1:
+                         print(" " + "\n ".join(lines))
+                     else:
+                         print(f" {text}")
+
+                 elif isinstance(part, Image):
+                     w, h = part.size
+                     print(f" {DIM}<Image ({w}x{h})>{RESET}")
+
+                 elif isinstance(part, File):
+                     size = part.size
+                     filename = getattr(part, "filename", None)
+                     if filename:
+                         print(f" {DIM}<File: {filename} ({size} bytes)>{RESET}")
+                     else:
+                         print(f" {DIM}<File ({size} bytes)>{RESET}")
+
+                 elif isinstance(part, ToolCall):
+                     print(
+                         f" {DIM}Tool Call:{RESET} {BOLD}{part.name}{RESET} (id: {part.id})"
+                     )
+                     if part.arguments:
+                         args_json = format_json(part.arguments, indent)
+                         # Indent the JSON
+                         indented = "\n".join(
+                             " " + line for line in args_json.split("\n")
+                         )
+                         print(indented)
+
+                 elif isinstance(part, ToolResult):
+                     print(f" {DIM}Tool Result:{RESET} (call_id: {part.tool_call_id})")
+                     if isinstance(part.result, str):
+                         result_text = truncate(part.result, max_text_length)
+                         lines = result_text.split("\n")
+                         for line in lines:
+                             print(f" {line}")
+                     elif isinstance(part.result, dict):
+                         result_json = format_json(part.result, indent)
+                         indented = "\n".join(
+                             " " + line for line in result_json.split("\n")
+                         )
+                         print(indented)
+                     elif isinstance(part.result, list):
+                         print(f" {DIM}<{len(part.result)} content blocks>{RESET}")
+                         for block in part.result:
+                             if isinstance(block, Text):
+                                 block_text = truncate(block.text, max_text_length // 2)
+                                 print(f" [text] {block_text}")
+                             elif isinstance(block, Image):
+                                 bw, bh = block.size
+                                 print(f" {DIM}<Image ({bw}x{bh})>{RESET}")
+
+                 elif isinstance(part, Thinking):
+                     print(f" {DIM}Thinking:{RESET}")
+                     thought = truncate(part.content, max_text_length)
+                     lines = thought.split("\n")
+                     for line in lines:
+                         print(f" {DIM}{line}{RESET}")
+
+             # Separator between messages
+             if i < len(self.messages) - 1:
+                 print(f"\n{'-' * 40}\n")
+
+         print(f"\n{BOLD}{'=' * 60}{RESET}\n")
+
      @classmethod
      def from_log(cls, payload: dict) -> "Conversation":
          """Re-hydrate a Conversation previously produced by `to_log()`."""
lm_deluge/request_context.py CHANGED
@@ -1,6 +1,6 @@
  from dataclasses import dataclass, field
  from functools import cached_property
- from typing import Any, Callable, TYPE_CHECKING
+ from typing import Any, Callable, Sequence, TYPE_CHECKING

  from .config import SamplingParams
  from .prompt import CachePattern, Conversation
@@ -34,7 +34,7 @@ class RequestContext:
      callback: Callable | None = None

      # Optional features
-     tools: list | None = None
+     tools: Sequence[Any] | None = None
      output_schema: "type[BaseModel] | dict | None" = None
      cache: CachePattern | None = None
      use_responses_api: bool = False