lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge has been flagged as potentially problematic; see the registry's advisory page for details.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import random
|
|
5
|
+
import time
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any, Sequence, TypeAlias
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
from typing_extensions import TypedDict
|
|
13
|
+
|
|
14
|
+
# from rapidfuzz import fuzz, process
|
|
15
|
+
from .. import Tool
|
|
16
|
+
|
|
17
|
+
# Tool-facing description of the memory feature; surfaced to the model when
# the memory tools are registered.
MEMORY_DESCRIPTION = """Use this tool to search, read, and update a long-term "memory" that can be used across sessions, when previous messages are cleared. Whether and when to use memory depends on the situation—for complex tasks, it can store information about work so far, what needs to be done next, why you're doing what you're doing, etc. For personal conversations, it can be used to save "memories" that can be referenced later."""

# NOTE(review): the two constants below are empty placeholders and are not
# referenced anywhere in this module — confirm intent before removing.
MEMORY_WRITE = """

"""

MEMORY_READ = """

"""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MemoryItem(BaseModel):
    """Structured representation of a single memory."""

    # Unique integer key; assigned by the manager as max(existing ids) + 1.
    id: int
    description: str = Field(
        description='Short description ("preview") of the memory (1 sentence)'
    )
    content: str = Field(
        description="Full content of the memory. May use Markdown for formatting."
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class MemoryItemDict(TypedDict):
    # Plain-dict shape mirroring MemoryItem, for callers that pass raw dicts
    # (e.g. JSON loaded from disk by MemoryManager.from_file).
    id: int
    description: str
    content: str


# Anything the managers accept as a memory: a validated model or a raw dict.
MemoryLike: TypeAlias = MemoryItem | MemoryItemDict
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class MemoryManager:
    """In-process store of MemoryItems that exposes search/read/write tools.

    Memories live in a dict keyed by integer id. `get_tools()` returns
    `Tool` wrappers (renamed per the constructor arguments) whose closures
    are bound to this instance, so one manager's state persists across
    agent turns. Use `to_file`/`from_file` for JSON persistence.
    """

    def __init__(
        self,
        memories: Sequence[MemoryLike] | dict[int, MemoryLike] | None = None,
        *,
        write_tool_name: str = "memwrite",
        read_tool_name: str = "memread",
        search_tool_name: str = "memsearch",
        update_tool_name: str = "memupdate",
        delete_tool_name: str = "memdelete",
    ):
        """Create a manager, optionally seeded with existing memories.

        Args:
            memories: Initial memories — a sequence (keyed by each item's
                own ``id``) or a dict mapping id -> memory.
            write_tool_name / read_tool_name / search_tool_name /
            update_tool_name / delete_tool_name: Names to expose the
                corresponding tools under.
        """
        self.write_tool_name = write_tool_name
        self.read_tool_name = read_tool_name
        self.search_tool_name = search_tool_name
        self.update_tool_name = update_tool_name
        self.delete_tool_name = delete_tool_name
        self._memories: dict[int, MemoryItem] = {}
        # Tool list is built lazily and cached by get_tools().
        self._tools: list[Tool] | None = None

        if memories:
            if isinstance(memories, dict):
                self._memories = {k: self._coerce(v) for k, v in memories.items()}
            else:
                coerced = [self._coerce(mem) for mem in memories]
                self._memories = {x.id: x for x in coerced}

    @classmethod
    def from_file(
        cls,
        file: str,
    ) -> MemoryManager:
        """Load memories from a JSON file previously written by `to_file`."""
        with open(file) as f:
            memories = json.load(f)
        return cls(memories)

    def _coerce(self, mem: MemoryLike) -> MemoryItem:
        """Normalize a MemoryItem or plain dict into a MemoryItem.

        Raises:
            TypeError: if `mem` is neither a MemoryItem nor a dict.
        """
        if isinstance(mem, MemoryItem):
            return mem
        if isinstance(mem, dict):
            return MemoryItem(**mem)
        raise TypeError("Memories must be MemoryItem instances or dicts")

    def _serialize(self) -> list[dict[str, Any]]:
        """Dump all memories as JSON-serializable dicts."""
        return [mem.model_dump() for mem in self._memories.values()]

    def to_file(self, file: str):
        """Write all memories to `file` as a JSON array."""
        mems = self._serialize()
        with open(file, "w") as f:
            f.write(json.dumps(mems))

    @staticmethod
    def _format_memory(mem: MemoryItem, include_content: bool = True) -> str:
        """Render one memory as YAML; omit `content` for search previews."""
        dumped = mem.model_dump()
        if not include_content:
            dumped.pop("content")
        return yaml.safe_dump(dumped)

    # helpers
    def _search(self, queries: list[str], limit: int = 5) -> list[MemoryItem]:
        """Naive keyword search over descriptions and contents.

        Each whitespace-delimited keyword (case-insensitive substring match)
        counts one hit per memory; the `limit` highest-scoring memories with
        at least one hit are returned.
        """
        hits: Counter[int] = Counter()
        for q in queries:
            keywords = q.lower().split()
            for k in keywords:
                for mem_id, mem in self._memories.items():
                    if k in mem.description.lower() or k in mem.content.lower():
                        hits[mem_id] += 1

        top_k = hits.most_common(limit)

        return self._read([hit[0] for hit in top_k if hit[1] > 0])

    def _read(self, memory_ids: list[int]) -> list[MemoryItem]:
        """Resolve ids to memories, silently dropping unknown or None ids
        (the model may pass ids that no longer exist)."""
        return [
            mem
            for mem_id in memory_ids
            if mem_id is not None and (mem := self._memories.get(mem_id)) is not None
        ]

    def _add(self, description: str, content: str) -> int:
        """Insert a new memory and return its id (max existing id + 1)."""
        new_id = max(self._memories) + 1 if self._memories else 1
        self._memories[new_id] = self._coerce(
            {"id": new_id, "description": description, "content": content}
        )
        return new_id

    def _update(self, mem_id: int, description: str, content: str):
        """Overwrite a memory's description and content.

        Raises:
            KeyError: if `mem_id` does not exist.
        """
        # Explicit message for consistency with S3MemoryManager._update.
        if mem_id not in self._memories:
            raise KeyError(f"Memory {mem_id} not found")
        self._memories[mem_id].description = description
        self._memories[mem_id].content = content

    def _delete(self, mem_id: int):
        """Remove a memory.

        Raises:
            KeyError: if `mem_id` does not exist.
        """
        # Explicit message for consistency with S3MemoryManager._delete.
        if mem_id not in self._memories:
            raise KeyError(f"Memory {mem_id} not found")
        self._memories.pop(mem_id)

    def get_tools(self) -> list[Tool]:
        """Return Tool instances bound to this manager's state."""
        if self._tools is not None:
            return self._tools

        # The closure docstrings below become the tool descriptions sent to
        # the model — treat them as part of the interface.
        def search_tool(queries: list[str], limit: int = 5) -> str:
            """Search for memories using keyword search. Use as many queries as you want, the top results will be fused into one list. Search results include just id and description."""
            mems = self._search(queries, limit=limit)
            return "\n---\n".join(
                [self._format_memory(mem, include_content=False) for mem in mems]
            )

        def read_tool(mem_ids: list[int]) -> str:
            """Read the full contents of one or more memories."""
            mems = self._read(mem_ids)
            return "\n---\n".join(
                [self._format_memory(mem, include_content=True) for mem in mems]
            )

        def add_tool(description: str, content: str):
            """Add a new memory."""
            # Returns the new integer id (unlike the other mutators, which
            # return confirmation strings).
            return self._add(description, content)

        def update_tool(mem_id: int, description: str, content: str) -> str:
            """Update a memory by ID. Must provide content and description, even if only changing one of them."""
            self._update(mem_id, description, content)

            return f"Memory {mem_id} updated successfully."

        def delete_tool(mem_id: int) -> str:
            """Delete a memory by ID."""
            self._delete(mem_id)
            return f"Memory {mem_id} deleted successfully."

        def _rename(tool: Tool, name: str) -> Tool:
            # Tool.from_function names the tool after the Python function;
            # copy with the configured name when they differ.
            if tool.name == name:
                return tool
            return tool.model_copy(update={"name": name})

        self._tools = [
            _rename(Tool.from_function(search_tool), self.search_tool_name),
            _rename(Tool.from_function(read_tool), self.read_tool_name),
            _rename(Tool.from_function(add_tool), self.write_tool_name),
            _rename(Tool.from_function(update_tool), self.update_tool_name),
            _rename(Tool.from_function(delete_tool), self.delete_tool_name),
        ]
        return self._tools
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@dataclass
class S3RetryConfig:
    """Configuration for retry behavior on conflicts."""

    # Number of retries after the initial attempt (total attempts = max_retries + 1).
    max_retries: int = 5
    # Initial backoff in seconds; doubled each attempt (exponential backoff).
    base_delay: float = 0.1
    # Upper bound on a single backoff delay, in seconds.
    max_delay: float = 5.0
    # Fraction of the delay added as random jitter (0.1 = up to +10%).
    jitter: float = 0.1
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class S3MemoryManager:
    """
    S3-backed memory manager with optimistic concurrency control.

    Same API as MemoryManager but persists to S3 with safe concurrent access
    using S3 conditional writes (If-Match with ETags).

    Example:
        manager = S3MemoryManager(
            bucket="my-ai-memories",
            key="agent-123/memories.json",
        )
        tools = manager.get_tools()
    """

    def __init__(
        self,
        bucket: str,
        key: str = "memories.json",
        s3_client: Any | None = None,
        retry_config: S3RetryConfig | None = None,
        *,
        write_tool_name: str = "memwrite",
        read_tool_name: str = "memread",
        search_tool_name: str = "memsearch",
        update_tool_name: str = "memupdate",
        delete_tool_name: str = "memdelete",
    ):
        # s3_client may be injected for testing; otherwise boto3 is created
        # lazily on first use (see the `client` property).
        self.bucket = bucket
        self.key = key
        self._client = s3_client
        self.retry_config = retry_config or S3RetryConfig()

        self.write_tool_name = write_tool_name
        self.read_tool_name = read_tool_name
        self.search_tool_name = search_tool_name
        self.update_tool_name = update_tool_name
        self.delete_tool_name = delete_tool_name

        # Tool list is built lazily and cached by get_tools().
        self._tools: list[Tool] | None = None
        # NOTE(review): _cached_etag appears unused in this class — each
        # operation re-reads the ETag via _load_memories; confirm before removing.
        self._cached_etag: str | None = None

    @property
    def client(self):
        """Lazy initialization of S3 client."""
        if self._client is None:
            import boto3

            self._client = boto3.client("s3")
        return self._client

    def _load_memories(self) -> tuple[dict[int, MemoryItem], str | None]:
        """Load memories from S3, returning (memories_dict, etag).

        Returns ({}, None) when the object does not exist yet; the None etag
        tells _save_memories to do a create (If-None-Match) rather than an
        update (If-Match).
        """
        try:
            response = self.client.get_object(Bucket=self.bucket, Key=self.key)
            # S3 returns the ETag wrapped in double quotes; store it bare.
            etag = response["ETag"].strip('"')
            data = json.loads(response["Body"].read().decode("utf-8"))
            memories = {item["id"]: MemoryItem(**item) for item in data}
            return memories, etag
        except self.client.exceptions.NoSuchKey:
            return {}, None

    def _save_memories(
        self,
        memories: dict[int, MemoryItem],
        expected_etag: str | None,
    ) -> str:
        """
        Save memories to S3 with optimistic locking.

        Args:
            memories: The memories dict to save
            expected_etag: The ETag we expect (None for new file)

        Returns:
            The new ETag after saving

        Raises:
            ConflictError: If the file was modified by another process
        """
        data = [mem.model_dump() for mem in memories.values()]
        body = json.dumps(data, indent=2).encode("utf-8")

        kwargs: dict[str, Any] = {
            "Bucket": self.bucket,
            "Key": self.key,
            "Body": body,
            "ContentType": "application/json",
        }

        if expected_etag is None:
            # Creating new file - use If-None-Match
            kwargs["IfNoneMatch"] = "*"
        else:
            # Updating existing file - use If-Match (ETag must be quoted in
            # the request header, per the S3 conditional-write API).
            kwargs["IfMatch"] = f'"{expected_etag}"'

        try:
            response = self.client.put_object(**kwargs)
            return response["ETag"].strip('"')
        except self.client.exceptions.ClientError as e:
            error_code = e.response.get("Error", {}).get("Code", "")
            # Handle both PreconditionFailed and ConditionalRequestConflict
            if error_code in ("PreconditionFailed", "ConditionalRequestConflict"):
                raise S3MemoryConflictError(
                    "Memory file was modified by another process"
                ) from e
            raise

    def _retry_operation(self, operation_name: str, func):
        """Execute a function with retry on conflicts.

        Retries only on S3MemoryConflictError, with exponential backoff plus
        jitter; any other exception propagates immediately. Re-raises the
        last conflict error once max_retries is exhausted.
        """
        config = self.retry_config
        last_error: Exception | None = None

        for attempt in range(config.max_retries + 1):
            try:
                return func()
            except S3MemoryConflictError as e:
                last_error = e
                if attempt >= config.max_retries:
                    break
                delay = min(config.base_delay * (2**attempt), config.max_delay)
                jitter = delay * config.jitter * random.random()
                time.sleep(delay + jitter)

        raise last_error or RuntimeError(f"Retry failed for {operation_name}")

    @staticmethod
    def _format_memory(mem: MemoryItem, include_content: bool = True) -> str:
        # Same rendering as MemoryManager._format_memory: YAML, optionally
        # without `content` (used for search previews).
        dumped = mem.model_dump()
        if not include_content:
            dumped.pop("content")
        return yaml.safe_dump(dumped)

    def _search(self, queries: list[str], limit: int = 5) -> list[MemoryItem]:
        # Read-only: loads the current snapshot once, no conditional write.
        memories, _ = self._load_memories()
        hits: Counter[int] = Counter()

        for q in queries:
            keywords = q.lower().split()
            for k in keywords:
                for mem_id, mem in memories.items():
                    if k in mem.description.lower() or k in mem.content.lower():
                        hits[mem_id] += 1

        top_k = hits.most_common(limit)
        return [memories[hit[0]] for hit in top_k if hit[1] > 0]

    def _read(self, memory_ids: list[int]) -> list[MemoryItem]:
        # Read-only: unknown or None ids are silently dropped.
        memories, _ = self._load_memories()
        return [
            mem
            for mem_id in memory_ids
            if mem_id is not None and (mem := memories.get(mem_id)) is not None
        ]

    def _add(self, description: str, content: str) -> int:
        # Read-modify-write under optimistic locking; a conflict re-runs
        # do_add, which reloads and recomputes the id.
        def do_add():
            memories, etag = self._load_memories()
            new_id = max(memories.keys()) + 1 if memories else 1
            memories[new_id] = MemoryItem(
                id=new_id, description=description, content=content
            )
            self._save_memories(memories, etag)
            return new_id

        return self._retry_operation("add", do_add)

    def _update(self, mem_id: int, description: str, content: str):
        # Read-modify-write under optimistic locking. Raises KeyError if the
        # id does not exist in the current snapshot.
        def do_update():
            memories, etag = self._load_memories()
            if mem_id not in memories:
                raise KeyError(f"Memory {mem_id} not found")
            memories[mem_id].description = description
            memories[mem_id].content = content
            self._save_memories(memories, etag)

        self._retry_operation("update", do_update)

    def _delete(self, mem_id: int):
        # Read-modify-write under optimistic locking. Raises KeyError if the
        # id does not exist in the current snapshot.
        def do_delete():
            memories, etag = self._load_memories()
            if mem_id not in memories:
                raise KeyError(f"Memory {mem_id} not found")
            memories.pop(mem_id)
            self._save_memories(memories, etag)

        self._retry_operation("delete", do_delete)

    def get_tools(self) -> list[Tool]:
        """Return Tool instances bound to this manager's state."""
        if self._tools is not None:
            return self._tools

        # The closure docstrings below become the tool descriptions sent to
        # the model — treat them as part of the interface.
        def search_tool(queries: list[str], limit: int = 5) -> str:
            """Search for memories using keyword search. Use as many queries as you want, the top results will be fused into one list. Search results include just id and description."""
            mems = self._search(queries, limit=limit)
            return "\n---\n".join(
                [self._format_memory(mem, include_content=False) for mem in mems]
            )

        def read_tool(mem_ids: list[int]) -> str:
            """Read the full contents of one or more memories."""
            mems = self._read(mem_ids)
            return "\n---\n".join(
                [self._format_memory(mem, include_content=True) for mem in mems]
            )

        def add_tool(description: str, content: str):
            """Add a new memory."""
            # Returns the new integer id (unlike the other mutators, which
            # return confirmation strings).
            return self._add(description, content)

        def update_tool(mem_id: int, description: str, content: str) -> str:
            """Update a memory by ID. Must provide content and description, even if only changing one of them."""
            self._update(mem_id, description, content)
            return f"Memory {mem_id} updated successfully."

        def delete_tool(mem_id: int) -> str:
            """Delete a memory by ID."""
            self._delete(mem_id)
            return f"Memory {mem_id} deleted successfully."

        def _rename(tool: Tool, name: str) -> Tool:
            # Tool.from_function names the tool after the Python function;
            # copy with the configured name when they differ.
            if tool.name == name:
                return tool
            return tool.model_copy(update={"name": name})

        self._tools = [
            _rename(Tool.from_function(search_tool), self.search_tool_name),
            _rename(Tool.from_function(read_tool), self.read_tool_name),
            _rename(Tool.from_function(add_tool), self.write_tool_name),
            _rename(Tool.from_function(update_tool), self.update_tool_name),
            _rename(Tool.from_function(delete_tool), self.delete_tool_name),
        ]
        return self._tools

    def get_all_memories(self) -> list[MemoryItem]:
        """Get all memories (useful for debugging/inspection)."""
        memories, _ = self._load_memories()
        return list(memories.values())

    def clear_all(self):
        """Delete all memories (useful for testing)."""
        # Best-effort: any client error (e.g. access denied) is swallowed.
        try:
            self.client.delete_object(Bucket=self.bucket, Key=self.key)
        except self.client.exceptions.ClientError:
            pass
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
class S3MemoryConflictError(Exception):
    """Raised when a write conflict occurs due to concurrent modification."""
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
# Explicit public API for `from ... import *` and documentation tooling.
__all__ = ["MemoryManager", "S3MemoryManager", "S3MemoryConflictError", "S3RetryConfig"]
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Open Tool Composition (OTC) for lm-deluge.
|
|
3
|
+
|
|
4
|
+
Allows LLMs to write Python code that orchestrates multiple tool calls,
|
|
5
|
+
with only the final result entering the model's context.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from lm_deluge.tool import Tool
|
|
9
|
+
|
|
10
|
+
from .executor import OTCExecutor
|
|
11
|
+
from .parse import OTCExecutionError, OTCSecurityError
|
|
12
|
+
|
|
13
|
+
# Description for the "compose" tool. The <<AVAILABLE_TOOLS>> placeholder is
# substituted by ToolComposer._build_compose_description() before use.
# Fix: the first two fragments previously concatenated to "write codethat
# orchestrates" (missing trailing space on the first fragment).
OTC_PROMPT = (
    'The "compose" tool allows you to write code '
    "that orchestrates multiple tool calls. "
    "The purpose is to compose tool calls to get a "
    "final result, without wasting network roundtrips "
    "or input tokens on intermediate results. Use this to:\n"
    " - Call multiple tools and combine their results\n"
    " - Filter or aggregate data from tool results\n"
    " - Implement conditional logic based on tool outputs\n"
    " - Process large amounts of data without polluting your context\n"
    "The code you write is a restricted subset of Python that runs in a "
    "sandboxed environment, with access to each of your tools as a function. "
    "Only the final output (via print() or a 'result' variable) will be returned to you.\n\n"
    "IMPORTANT:\n"
    " - Tools are called synchronously (no await needed)\n"
    " - Use print() or set a 'result' variable for output\n"
    " - You can use `json` and standard builtins (list, dict, sum, len, etc.) without importing anything\n"
    " - Imports, file I/O, and network access are disabled\n\n"
    "<<AVAILABLE_TOOLS>>\n\n"
    "Example:\n"
    "```python\n"
    "# Get team members and their expenses\n"
    'team = get_team_members(department="engineering")\n'
    'expenses = [get_expenses(user_id=m["id"], quarter="Q3") for m in team]\n\n'
    "# Find who exceeded budget\n"
    "over_budget = []\n"
    "for member, exp in zip(team, expenses):\n"
    '\ttotal = sum(e["amount"] for e in exp)\n'
    "\tif total > 10000:\n"
    '\t\tover_budget.append({"name": member["name"], "total": total})\n\n'
    "print(json.dumps(over_budget))\n"
    "```"
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ToolComposer:
    """Wraps a set of tools with an Open Tool Composition ("compose") tool.

    The compose tool lets the model submit restricted-Python code that calls
    the wrapped tools as ordinary functions; only the code's final output is
    returned to the model. Similar in spirit to SubAgentManager, but for
    tool composition instead of subagents.

    Example:
        >>> composer = ToolComposer(tools=[search_tool, fetch_tool, calculator_tool])
        >>> all_tools = composer.get_all_tools()  # Original tools + compose tool
        >>> # LLM can now call compose() to orchestrate the other tools
    """

    # JSON-schema primitive name -> Python type name, used when rendering
    # tool signatures. Unknown schema types pass through unchanged.
    _SCHEMA_TO_PY = {
        "string": "str",
        "integer": "int",
        "number": "float",
        "boolean": "bool",
        "array": "list",
        "object": "dict",
    }

    def __init__(
        self,
        tools: list[Tool],
        compose_tool_name: str = "compose",
        include_tools_in_prompt: bool = False,
    ):
        """Initialize the ToolComposer.

        Args:
            tools: Tools available for composition
            compose_tool_name: Name for the composition tool
            include_tools_in_prompt: Whether to include tool signatures in compose description
        """
        self.tools = tools
        self.compose_tool_name = compose_tool_name
        self.include_tools_in_prompt = include_tools_in_prompt
        self.executor = OTCExecutor(tools)

    def _generate_tool_signatures(self) -> str:
        """Render a Python-style signature plus description for each tool."""
        rendered: list[str] = []
        for tool in self.tools:
            pieces: list[str] = []
            for pname, pschema in (tool.parameters or {}).items():
                raw_type = pschema.get("type", "any")
                py_type = self._SCHEMA_TO_PY.get(raw_type, raw_type)
                is_required = bool(tool.required) and pname in tool.required
                suffix = "" if is_required else " = None"
                pieces.append(f"{pname}: {py_type}{suffix}")

            header = f"{tool.name}({', '.join(pieces)})"
            summary = tool.description or "No description"
            # Keep long descriptions from bloating the compose prompt.
            if len(summary) > 500:
                summary = summary[:497] + "..."
            rendered.append(f" {header}\n {summary}")

        return "\n".join(rendered)

    def _build_compose_description(self) -> str:
        """Fill the <<AVAILABLE_TOOLS>> placeholder in the compose prompt."""
        if self.include_tools_in_prompt:
            replacement = f"# Available Tools\n{self._generate_tool_signatures()}"
        else:
            replacement = "# Available Tools\nYou can use any tool available to you, but you must use it as a Python function."
        return OTC_PROMPT.replace("<<AVAILABLE_TOOLS>>", replacement)

    async def _compose(self, code: str) -> str:
        """Execute composition code, mapping known failures to short messages."""
        try:
            return await self.executor.execute(code)
        except OTCSecurityError as e:
            return f"Security error: {e}"
        except OTCExecutionError as e:
            return f"Execution error: {e}"
        except Exception as e:
            return f"Unexpected error: {type(e).__name__}: {e}"

    def get_compose_tool(self) -> Tool:
        """Build the composition tool bound to this composer's executor."""
        code_param = {
            "type": "string",
            "description": "Python code to execute. Use available tools as functions.",
        }
        return Tool(
            name=self.compose_tool_name,
            description=self._build_compose_description(),
            run=self._compose,
            parameters={"code": code_param},
            required=["code"],
        )

    def get_all_tools(self) -> list[Tool]:
        """All tools, compose tool first.

        Returns tools in order: [compose_tool, ...original_tools]
        The compose tool is first to encourage the model to consider composition.
        """
        return [self.get_compose_tool(), *self.tools]

    def get_tools_without_compose(self) -> list[Tool]:
        """Just the original tools, without the compose tool."""
        return self.tools
|