lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of lm-deluge might be problematic.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
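The {llm_tools → pipelines} rename entries above imply a caller-facing import change. A minimal migration sketch; the module names are taken from the listed paths, and anything beyond those paths (e.g., re-exports in pipelines/__init__.py) is an assumption:

# 0.0.67: helpers lived under lm_deluge.llm_tools
from lm_deluge.llm_tools import classify, extract
# 0.0.88: the same modules moved to lm_deluge.pipelines
from lm_deluge.pipelines import classify, extract

The largest single addition, lm_deluge/tool/prefab/sandbox.py (+1621), is shown below.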
lm_deluge/tool/prefab/sandbox.py
@@ -0,0 +1,1621 @@
import asyncio
import json
import os
import secrets
import shlex
import struct
import time
import uuid
from dataclasses import dataclass, field
from typing import Any

from lm_deluge.tool import Tool


@dataclass
class TrackedProcess:
    """Tracks a process running in the sandbox."""

    process: Any  # Modal's ContainerProcess
    name: str
    command: str
    started_at: float = field(default_factory=time.time)


class ModalSandbox:
    def __init__(
        self,
        app_name: str | None = None,
        *,
        image: Any | None = None,
        block_network: bool = False,
        add_local_files: list[str] | None = None,
        encrypted_ports: list[int] | None = None,
    ):
        import modal

        app_name = app_name or secrets.token_urlsafe(32)
        app = modal.App.lookup(app_name, create_if_missing=True)
        self.app = app
        self.block_network = block_network
        self.encrypted_ports = encrypted_ports or []

        if image is None:
            image = modal.Image.debian_slim(python_version="3.12")

        assert isinstance(image, modal.Image), "expected modal Image"
        if add_local_files:
            for path in add_local_files:
                if os.path.exists(path):
                    # Compute a reasonable remote path based on the basename
                    basename = os.path.basename(os.path.normpath(path))
                    remote_path = f"/root/{basename}"
                    if os.path.isdir(path):
                        image = image.add_local_dir(path, remote_path)  # type: ignore
                    else:
                        image = image.add_local_file(path, remote_path)  # type: ignore
                else:
                    raise FileNotFoundError(f"File not found: {path}")

        # Create sandbox with encrypted_ports if specified
        create_kwargs: dict[str, Any] = {
            "app": app,
            "block_network": block_network,
            "image": image,
        }
        if self.encrypted_ports:
            create_kwargs["encrypted_ports"] = self.encrypted_ports

        self.sb = modal.Sandbox.create(**create_kwargs)

        # Process tracking - simple dict for background processes
        self.processes: dict[str, TrackedProcess] = {}
        self.process_counter: int = 0
        self._destroyed = False

    def __enter__(self):
        """Synchronous context manager entry (use async with for async support)."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Synchronous context manager exit - cleanup sandbox."""
        if not self._destroyed:
            self._destroy()
        return False

    def __del__(self):
        """Cleanup sandbox when garbage collected (backup cleanup)."""
        if not self._destroyed:
            try:
                self._destroy()
            except Exception:
                # Ignore errors during cleanup in __del__
                pass

    def _generate_process_name(self) -> str:
        """Generate a unique process name like p1, p2, etc."""
        self.process_counter += 1
        return f"p{self.process_counter}"

    async def _exec(
        self,
        command: str | None = None,
        cmd: list[str] | None = None,
        timeout: int | None = None,
        wait: bool = True,
        name: str | None = None,
    ) -> str:
        """
        Execute a command in the sandbox.

        Args:
            command: Shell command as a string (e.g., "ls -la")
            cmd: Command as array of strings (e.g., ["ls", "-la"])
            timeout: Timeout in seconds (leave empty for no timeout)
            wait: If True, wait for completion and return output.
                If False, run in background and return immediately.
            name: Name for background process (auto-generated if not provided)

        Returns:
            Output string if wait=True, or confirmation message if wait=False
        """
        # Handle both command formats
        if command is not None:
            # String format - wrap in bash -c
            cmd_list = ["bash", "-c", command]
            cmd_str = command
        elif cmd is not None:
            # Array format - use directly
            cmd_list = cmd
            cmd_str = shlex.join(cmd)
        else:
            return "Error: Must provide either 'command' (string) or 'cmd' (array)"

        # Disable timeout for background processes so long-running servers survive
        exec_timeout = timeout if wait else None

        # Start the process
        process = await self.sb.exec.aio(*cmd_list, timeout=exec_timeout)

        if wait:
            # Wait for completion and return output
            output = ""
            try:
                async for line in process.stdout:
                    output += line
            except Exception:
                pass

            # Wait for process to complete to get exit code
            await process.wait.aio()

            # Truncate if needed
            if len(output) > 5000:
                output = "...[truncated]...\n" + output[-5000:]

            # Include exit code if non-zero
            if process.returncode != 0:
                output = f"[Exit code: {process.returncode}]\n{output}"

            return output if output else "(no output)"
        else:
            # Background process - track it but don't read stdout
            proc_name = name or self._generate_process_name()
            tracked = TrackedProcess(
                process=process,
                name=proc_name,
                command=cmd_str,
            )
            self.processes[proc_name] = tracked

            return (
                f"Started background process '{proc_name}'.\n"
                f"Command: {cmd_str}\n"
                f"Note: Use another command (e.g., curl localhost:PORT) to verify the process is working. "
                f"Use list_processes() to check status."
            )

    def _check_process(self, name: str | None = None) -> str:
        """
        Check status of a background process.

        Args:
            name: Process name. If not provided, shows all processes.

        Returns:
            Process status information
        """
        if not self.processes:
            return "No background processes have been started."

        if name:
            proc = self.processes.get(name)
            if not proc:
                available = ", ".join(self.processes.keys())
                return f"Process '{name}' not found. Available: {available}"

            # Use poll() to check status without blocking
            poll_result = proc.process.poll()
            if poll_result is None:
                status = "running"
            else:
                status = f"completed (exit code: {poll_result})"

            elapsed = time.time() - proc.started_at
            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
        else:
            # Show all processes
            lines = ["NAME     STATUS              COMMAND"]
            for proc_name, proc in self.processes.items():
                poll_result = proc.process.poll()
                if poll_result is None:
                    status = "running"
                else:
                    status = f"exit {poll_result}"

                cmd_display = (
                    proc.command[:40] + "..."
                    if len(proc.command) > 40
                    else proc.command
                )
                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")

            return "\n".join(lines)

    def _get_url(self, port: int = 8080) -> str:
        """
        Get public URL for a port.

        Args:
            port: Port number (default 8080)

        Returns:
            URL and token information
        """
        if self.block_network:
            return "Error: Network is blocked. Create sandbox with block_network=False to use tunnels."

        # For port 8080 or if no encrypted_ports, use create_connect_token
        if port == 8080 or port not in self.encrypted_ports:
            try:
                creds = self.sb.create_connect_token(
                    user_metadata={"user_id": "sandbox"}
                )
                return f"URL: {creds.url}\nToken: {creds.token}"
            except Exception as e:
                return f"Error getting URL: {e}"

        # For other ports that were configured with encrypted_ports
        try:
            tunnels = self.sb.tunnels()
            if port in tunnels:
                tunnel = tunnels[port]
                return f"URL: {tunnel.url}"
            else:
                available = list(tunnels.keys()) if tunnels else []
                return f"Port {port} not available. Available ports: {available}"
        except Exception as e:
            return f"Error getting tunnel: {e}"

    def _destroy(self):
        """Destroy the sandbox and mark as destroyed."""
        if not self._destroyed:
            self.sb.terminate()
            self._destroyed = True

    def get_tools(self):
        bash_tool = Tool(
            name="bash",
            description=(
                "Execute a bash command in the sandbox environment. "
                "Set wait=False to run servers or long-running processes in the background. "
                "For background processes, verify they're working using another command (e.g., curl localhost:PORT)."
            ),
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "Shell command to execute (e.g., 'ls -la', 'python -m http.server 8080')",
                },
                "wait": {
                    "type": "boolean",
                    "description": "If true (default), wait for completion. If false, run in background.",
                },
                "name": {
                    "type": "string",
                    "description": "Name for background process (e.g., 'server'). Only used with wait=false.",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds; leave empty for no timeout",
                },
            },
            required=["command"],
        )

        check_tool = Tool(
            name="list_processes",
            description="Check status of background processes. Shows whether each process is running or has exited.",
            run=self._check_process,
            parameters={
                "name": {
                    "type": "string",
                    "description": "Process name to check, or omit to see all processes",
                },
            },
            required=[],
        )

        url_tool = Tool(
            name="get_url",
            description=(
                "Get a public URL to access a port in the sandbox. "
                "Use after starting a web server to get the external URL. "
                "Default port is 8080."
            ),
            run=self._get_url,
            parameters={
                "port": {
                    "type": "integer",
                    "description": "Port number to expose (default: 8080)",
                },
            },
            required=[],
        )

        return [bash_tool, check_tool, url_tool]

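ModalSandbox above creates its Modal sandbox eagerly in __init__ and exposes bash, list_processes, and get_url via get_tools(). A minimal driver sketch, assuming Modal credentials are configured, that the import path matches the file path shown in this diff, and that Tool (defined in lm_deluge/tool/__init__.py, not shown in this hunk) exposes the registered callable as its run attribute:

import asyncio

from lm_deluge.tool.prefab.sandbox import ModalSandbox  # import path assumed


async def main():
    # Construction blocks until the sandbox exists; __exit__ terminates it.
    with ModalSandbox() as sandbox:
        bash, list_processes, get_url = sandbox.get_tools()
        print(await bash.run(command="python3 -V"))  # _exec is a coroutine
        print(list_processes.run())                  # _check_process is sync


asyncio.run(main())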
class DaytonaSandbox:
    def __init__(
        self,
        api_key: str | None = None,
        api_url: str | None = None,
        target: str | None = None,
        sandbox_id: str | None = None,
        language: str = "python",
        auto_start: bool = True,
    ):
        """
        Initialize a Daytona sandbox.

        Args:
            api_key: Daytona API key (if None, will look for DAYTONA_API_KEY env var)
            api_url: Daytona API URL (if None, will look for DAYTONA_API_URL env var)
            target: Daytona target (if None, will look for DAYTONA_TARGET env var)
            sandbox_id: ID of existing sandbox to connect to (if None, creates a new one)
            language: Programming language for the sandbox (default: python)
            auto_start: Whether to automatically start the sandbox if stopped
        """
        import os

        self.api_key = api_key or os.getenv("DAYTONA_API_KEY")
        self.api_url = api_url or os.getenv("DAYTONA_API_URL")
        self.target = target or os.getenv("DAYTONA_TARGET")
        self.sandbox_id = sandbox_id
        self.language = language
        self.auto_start = auto_start
        self.sandbox = None
        self.client = None
        self._initialized = False
        self._destroyed = False

    async def __aenter__(self):
        """Async context manager entry - initialize sandbox."""
        await self._ensure_initialized()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup sandbox."""
        if not self._destroyed:
            await self._destroy()
        return False

    def __del__(self):
        """Cleanup sandbox when garbage collected (backup cleanup).

        Note: This attempts sync cleanup which may not work perfectly for async resources.
        Prefer using 'async with' for guaranteed cleanup.
        """
        if not self._destroyed and self.sandbox:
            import warnings

            warnings.warn(
                "DaytonaSandbox was not properly cleaned up. "
                "Use 'async with DaytonaSandbox(...) as sandbox:' for automatic cleanup.",
                ResourceWarning,
                stacklevel=2,
            )

    async def _ensure_initialized(self):
        """Lazy initialization of sandbox"""
        if self._initialized:
            return

        from daytona_sdk import (  # type: ignore
            AsyncDaytona,
            CreateSandboxBaseParams,
            DaytonaConfig,
        )

        # Initialize client with config
        if self.api_key or self.api_url or self.target:
            config = DaytonaConfig(
                api_key=self.api_key, api_url=self.api_url, target=self.target
            )
            self.client = AsyncDaytona(config)
        else:
            # Use environment variables
            self.client = AsyncDaytona()

        if self.sandbox_id:
            # Connect to existing sandbox - use find_one with id label
            sandboxes = await self.client.list(labels={"id": self.sandbox_id})
            if not sandboxes or not sandboxes.items:
                raise ValueError(f"Sandbox with ID {self.sandbox_id} not found")
            self.sandbox = sandboxes.items[0]
        else:
            # Create new sandbox with default configuration
            params = CreateSandboxBaseParams(language=self.language)  # type: ignore
            self.sandbox = await self.client.create(params)  # type: ignore
            self.sandbox_id = self.sandbox.id

        # Start sandbox if needed
        if self.auto_start and self.sandbox.state != "started":
            await self.sandbox.start()

        self._initialized = True

    async def _exec(
        self,
        command: str,
        timeout: int = 30,
        cwd: str | None = None,
        env: dict | None = None,
    ) -> str:
        """
        Execute a shell command in the sandbox.

        Args:
            command: Shell command to execute
            timeout: Timeout in seconds (None for no timeout)
            cwd: Working directory for the command
            env: Environment variables for the command

        Returns:
            Command output and exit code information
        """
        await self._ensure_initialized()

        # Execute command using the process interface
        # API: exec(command, cwd=".", env=None, timeout=None) -> ExecutionResponse
        assert self.sandbox, "no sandbox"
        result = await self.sandbox.process.exec(
            command=command, cwd=cwd or ".", env=env, timeout=timeout
        )

        # ExecutionResponse has .result (output) and .exit_code
        output = result.result or ""

        # Include exit code if non-zero
        if result.exit_code != 0:
            output = f"[Exit code: {result.exit_code}]\n{output}"

        # Limit output to last 5000 characters to avoid overwhelming the LLM
        if len(output) > 5000:
            output = "...[truncated]...\n" + output[-5000:]

        return output or "(no output)"

    async def _read_file(self, path: str, max_size: int = 50000) -> str:
        """
        Read a file from the sandbox.

        Args:
            path: Path to the file in the sandbox
            max_size: Maximum file size in bytes to read

        Returns:
            File contents as string
        """
        await self._ensure_initialized()

        # API: download_file(remote_path, timeout=1800) -> bytes
        assert self.sandbox, "no sandbox"
        content_bytes = await self.sandbox.fs.download_file(path)
        content = content_bytes.decode("utf-8", errors="replace")

        if len(content) > max_size:
            return f"File too large ({len(content)} bytes). First {max_size} bytes:\n{content[:max_size]}"

        return content

    async def _write_file(self, path: str, content: str) -> str:
        """
        Write content to a file in the sandbox.

        Args:
            path: Path to the file in the sandbox
            content: Content to write

        Returns:
            Success message
        """
        await self._ensure_initialized()
        assert self.sandbox, "no sandbox"

        # API: upload_file(file: bytes, remote_path: str, timeout=1800) -> None
        content_bytes = content.encode("utf-8")
        await self.sandbox.fs.upload_file(content_bytes, path)
        return f"Successfully wrote {len(content)} bytes to {path}"

    async def _list_files(self, path: str = ".", pattern: str | None = None) -> str:
        """
        List files in a directory.

        Args:
            path: Directory path to list
            pattern: Optional glob pattern to filter files

        Returns:
            Formatted list of files
        """
        await self._ensure_initialized()
        assert self.sandbox, "no sandbox"

        if pattern:
            # API: find_files(path, pattern) -> List[Match]
            matches = await self.sandbox.fs.find_files(path=path, pattern=pattern)
            if not matches:
                return f"No files matching '{pattern}' found in {path}"

            # Format the matches
            files = [match.file for match in matches]
            return "\n".join(files)
        else:
            # API: list_files(path) -> List[FileInfo]
            file_infos = await self.sandbox.fs.list_files(path=path)

            if not file_infos:
                return f"No files found in {path}"

            # Format the output with file info
            lines = []
            for info in file_infos:
                # FileInfo has .name, .size, .mode, .is_dir, etc
                if info.is_dir:
                    lines.append(f"{info.name}/")
                else:
                    lines.append(f"{info.name} ({info.size} bytes)")
            return "\n".join(lines)

    async def _get_preview_link(self, port: int = 8080) -> str:
        """
        Get a preview link for exposing a port.

        Args:
            port: Port number to expose

        Returns:
            Preview URL and token information
        """
        await self._ensure_initialized()
        assert self.sandbox, "no sandbox"
        preview = await self.sandbox.get_preview_link(port)

        result = f"URL: {preview.url}"
        if hasattr(preview, "token") and preview.token:
            result += f"\nToken: {preview.token}"

        return result

    async def _get_working_dir(self) -> str:
        """Get the current working directory in the sandbox."""
        await self._ensure_initialized()
        assert self.sandbox, "no sandbox"
        return await self.sandbox.get_work_dir()

    async def _destroy(self):
        """Delete the sandbox and clean up resources."""
        if self.sandbox and not self._destroyed:
            await self.sandbox.delete()
            self._destroyed = True
            self._initialized = False
            self.sandbox = None

    def get_tools(self):
        """Return list of tools for LLM use."""
        bash_tool = Tool(
            name="bash",
            description=(
                "Execute a bash command in the Daytona sandbox environment. "
                "The command runs in a persistent Linux environment. "
                "Provide the command as a string (e.g., 'ls -la' or 'python script.py'). "
                "Output is truncated to the last 5000 characters if longer. "
                "Exit codes are included in output if non-zero."
            ),
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds for the command execution (default: 30)",
                },
                "cwd": {
                    "type": "string",
                    "description": "Working directory for the command (default: current directory)",
                },
                "env": {
                    "type": "object",
                    "description": "Environment variables for the command (optional)",
                },
            },
            required=["command"],
        )

        read_file_tool = Tool(
            name="read_file",
            description=(
                "Read the contents of a file from the sandbox filesystem. "
                "Provide the absolute or relative path to the file. "
                "Files larger than 50KB are truncated."
            ),
            run=self._read_file,
            parameters={
                "path": {
                    "type": "string",
                    "description": "Path to the file to read (e.g., '/home/user/script.py')",
                },
                "max_size": {
                    "type": "integer",
                    "description": "Maximum file size in bytes to read (default: 50000)",
                },
            },
            required=["path"],
        )

        write_file_tool = Tool(
            name="write_file",
            description=(
                "Write content to a file in the sandbox filesystem. "
                "Creates the file if it doesn't exist, overwrites if it does. "
                "Parent directories must exist."
            ),
            run=self._write_file,
            parameters={
                "path": {
                    "type": "string",
                    "description": "Path where to write the file (e.g., '/home/user/script.py')",
                },
                "content": {
                    "type": "string",
                    "description": "Content to write to the file",
                },
            },
            required=["path", "content"],
        )

        list_files_tool = Tool(
            name="list_files",
            description=(
                "List files and directories in the sandbox filesystem. "
                "Useful for exploring the sandbox environment and finding files. "
                "Optionally filter by glob pattern (e.g., '*.py', '**/*.txt')."
            ),
            run=self._list_files,
            parameters={
                "path": {
                    "type": "string",
                    "description": "Directory path to list (default: current directory)",
                },
                "pattern": {
                    "type": "string",
                    "description": "Glob pattern to filter files (e.g., '*.py', '**/*.txt')",
                },
            },
            required=[],
        )

        preview_tool = Tool(
            name="get_preview_link",
            description=(
                "Get a public URL to access a port in the sandbox. "
                "Useful for exposing web servers or applications running in the sandbox. "
                "Returns a URL and authentication token if needed."
            ),
            run=self._get_preview_link,
            parameters={
                "port": {
                    "type": "integer",
                    "description": "Port number to expose (default: 8080)",
                },
            },
            required=[],
        )

        workdir_tool = Tool(
            name="get_working_directory",
            description=(
                "Get the current working directory path in the sandbox. "
                "Useful for understanding the sandbox environment layout."
            ),
            run=self._get_working_dir,
            parameters={},
            required=[],
        )

        return [
            bash_tool,
            read_file_tool,
            write_file_tool,
            list_files_tool,
            preview_tool,
            workdir_tool,
        ]

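The Daytona variant is fully async and lazily initialized, so 'async with' is the natural entry point. A sketch assuming DAYTONA_API_KEY is set in the environment, with the same import-path and Tool.run assumptions as the ModalSandbox example above:

import asyncio

from lm_deluge.tool.prefab.sandbox import DaytonaSandbox  # import path assumed


async def main():
    async with DaytonaSandbox() as sandbox:  # creates and starts the sandbox
        tools = {t.name: t for t in sandbox.get_tools()}
        await tools["write_file"].run(path="hello.py", content="print('hi')\n")
        print(await tools["bash"].run(command="python hello.py"))


asyncio.run(main())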
class DockerSandbox:
    """
    Local Docker-based sandbox for running code in isolated containers.

    Works with Docker Desktop, Colima, or any Docker-compatible runtime.
    Each sandbox instance creates its own container.

    Requires:
        - docker package installed (pip install docker)
        - Docker daemon running (Docker Desktop, Colima, etc.)

    Example:
        async with DockerSandbox() as sandbox:
            tools = sandbox.get_tools()
            # Use tools with your LLM...
    """

    # Default image - has uv pre-installed, Debian Bookworm base
    DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"

    def __init__(
        self,
        image: str | None = None,
        *,
        docker_host: str | None = None,
        network_mode: str = "bridge",
        mem_limit: str = "512m",
        cpu_period: int = 100000,
        cpu_quota: int | None = None,
        working_dir: str = "/workspace",
    ):
        """
        Initialize a Docker sandbox.

        Args:
            image: Docker image to use. Defaults to uv's Python 3.12 image.
            docker_host: Docker socket URL. If None, auto-detects from DOCKER_HOST
                env var or tries common socket paths.
            network_mode: Docker network mode. "bridge" (default) for internet access,
                "none" for full isolation.
            mem_limit: Memory limit (e.g., "512m", "1g"). Default "512m".
            cpu_period: CPU period in microseconds. Default 100000.
            cpu_quota: CPU quota in microseconds. None for no limit.
                E.g., 50000 with period 100000 = 50% of one CPU.
            working_dir: Working directory inside container. Default "/workspace".
        """
        self.image = image or self.DEFAULT_IMAGE
        self.docker_host = docker_host
        self.network_mode = network_mode
        self.mem_limit = mem_limit
        self.cpu_period = cpu_period
        self.cpu_quota = cpu_quota
        self.working_dir = working_dir

        # State
        self.container = None
        self._client = None
        self._initialized = False
        self._destroyed = False

        # Process tracking for background processes
        self.processes: dict[str, TrackedProcess] = {}
        self.process_counter: int = 0

    @property
    def client(self):
        """Lazy-load Docker client."""
        if self._client is None:
            import docker

            if self.docker_host:
                self._client = docker.DockerClient(base_url=self.docker_host)
            else:
                # Auto-detect socket location
                # Try DOCKER_HOST env first, then common socket paths
                docker_host = os.environ.get("DOCKER_HOST")
                if not docker_host:
                    # Common socket paths (Docker Desktop, Colima, Podman, etc.)
                    socket_paths = [
                        os.path.expanduser("~/.colima/default/docker.sock"),
                        os.path.expanduser("~/.colima/docker.sock"),
                        "/var/run/docker.sock",
                        os.path.expanduser("~/.docker/run/docker.sock"),
                        os.path.expanduser(
                            "~/.local/share/containers/podman/machine/podman.sock"
                        ),
                    ]
                    for path in socket_paths:
                        if os.path.exists(path):
                            docker_host = f"unix://{path}"
                            break

                if docker_host:
                    self._client = docker.DockerClient(base_url=docker_host)
                else:
                    # Fall back to default (will likely fail but gives clear error)
                    self._client = docker.from_env()
        return self._client

    async def __aenter__(self):
        """Async context manager entry - initialize sandbox."""
        await self._ensure_initialized()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup sandbox."""
        if not self._destroyed:
            await self._destroy()
        return False

    def __enter__(self):
        """Sync context manager entry."""
        import asyncio

        asyncio.get_event_loop().run_until_complete(self._ensure_initialized())
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Sync context manager exit."""
        if not self._destroyed:
            self._destroy_sync()
        return False

    def __del__(self):
        """Cleanup container when garbage collected (backup cleanup)."""
        if not self._destroyed and self.container:
            import warnings

            warnings.warn(
                "DockerSandbox was not properly cleaned up. "
                "Use 'with DockerSandbox(...) as sandbox:' for automatic cleanup.",
                ResourceWarning,
                stacklevel=2,
            )

    async def _ensure_initialized(self):
        """Lazy initialization - pull image if needed and start container."""
        if self._initialized:
            return

        # Pull image if not present
        await asyncio.to_thread(self._pull_image_if_needed)

        # Create and start container
        await asyncio.to_thread(self._create_container)

        self._initialized = True

    def _pull_image_if_needed(self):
        """Pull the Docker image if not already present."""
        try:
            self.client.images.get(self.image)
        except Exception:
            # Image not found locally, pull it
            self.client.images.pull(self.image)

    def _create_container(self):
        """Create and start the container."""
        self.container = self.client.containers.run(
            self.image,
            command=["sleep", "infinity"],
            detach=True,
            remove=True,  # Auto-remove when stopped
            network_mode=self.network_mode,
            mem_limit=self.mem_limit,
            cpu_period=self.cpu_period,
            cpu_quota=self.cpu_quota,
            working_dir=self.working_dir,
            # Create the working directory
            entrypoint=[
                "/bin/sh",
                "-c",
                f"mkdir -p {self.working_dir} && sleep infinity",
            ],
        )

    def _generate_process_name(self) -> str:
        """Generate a unique process name like p1, p2, etc."""
        self.process_counter += 1
        return f"p{self.process_counter}"

    async def _exec(
        self,
        command: str,
        timeout: int = 60,
        wait: bool = True,
        name: str | None = None,
    ) -> str:
        """
        Execute a command in the sandbox.

        Args:
            command: Shell command to execute
            timeout: Timeout in seconds (only applies when wait=True)
            wait: If True, wait for completion. If False, run in background.
            name: Name for background process (auto-generated if not provided)

        Returns:
            Command output if wait=True, or status message if wait=False
        """
        await self._ensure_initialized()
        assert self.container is not None, "Container not initialized"

        if wait:
            # Synchronous execution with timeout
            try:
                exit_code, output = await asyncio.wait_for(
                    asyncio.to_thread(
                        self.container.exec_run,
                        ["sh", "-c", command],
                        workdir=self.working_dir,
                    ),
                    timeout=timeout,
                )
            except asyncio.TimeoutError:
                return f"[Timeout after {timeout}s]"

            # Decode output
            if isinstance(output, bytes):
                output = output.decode("utf-8", errors="replace")

            # Truncate if needed
            if len(output) > 5000:
                output = "...[truncated]...\n" + output[-5000:]

            # Include exit code if non-zero
            if exit_code != 0:
                output = f"[Exit code: {exit_code}]\n{output}"

            return output if output else "(no output)"
        else:
            # Background execution
            exec_id = await asyncio.to_thread(
                self.client.api.exec_create,
                self.container.id,
                ["sh", "-c", command],
                workdir=self.working_dir,
            )
            await asyncio.to_thread(
                self.client.api.exec_start,
                exec_id,
                detach=True,
            )

            proc_name = name or self._generate_process_name()
            tracked = TrackedProcess(
                process=exec_id,
                name=proc_name,
                command=command,
            )
            self.processes[proc_name] = tracked

            return (
                f"Started background process '{proc_name}'.\n"
                f"Command: {command}\n"
                f"Use list_processes() to check status."
            )

    def _check_process(self, name: str | None = None) -> str:
        """Check status of background processes."""
        if not self.processes:
            return "No background processes have been started."

        if name:
            proc = self.processes.get(name)
            if not proc:
                available = ", ".join(self.processes.keys())
                return f"Process '{name}' not found. Available: {available}"

            # Check exec status
            exec_info = self.client.api.exec_inspect(proc.process)
            running = exec_info.get("Running", False)
            exit_code = exec_info.get("ExitCode")

            if running:
                status = "running"
            else:
                status = f"completed (exit code: {exit_code})"

            elapsed = time.time() - proc.started_at
            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
        else:
            # Show all processes
            lines = ["NAME     STATUS              COMMAND"]
            for proc_name, proc in self.processes.items():
                exec_info = self.client.api.exec_inspect(proc.process)
                running = exec_info.get("Running", False)
                exit_code = exec_info.get("ExitCode")

                if running:
                    status = "running"
                else:
                    status = f"exit {exit_code}"

                cmd_display = (
                    proc.command[:40] + "..."
                    if len(proc.command) > 40
                    else proc.command
                )
                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")

            return "\n".join(lines)

    async def _destroy(self):
        """Stop the container and clean up."""
        if self._destroyed:
            return

        if self.container:
            try:
                await asyncio.to_thread(self.container.stop, timeout=5)
            except Exception:
                pass  # Container might already be stopped

        self._destroyed = True
        self._initialized = False

    def _destroy_sync(self):
        """Synchronous version of destroy."""
        if self._destroyed:
            return

        if self.container:
            try:
                self.container.stop(timeout=5)
            except Exception:
                pass

        self._destroyed = True
        self._initialized = False

    def get_tools(self):
        """Return list of tools for LLM use."""
        bash_tool = Tool(
            name="bash",
            description=(
                "Execute a bash command in the Docker sandbox environment. "
                "The sandbox has Python 3.12 and uv pre-installed. "
                "Use 'apt-get update && apt-get install -y <package>' for system packages. "
                "Set wait=false to run servers or long-running processes in background."
            ),
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "The shell command to execute",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds (default: 60, only for wait=true)",
                },
                "wait": {
                    "type": "boolean",
                    "description": "If true (default), wait for completion. If false, run in background.",
                },
                "name": {
                    "type": "string",
                    "description": "Name for background process (e.g., 'server'). Only used with wait=false.",
                },
            },
            required=["command"],
        )

        check_tool = Tool(
            name="list_processes",
            description="Check status of background processes started with wait=false.",
            run=self._check_process,
            parameters={
                "name": {
                    "type": "string",
                    "description": "Process name to check, or omit to see all processes",
                },
            },
            required=[],
        )

        return [bash_tool, check_tool]

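DockerSandbox's bash tool implements the wait=false flow its parameter docs describe: the exec is created detached, tracked in self.processes, and list_processes polls exec_inspect. A sketch of that flow, under the same assumed import path and Tool.run attribute as the earlier examples:

import asyncio

from lm_deluge.tool.prefab.sandbox import DockerSandbox  # import path assumed


async def main():
    async with DockerSandbox() as sandbox:
        bash, list_processes = sandbox.get_tools()
        # Detached exec: tracked in sandbox.processes rather than awaited.
        print(await bash.run(command="sleep 30", wait=False, name="sleeper"))
        # Polls exec_inspect; should report "sleeper" as running.
        print(list_processes.run())


asyncio.run(main())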
1099
|
+
class FargateSandbox:
|
|
1100
|
+
"""
|
|
1101
|
+
AWS Fargate-based sandbox for running untrusted code in isolated containers.
|
|
1102
|
+
|
|
1103
|
+
Requires:
|
|
1104
|
+
- boto3 installed
|
|
1105
|
+
- AWS credentials configured
|
|
1106
|
+
- VPC with subnets that have internet access (for pulling images)
|
|
1107
|
+
- Security group that allows outbound traffic
|
|
1108
|
+
|
|
1109
|
+
The sandbox automatically:
|
|
1110
|
+
- Creates IAM roles for task execution and ECS Exec
|
|
1111
|
+
- Registers a task definition with the specified image
|
|
1112
|
+
- Runs a Fargate task and waits for it to be ready
|
|
1113
|
+
- Executes commands via ECS Exec (SSM Session Manager)
|
|
1114
|
+
|
|
1115
|
+
Example:
|
|
1116
|
+
async with FargateSandbox(
|
|
1117
|
+
subnets=["subnet-abc123"],
|
|
1118
|
+
security_groups=["sg-abc123"],
|
|
1119
|
+
) as sandbox:
|
|
1120
|
+
tools = sandbox.get_tools()
|
|
1121
|
+
# Use tools with your LLM...
|
|
1122
|
+
"""
|
|
1123
|
+
|
|
1124
|
+
# Default image - minimal Python with common tools
|
|
1125
|
+
DEFAULT_IMAGE = "python:3.12-slim"
|
|
1126
|
+
|
|
1127
|
+
# IAM policy for ECS Exec (SSM Session Manager)
|
|
1128
|
+
EXEC_POLICY = {
|
|
1129
|
+
"Version": "2012-10-17",
|
|
1130
|
+
"Statement": [
|
|
1131
|
+
{
|
|
1132
|
+
"Effect": "Allow",
|
|
1133
|
+
"Action": [
|
|
1134
|
+
"ssmmessages:CreateControlChannel",
|
|
1135
|
+
"ssmmessages:CreateDataChannel",
|
|
1136
|
+
"ssmmessages:OpenControlChannel",
|
|
1137
|
+
"ssmmessages:OpenDataChannel",
|
|
1138
|
+
],
|
|
1139
|
+
"Resource": "*",
|
|
1140
|
+
}
|
|
1141
|
+
],
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
# Trust policy for ECS tasks
|
|
1145
|
+
TASK_TRUST_POLICY = {
|
|
1146
|
+
"Version": "2012-10-17",
|
|
1147
|
+
"Statement": [
|
|
1148
|
+
{
|
|
1149
|
+
"Effect": "Allow",
|
|
1150
|
+
"Principal": {"Service": "ecs-tasks.amazonaws.com"},
|
|
1151
|
+
"Action": "sts:AssumeRole",
|
|
1152
|
+
}
|
|
1153
|
+
],
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
def __init__(
|
|
1157
|
+
self,
|
|
1158
|
+
subnets: list[str],
|
|
1159
|
+
security_groups: list[str],
|
|
1160
|
+
*,
|
|
1161
|
+
cluster: str | None = None,
|
|
1162
|
+
image: str | None = None,
|
|
1163
|
+
cpu: int = 256,
|
|
1164
|
+
memory: int = 512,
|
|
1165
|
+
region: str | None = None,
|
|
1166
|
+
task_role_arn: str | None = None,
|
|
1167
|
+
execution_role_arn: str | None = None,
|
|
1168
|
+
assign_public_ip: bool = True,
|
|
1169
|
+
):
|
|
1170
|
+
"""
|
|
1171
|
+
Initialize a Fargate sandbox.
|
|
1172
|
+
|
|
1173
|
+
Args:
|
|
1174
|
+
subnets: List of VPC subnet IDs (required). Use subnets with internet
|
|
1175
|
+
access (public subnets with IGW, or private with NAT).
|
|
1176
|
+
security_groups: List of security group IDs (required). Must allow
|
|
1177
|
+
outbound HTTPS (443) for ECS Exec to work.
|
|
1178
|
+
cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
|
|
1179
|
+
image: Docker image to use. Defaults to python:3.12-slim.
|
|
1180
|
+
cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
|
|
1181
|
+
memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
|
|
1182
|
+
region: AWS region. If None, uses boto3 default.
|
|
1183
|
+
task_role_arn: IAM role ARN for the task. If None, creates one with
|
|
1184
|
+
minimal permissions (just SSM for ECS Exec).
|
|
1185
|
+
execution_role_arn: IAM role ARN for task execution. If None, uses
|
|
1186
|
+
the AWS managed ecsTaskExecutionRole.
|
|
1187
|
+
assign_public_ip: Whether to assign a public IP. Required if using
|
|
1188
|
+
public subnets without NAT. Default True.
|
|
1189
|
+
"""
|
|
1190
|
+
self.subnets = subnets
|
|
1191
|
+
self.security_groups = security_groups
|
|
1192
|
+
self.cluster = cluster or "lm-deluge-sandbox"
|
|
1193
|
+
self.image = image or self.DEFAULT_IMAGE
|
|
1194
|
+
self.cpu = str(cpu)
|
|
1195
|
+
self.memory = str(memory)
|
|
1196
|
+
self.region = region
|
|
1197
|
+
self.task_role_arn = task_role_arn
|
|
1198
|
+
self.execution_role_arn = execution_role_arn
|
|
1199
|
+
self.assign_public_ip = assign_public_ip
|
|
1200
|
+
|
|
1201
|
+
# State
|
|
1202
|
+
self.task_arn: str | None = None
|
|
1203
|
+
self.task_definition_arn: str | None = None
|
|
1204
|
+
self._initialized = False
|
|
1205
|
+
self._destroyed = False
|
|
1206
|
+
|
|
1207
|
+
# boto3 clients (lazy init)
|
|
1208
|
+
self._ecs_client = None
|
|
1209
|
+
self._iam_client = None
|
|
1210
|
+
|
|
1211
|
+
@property
|
|
1212
|
+
def ecs(self):
|
|
1213
|
+
"""Lazy-load ECS client."""
|
|
1214
|
+
if self._ecs_client is None:
|
|
1215
|
+
import boto3
|
|
1216
|
+
|
|
1217
|
+
self._ecs_client = boto3.client("ecs", region_name=self.region)
|
|
1218
|
+
return self._ecs_client
|
|
1219
|
+
|
|
1220
|
+
@property
|
|
1221
|
+
def iam(self):
|
|
1222
|
+
"""Lazy-load IAM client."""
|
|
1223
|
+
if self._iam_client is None:
|
|
1224
|
+
import boto3
|
|
1225
|
+
|
|
1226
|
+
self._iam_client = boto3.client("iam", region_name=self.region)
|
|
1227
|
+
return self._iam_client
|
|
1228
|
+
|
|
1229
|
+
async def __aenter__(self):
|
|
1230
|
+
"""Async context manager entry - initialize sandbox."""
|
|
1231
|
+
await self._ensure_initialized()
|
|
1232
|
+
return self
|
|
1233
|
+
|
|
1234
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
1235
|
+
"""Async context manager exit - cleanup sandbox."""
|
|
1236
|
+
if not self._destroyed:
|
|
1237
|
+
await self._destroy()
|
|
1238
|
+
return False
|
|
1239
|
+
|
|
1240
|
+
def __del__(self):
|
|
1241
|
+
"""Cleanup sandbox when garbage collected (backup cleanup)."""
|
|
1242
|
+
if not self._destroyed and self.task_arn:
|
|
1243
|
+
import warnings
|
|
1244
|
+
|
|
1245
|
+
warnings.warn(
|
|
1246
|
+
"FargateSandbox was not properly cleaned up. "
|
|
1247
|
+
"Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
|
|
1248
|
+
ResourceWarning,
|
|
1249
|
+
stacklevel=2,
|
|
1250
|
+
)
|
|
1251
|
+
|
|
1252
|
+
async def _ensure_initialized(self):
|
|
1253
|
+
"""Lazy initialization - create cluster, task def, and run task."""
|
|
1254
|
+
if self._initialized:
|
|
1255
|
+
return
|
|
1256
|
+
|
|
1257
|
+
# Ensure cluster exists
|
|
1258
|
+
await self._ensure_cluster()
|
|
1259
|
+
|
|
1260
|
+
# Ensure IAM roles exist
|
|
1261
|
+
await self._ensure_roles()
|
|
1262
|
+
|
|
1263
|
+
# Register task definition
|
|
1264
|
+
await self._register_task_definition()
|
|
1265
|
+
|
|
1266
|
+
# Run the task
|
|
1267
|
+
await self._run_task()
|
|
1268
|
+
|
|
1269
|
+
# Wait for task to be running
|
|
1270
|
+
await self._wait_for_task()
|
|
1271
|
+
|
|
1272
|
+
self._initialized = True
|
|
1273
|
+
|
|
1274
|
+
async def _ensure_cluster(self):
|
|
1275
|
+
"""Create ECS cluster if it doesn't exist."""
|
|
1276
|
+
try:
|
|
1277
|
+
response = await asyncio.to_thread(
|
|
1278
|
+
self.ecs.describe_clusters, clusters=[self.cluster]
|
|
1279
|
+
)
|
|
1280
|
+
clusters = response.get("clusters", [])
|
|
1281
|
+
if clusters and clusters[0].get("status") == "ACTIVE":
|
|
1282
|
+
return # Cluster exists
|
|
1283
|
+
except Exception:
|
|
1284
|
+
pass
|
|
1285
|
+
|
|
1286
|
+
# Create cluster
|
|
1287
|
+
await asyncio.to_thread(
|
|
1288
|
+
self.ecs.create_cluster,
|
|
1289
|
+
clusterName=self.cluster,
|
|
1290
|
+
settings=[
|
|
1291
|
+
{"name": "containerInsights", "value": "disabled"},
|
|
1292
|
+
],
|
|
1293
|
+
)
|
|
1294
|
+
|
|
1295
|
+
async def _ensure_roles(self):
|
|
1296
|
+
"""Create IAM roles if not provided."""
|
|
1297
|
+
# Task role (for ECS Exec)
|
|
1298
|
+
if not self.task_role_arn:
|
|
1299
|
+
role_name = "lm-deluge-sandbox-task-role"
|
|
1300
|
+
try:
|
|
1301
|
+
response = await asyncio.to_thread(
|
|
1302
|
+
self.iam.get_role, RoleName=role_name
|
|
1303
|
+
)
|
|
1304
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
1305
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
1306
|
+
# Create the role
|
|
1307
|
+
response = await asyncio.to_thread(
|
|
1308
|
+
self.iam.create_role,
|
|
1309
|
+
RoleName=role_name,
|
|
1310
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
1311
|
+
Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
|
|
1312
|
+
)
|
|
1313
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
1314
|
+
|
|
1315
|
+
# Attach inline policy for ECS Exec
|
|
1316
|
+
await asyncio.to_thread(
|
|
1317
|
+
self.iam.put_role_policy,
|
|
1318
|
+
RoleName=role_name,
|
|
1319
|
+
PolicyName="ecs-exec-policy",
|
|
1320
|
+
PolicyDocument=json.dumps(self.EXEC_POLICY),
|
|
1321
|
+
)
|
|
1322
|
+
|
|
1323
|
+
# IAM is eventually consistent - wait a bit
|
|
1324
|
+
await asyncio.sleep(5)
|
|
1325
|
+
|
|
1326
|
+
# Execution role (for pulling images, logs)
|
|
1327
|
+
if not self.execution_role_arn:
|
|
1328
|
+
role_name = "lm-deluge-sandbox-execution-role"
|
|
1329
|
+
try:
|
|
1330
|
+
response = await asyncio.to_thread(
|
|
1331
|
+
self.iam.get_role, RoleName=role_name
|
|
1332
|
+
)
|
|
1333
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
1334
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
1335
|
+
# Create the role
|
|
1336
|
+
response = await asyncio.to_thread(
|
|
1337
|
+
self.iam.create_role,
|
|
1338
|
+
RoleName=role_name,
|
|
1339
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
1340
|
+
Description="Execution role for lm-deluge Fargate sandbox",
|
|
1341
|
+
)
|
|
1342
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
1343
|
+
|
|
1344
|
+
# Attach AWS managed policy
|
|
1345
|
+
await asyncio.to_thread(
|
|
1346
|
+
self.iam.attach_role_policy,
|
|
1347
|
+
RoleName=role_name,
|
|
1348
|
+
PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
|
|
1349
|
+
)
|
|
1350
|
+
|
|
1351
|
+
# IAM is eventually consistent - wait a bit
|
|
1352
|
+
await asyncio.sleep(5)
|
|
1353
|
+
|
|
1354
|
+
async def _register_task_definition(self):
|
|
1355
|
+
"""Register a task definition for the sandbox."""
|
|
1356
|
+
family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"
|
|
1357
|
+
|
|
1358
|
+
response = await asyncio.to_thread(
|
|
1359
|
+
self.ecs.register_task_definition,
|
|
1360
|
+
family=family,
|
|
1361
|
+
networkMode="awsvpc",
|
|
1362
|
+
requiresCompatibilities=["FARGATE"],
|
|
1363
|
+
cpu=self.cpu,
|
|
1364
|
+
memory=self.memory,
|
|
1365
|
+
taskRoleArn=self.task_role_arn,
|
|
1366
|
+
executionRoleArn=self.execution_role_arn,
|
|
1367
|
+
containerDefinitions=[
|
|
1368
|
+
{
|
|
1369
|
+
"name": "sandbox",
|
|
1370
|
+
"image": self.image,
|
|
1371
|
+
"essential": True,
|
|
1372
|
+
# Keep container running - sleep infinity
|
|
1373
|
+
"command": ["sh", "-c", "sleep infinity"],
|
|
1374
|
+
"linuxParameters": {
|
|
1375
|
+
"initProcessEnabled": True, # Required for ECS Exec
|
|
1376
|
+
},
|
|
1377
|
+
}
|
|
1378
|
+
],
|
|
1379
|
+
)
|
|
1380
|
+
self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
|
|
1381
|
+
|
|
1382
|
+
    async def _run_task(self):
        """Run a Fargate task."""
        response = await asyncio.to_thread(
            self.ecs.run_task,
            cluster=self.cluster,
            taskDefinition=self.task_definition_arn,
            launchType="FARGATE",
            enableExecuteCommand=True,  # Enable ECS Exec
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": self.subnets,
                    "securityGroups": self.security_groups,
                    "assignPublicIp": "ENABLED"
                    if self.assign_public_ip
                    else "DISABLED",
                }
            },
        )

        tasks = response.get("tasks", [])
        if not tasks:
            failures = response.get("failures", [])
            raise RuntimeError(f"Failed to run task: {failures}")

        self.task_arn = tasks[0]["taskArn"]

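    # Illustration (assumed shape): when ECS cannot place the task, the
    # RuntimeError above surfaces `failures` entries of roughly this form:
    #
    #   [{"arn": "...", "reason": "RESOURCE:MEMORY", "detail": "..."}]
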
    async def _wait_for_task(self, timeout: int = 120):
        """Wait for task to reach RUNNING state."""
        start = time.time()
        while time.time() - start < timeout:
            response = await asyncio.to_thread(
                self.ecs.describe_tasks,
                cluster=self.cluster,
                tasks=[self.task_arn],
            )
            tasks = response.get("tasks", [])
            if tasks:
                status = tasks[0].get("lastStatus")
                if status == "RUNNING":
                    # Also check that execute command agent is running
                    containers = tasks[0].get("containers", [])
                    for container in containers:
                        managed_agents = container.get("managedAgents", [])
                        for agent in managed_agents:
                            if agent.get("name") == "ExecuteCommandAgent":
                                if agent.get("lastStatus") == "RUNNING":
                                    return
                elif status in ("STOPPED", "DEACTIVATING"):
                    reason = tasks[0].get("stoppedReason", "Unknown")
                    raise RuntimeError(f"Task stopped: {reason}")

            await asyncio.sleep(2)

        raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")

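    # Illustration (assumed response shape): the loop above waits for a
    # describe_tasks fragment like this before declaring the sandbox ready:
    #
    #   {"tasks": [{
    #       "lastStatus": "RUNNING",
    #       "containers": [{
    #           "managedAgents": [
    #               {"name": "ExecuteCommandAgent", "lastStatus": "RUNNING"}
    #           ],
    #       }],
    #   }]}
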
    async def _exec(
        self,
        command: str,
        timeout: int = 60,
    ) -> str:
        """
        Execute a command in the sandbox.

        Args:
            command: Shell command to execute
            timeout: Timeout in seconds

        Returns:
            Command output (stdout + stderr)
        """
        await self._ensure_initialized()

        # Call ECS execute_command
        response = await asyncio.to_thread(
            self.ecs.execute_command,
            cluster=self.cluster,
            task=self.task_arn,
            container="sandbox",
            interactive=True,
            command=f"/bin/sh -c {shlex.quote(command)}",
        )

        session = response.get("session", {})
        stream_url = session.get("streamUrl")
        token = session.get("tokenValue")

        if not stream_url or not token:
            return f"Error: Failed to get session: {response}"

        # Connect to websocket and read output
        try:
            output = await self._read_ssm_session(stream_url, token, timeout)
        except Exception as e:
            return f"Error executing command: {e}"

        # Truncate if needed
        if len(output) > 5000:
            output = "...[truncated]...\n" + output[-5000:]

        return output if output else "(no output)"

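    # Usage sketch (hypothetical `sandbox` instance): _exec can be awaited
    # directly, or indirectly via the bash tool returned by get_tools() below:
    #
    #   output = await sandbox._exec("python -c 'print(40 + 2)'", timeout=30)
    #   print(output)  # "42" plus any stderr, or "(no output)"
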
    async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
        """
        Connect to SSM session websocket and read command output.

        The SSM agent uses a binary protocol:
        - Header: 4-byte big-endian length + 32-byte null-padded message type
        - Payload varies by message type

        Note: SSM retransmits messages until ACKed. Since we're just reading
        (not fully implementing the protocol), we deduplicate by tracking
        seen message hashes.
        """
        import aiohttp

        output_chunks = []
        seen_messages: set[bytes] = set()  # Dedupe retransmissions

        async with aiohttp.ClientSession() as session:
            async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
                # Send init message with token
                init_message = {
                    "MessageSchemaVersion": "1.0",
                    "RequestId": str(uuid.uuid4()),
                    "TokenValue": token,
                }
                await ws.send_str(json.dumps(init_message))

                # Read messages until channel closes or timeout
                try:
                    async for msg in ws:
                        if msg.type == aiohttp.WSMsgType.BINARY:
                            # Skip duplicate messages (SSM retransmits until ACKed)
                            msg_hash = msg.data[:116]  # Header is enough to identify
                            if msg_hash in seen_messages:
                                continue
                            seen_messages.add(msg_hash)

                            parsed = self._parse_ssm_message(msg.data)
                            if parsed:
                                msg_type, payload = parsed
                                if "output_stream_data" in msg_type:
                                    output_chunks.append(payload)
                                elif "channel_closed" in msg_type:
                                    break
                        elif msg.type == aiohttp.WSMsgType.ERROR:
                            break
                        elif msg.type == aiohttp.WSMsgType.CLOSED:
                            break
                except asyncio.TimeoutError:
                    pass

        return "".join(output_chunks)

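    # Note on the 116-byte dedup key above (assumption, not verified against
    # the SSM spec here): the agent message header appears to carry per-message
    # fields such as a sequence number and message ID within its first 116
    # bytes, so that prefix is taken to uniquely identify a retransmitted frame
    # without parsing the full header.
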
    def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
        """
        Parse an SSM agent message.

        Format:
        - Bytes 0-3: Header length (big-endian uint32)
        - Bytes 4-35: Message type (32 bytes, null-padded ASCII)
        - After header: Payload length (4 bytes) + payload
        """
        if len(data) < 36:
            return None

        try:
            header_len = struct.unpack(">I", data[0:4])[0]
            msg_type = data[4:36].decode("ascii").rstrip("\x00")

            # Payload starts after header
            if len(data) > header_len:
                payload_data = data[header_len:]
                if len(payload_data) >= 4:
                    payload_len = struct.unpack(">I", payload_data[0:4])[0]
                    if len(payload_data) >= 4 + payload_len:
                        payload = payload_data[4 : 4 + payload_len].decode(
                            "utf-8", errors="replace"
                        )
                        return msg_type, payload

            return msg_type, ""
        except Exception:
            return None

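    # Worked example (synthetic frame, not captured from a live session): with
    # the customary 116-byte header, _parse_ssm_message decodes as follows:
    #
    #   frame = (
    #       struct.pack(">I", 116)                      # header length
    #       + b"output_stream_data".ljust(32, b"\x00")  # message type
    #       + b"\x00" * 80                              # remaining header bytes
    #       + struct.pack(">I", 5)                      # payload length
    #       + b"hello"                                  # payload
    #   )
    #   self._parse_ssm_message(frame) == ("output_stream_data", "hello")
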
    async def _destroy(self):
        """Stop the task and clean up."""
        if self._destroyed:
            return

        if self.task_arn:
            try:
                await asyncio.to_thread(
                    self.ecs.stop_task,
                    cluster=self.cluster,
                    task=self.task_arn,
                    reason="Sandbox destroyed",
                )
            except Exception:
                pass  # Best effort

        # Optionally deregister task definition
        if self.task_definition_arn:
            try:
                await asyncio.to_thread(
                    self.ecs.deregister_task_definition,
                    taskDefinition=self.task_definition_arn,
                )
            except Exception:
                pass

        self._destroyed = True
        self._initialized = False

    def get_tools(self):
        """Return list of tools for LLM use."""
        bash_tool = Tool(
            name="bash",
            description=(
                "Execute a bash command in the AWS Fargate sandbox environment. "
                "The command runs in an isolated container. "
                "Output is truncated to the last 5000 characters if longer. "
                "Note: This sandbox does not support background processes - "
                "commands must complete within the timeout."
            ),
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in seconds for the command execution (default: 60)",
                },
            },
            required=["command"],
        )

        return [bash_tool]
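
    # Usage sketch (hypothetical wiring): the returned list is meant to be
    # handed to an agent loop; since the Tool is constructed with
    # run=self._exec, each "bash" tool call routes back into the sandbox:
    #
    #   tools = sandbox.get_tools()
    #   # pass `tools` to your client/agent loop of choice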