lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (92)
  1. lm_deluge/__init__.py +25 -2
  2. lm_deluge/api_requests/anthropic.py +92 -17
  3. lm_deluge/api_requests/base.py +47 -11
  4. lm_deluge/api_requests/bedrock.py +7 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +138 -18
  7. lm_deluge/api_requests/openai.py +114 -21
  8. lm_deluge/client.py +282 -49
  9. lm_deluge/config.py +15 -3
  10. lm_deluge/mock_openai.py +643 -0
  11. lm_deluge/models/__init__.py +12 -1
  12. lm_deluge/models/anthropic.py +17 -2
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +29 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +10 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +86 -8
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +1 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +696 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +224 -40
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/tool/__init__.py +1118 -0
  39. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  40. lm_deluge/tool/builtin/gemini.py +59 -0
  41. lm_deluge/tool/builtin/openai.py +74 -0
  42. lm_deluge/tool/cua/__init__.py +173 -0
  43. lm_deluge/tool/cua/actions.py +148 -0
  44. lm_deluge/tool/cua/base.py +27 -0
  45. lm_deluge/tool/cua/batch.py +215 -0
  46. lm_deluge/tool/cua/converters.py +466 -0
  47. lm_deluge/tool/cua/kernel.py +702 -0
  48. lm_deluge/tool/cua/trycua.py +989 -0
  49. lm_deluge/tool/prefab/__init__.py +45 -0
  50. lm_deluge/tool/prefab/batch_tool.py +156 -0
  51. lm_deluge/tool/prefab/docs.py +1119 -0
  52. lm_deluge/tool/prefab/email.py +294 -0
  53. lm_deluge/tool/prefab/filesystem.py +1711 -0
  54. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  55. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  56. lm_deluge/tool/prefab/memory.py +458 -0
  57. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  58. lm_deluge/tool/prefab/otc/executor.py +281 -0
  59. lm_deluge/tool/prefab/otc/parse.py +188 -0
  60. lm_deluge/tool/prefab/random.py +212 -0
  61. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  62. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  63. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  64. lm_deluge/tool/prefab/sandbox.py +1621 -0
  65. lm_deluge/tool/prefab/sheets.py +385 -0
  66. lm_deluge/tool/prefab/subagents.py +233 -0
  67. lm_deluge/tool/prefab/todos.py +342 -0
  68. lm_deluge/tool/prefab/tool_search.py +169 -0
  69. lm_deluge/tool/prefab/web_search.py +199 -0
  70. lm_deluge/tracker.py +16 -13
  71. lm_deluge/util/schema.py +412 -0
  72. lm_deluge/warnings.py +8 -0
  73. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
  74. lm_deluge-0.0.88.dist-info/RECORD +117 -0
  75. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  76. lm_deluge/built_in_tools/openai.py +0 -28
  77. lm_deluge/presets/cerebras.py +0 -17
  78. lm_deluge/presets/meta.py +0 -13
  79. lm_deluge/tool.py +0 -849
  80. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  81. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  82. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  83. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  84. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  85. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  86. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
  87. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
  88. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  89. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  90. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
  91. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
  92. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
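Most of the churn above is a module reorganization: lm_deluge/llm_tools becomes lm_deluge/pipelines, the single-file lm_deluge/tool.py becomes the lm_deluge/tool package, and built_in_tools moves under tool/builtin. A minimal sketch of what that implies for imports, assuming the moved modules keep their public names (verify against the installed 0.0.88 wheel before relying on these paths):

    # 0.0.67-era imports (paths removed in 0.0.88)
    # from lm_deluge.llm_tools import translate
    # from lm_deluge.built_in_tools.anthropic import bash

    # 0.0.88-era equivalents, inferred from the renames above
    from lm_deluge.pipelines import translate          # llm_tools -> pipelines
    from lm_deluge.tool import Tool                    # tool.py -> tool/ package, same dotted path
    from lm_deluge.tool.builtin.anthropic import bash  # built_in_tools -> tool/builtin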
lm_deluge/tool/prefab/sandbox.py
@@ -0,0 +1,1621 @@
+import asyncio
+import json
+import os
+import secrets
+import shlex
+import struct
+import time
+import uuid
+from dataclasses import dataclass, field
+from typing import Any
+
+from lm_deluge.tool import Tool
+
+
+@dataclass
+class TrackedProcess:
+    """Tracks a process running in the sandbox."""
+
+    process: Any  # Modal's ContainerProcess
+    name: str
+    command: str
+    started_at: float = field(default_factory=time.time)
+
+
+class ModalSandbox:
+    def __init__(
+        self,
+        app_name: str | None = None,
+        *,
+        image: Any | None = None,
+        block_network: bool = False,
+        add_local_files: list[str] | None = None,
+        encrypted_ports: list[int] | None = None,
+    ):
+        import modal
+
+        app_name = app_name or secrets.token_urlsafe(32)
+        app = modal.App.lookup(app_name, create_if_missing=True)
+        self.app = app
+        self.block_network = block_network
+        self.encrypted_ports = encrypted_ports or []
+
+        if image is None:
+            image = modal.Image.debian_slim(python_version="3.12")
+
+        assert isinstance(image, modal.Image), "expected modal Image"
+        if add_local_files:
+            for path in add_local_files:
+                if os.path.exists(path):
+                    # Compute a reasonable remote path based on the basename
+                    basename = os.path.basename(os.path.normpath(path))
+                    remote_path = f"/root/{basename}"
+                    if os.path.isdir(path):
+                        image = image.add_local_dir(path, remote_path)  # type: ignore
+                    else:
+                        image = image.add_local_file(path, remote_path)  # type: ignore
+                else:
+                    raise FileNotFoundError(f"File not found: {path}")
+
+        # Create sandbox with encrypted_ports if specified
+        create_kwargs: dict[str, Any] = {
+            "app": app,
+            "block_network": block_network,
+            "image": image,
+        }
+        if self.encrypted_ports:
+            create_kwargs["encrypted_ports"] = self.encrypted_ports
+
+        self.sb = modal.Sandbox.create(**create_kwargs)
+
+        # Process tracking - simple dict for background processes
+        self.processes: dict[str, TrackedProcess] = {}
+        self.process_counter: int = 0
+        self._destroyed = False
+
+    def __enter__(self):
+        """Synchronous context manager entry (use async with for async support)."""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Synchronous context manager exit - cleanup sandbox."""
+        if not self._destroyed:
+            self._destroy()
+        return False
+
+    def __del__(self):
+        """Cleanup sandbox when garbage collected (backup cleanup)."""
+        if not self._destroyed:
+            try:
+                self._destroy()
+            except Exception:
+                # Ignore errors during cleanup in __del__
+                pass
+
+    def _generate_process_name(self) -> str:
+        """Generate a unique process name like p1, p2, etc."""
+        self.process_counter += 1
+        return f"p{self.process_counter}"
+
+    async def _exec(
+        self,
+        command: str | None = None,
+        cmd: list[str] | None = None,
+        timeout: int | None = None,
+        wait: bool = True,
+        name: str | None = None,
+    ) -> str:
+        """
+        Execute a command in the sandbox.
+
+        Args:
+            command: Shell command as a string (e.g., "ls -la")
+            cmd: Command as array of strings (e.g., ["ls", "-la"])
+            timeout: Timeout in seconds (leave empty for no timeout)
+            wait: If True, wait for completion and return output.
+                If False, run in background and return immediately.
+            name: Name for background process (auto-generated if not provided)
+
+        Returns:
+            Output string if wait=True, or confirmation message if wait=False
+        """
+        # Handle both command formats
+        if command is not None:
+            # String format - wrap in bash -c
+            cmd_list = ["bash", "-c", command]
+            cmd_str = command
+        elif cmd is not None:
+            # Array format - use directly
+            cmd_list = cmd
+            cmd_str = shlex.join(cmd)
+        else:
+            return "Error: Must provide either 'command' (string) or 'cmd' (array)"
+
+        # Disable timeout for background processes so long-running servers survive
+        exec_timeout = timeout if wait else None
+
+        # Start the process
+        process = await self.sb.exec.aio(*cmd_list, timeout=exec_timeout)
+
+        if wait:
+            # Wait for completion and return output
+            output = ""
+            try:
+                async for line in process.stdout:
+                    output += line
+            except Exception:
+                pass
+
+            # Wait for process to complete to get exit code
+            await process.wait.aio()
+
+            # Truncate if needed
+            if len(output) > 5000:
+                output = "...[truncated]...\n" + output[-5000:]
+
+            # Include exit code if non-zero
+            if process.returncode != 0:
+                output = f"[Exit code: {process.returncode}]\n{output}"
+
+            return output if output else "(no output)"
+        else:
+            # Background process - track it but don't read stdout
+            proc_name = name or self._generate_process_name()
+            tracked = TrackedProcess(
+                process=process,
+                name=proc_name,
+                command=cmd_str,
+            )
+            self.processes[proc_name] = tracked
+
+            return (
+                f"Started background process '{proc_name}'.\n"
+                f"Command: {cmd_str}\n"
+                f"Note: Use another command (e.g., curl localhost:PORT) to verify the process is working. "
+                f"Use list_processes() to check status."
+            )
+
+    def _check_process(self, name: str | None = None) -> str:
+        """
+        Check status of a background process.
+
+        Args:
+            name: Process name. If not provided, shows all processes.
+
+        Returns:
+            Process status information
+        """
+        if not self.processes:
+            return "No background processes have been started."
+
+        if name:
+            proc = self.processes.get(name)
+            if not proc:
+                available = ", ".join(self.processes.keys())
+                return f"Process '{name}' not found. Available: {available}"
+
+            # Use poll() to check status without blocking
+            poll_result = proc.process.poll()
+            if poll_result is None:
+                status = "running"
+            else:
+                status = f"completed (exit code: {poll_result})"
+
+            elapsed = time.time() - proc.started_at
+            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
+        else:
+            # Show all processes
+            lines = ["NAME     STATUS              COMMAND"]
+            for proc_name, proc in self.processes.items():
+                poll_result = proc.process.poll()
+                if poll_result is None:
+                    status = "running"
+                else:
+                    status = f"exit {poll_result}"
+
+                cmd_display = (
+                    proc.command[:40] + "..."
+                    if len(proc.command) > 40
+                    else proc.command
+                )
+                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")
+
+            return "\n".join(lines)
+
+    def _get_url(self, port: int = 8080) -> str:
+        """
+        Get public URL for a port.
+
+        Args:
+            port: Port number (default 8080)
+
+        Returns:
+            URL and token information
+        """
+        if self.block_network:
+            return "Error: Network is blocked. Create sandbox with block_network=False to use tunnels."
+
+        # For port 8080 or if no encrypted_ports, use create_connect_token
+        if port == 8080 or port not in self.encrypted_ports:
+            try:
+                creds = self.sb.create_connect_token(
+                    user_metadata={"user_id": "sandbox"}
+                )
+                return f"URL: {creds.url}\nToken: {creds.token}"
+            except Exception as e:
+                return f"Error getting URL: {e}"
+
+        # For other ports that were configured with encrypted_ports
+        try:
+            tunnels = self.sb.tunnels()
+            if port in tunnels:
+                tunnel = tunnels[port]
+                return f"URL: {tunnel.url}"
+            else:
+                available = list(tunnels.keys()) if tunnels else []
+                return f"Port {port} not available. Available ports: {available}"
+        except Exception as e:
+            return f"Error getting tunnel: {e}"
+
+    def _destroy(self):
+        """Destroy the sandbox and mark as destroyed."""
+        if not self._destroyed:
+            self.sb.terminate()
+            self._destroyed = True
+
+    def get_tools(self):
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the sandbox environment. "
+                "Set wait=False to run servers or long-running processes in the background. "
+                "For background processes, verify they're working using another command (e.g., curl localhost:PORT)."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "Shell command to execute (e.g., 'ls -la', 'python -m http.server 8080')",
+                },
+                "wait": {
+                    "type": "boolean",
+                    "description": "If true (default), wait for completion. If false, run in background.",
+                },
+                "name": {
+                    "type": "string",
+                    "description": "Name for background process (e.g., 'server'). Only used with wait=false.",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds; leave empty for no timeout",
+                },
+            },
+            required=["command"],
+        )
+
+        check_tool = Tool(
+            name="list_processes",
+            description="Check status of background processes. Shows whether each process is running or has exited.",
+            run=self._check_process,
+            parameters={
+                "name": {
+                    "type": "string",
+                    "description": "Process name to check, or omit to see all processes",
+                },
+            },
+            required=[],
+        )
+
+        url_tool = Tool(
+            name="get_url",
+            description=(
+                "Get a public URL to access a port in the sandbox. "
+                "Use after starting a web server to get the external URL. "
+                "Default port is 8080."
+            ),
+            run=self._get_url,
+            parameters={
+                "port": {
+                    "type": "integer",
+                    "description": "Port number to expose (default: 8080)",
+                },
+            },
+            required=[],
+        )
+
+        return [bash_tool, check_tool, url_tool]
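For context, a minimal usage sketch for the ModalSandbox tools defined above (hypothetical wiring; assumes the modal SDK is installed and authenticated, and that Tool exposes the run callable it was constructed with):

    from lm_deluge.tool.prefab.sandbox import ModalSandbox

    with ModalSandbox() as sandbox:
        bash, list_processes, get_url = sandbox.get_tools()
        # Tools wrap coroutines; an agent loop would await them with tool-call args, e.g.:
        # output = await bash.run(command="echo hello from modal")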
+
+
+class DaytonaSandbox:
+    def __init__(
+        self,
+        api_key: str | None = None,
+        api_url: str | None = None,
+        target: str | None = None,
+        sandbox_id: str | None = None,
+        language: str = "python",
+        auto_start: bool = True,
+    ):
+        """
+        Initialize a Daytona sandbox.
+
+        Args:
+            api_key: Daytona API key (if None, will look for DAYTONA_API_KEY env var)
+            api_url: Daytona API URL (if None, will look for DAYTONA_API_URL env var)
+            target: Daytona target (if None, will look for DAYTONA_TARGET env var)
+            sandbox_id: ID of existing sandbox to connect to (if None, creates a new one)
+            language: Programming language for the sandbox (default: python)
+            auto_start: Whether to automatically start the sandbox if stopped
+        """
+        import os
+
+        self.api_key = api_key or os.getenv("DAYTONA_API_KEY")
+        self.api_url = api_url or os.getenv("DAYTONA_API_URL")
+        self.target = target or os.getenv("DAYTONA_TARGET")
+        self.sandbox_id = sandbox_id
+        self.language = language
+        self.auto_start = auto_start
+        self.sandbox = None
+        self.client = None
+        self._initialized = False
+        self._destroyed = False
+
+    async def __aenter__(self):
+        """Async context manager entry - initialize sandbox."""
+        await self._ensure_initialized()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - cleanup sandbox."""
+        if not self._destroyed:
+            await self._destroy()
+        return False
+
+    def __del__(self):
+        """Cleanup sandbox when garbage collected (backup cleanup).
+
+        Note: This attempts sync cleanup which may not work perfectly for async resources.
+        Prefer using 'async with' for guaranteed cleanup.
+        """
+        if not self._destroyed and self.sandbox:
+            import warnings
+
+            warnings.warn(
+                "DaytonaSandbox was not properly cleaned up. "
+                "Use 'async with DaytonaSandbox(...) as sandbox:' for automatic cleanup.",
+                ResourceWarning,
+                stacklevel=2,
+            )
+
+    async def _ensure_initialized(self):
+        """Lazy initialization of sandbox"""
+        if self._initialized:
+            return
+
+        from daytona_sdk import (  # type: ignore
+            AsyncDaytona,
+            CreateSandboxBaseParams,
+            DaytonaConfig,
+        )
+
+        # Initialize client with config
+        if self.api_key or self.api_url or self.target:
+            config = DaytonaConfig(
+                api_key=self.api_key, api_url=self.api_url, target=self.target
+            )
+            self.client = AsyncDaytona(config)
+        else:
+            # Use environment variables
+            self.client = AsyncDaytona()
+
+        if self.sandbox_id:
+            # Connect to existing sandbox - look it up by id label
+            sandboxes = await self.client.list(labels={"id": self.sandbox_id})
+            if not sandboxes or not sandboxes.items:
+                raise ValueError(f"Sandbox with ID {self.sandbox_id} not found")
+            self.sandbox = sandboxes.items[0]
+        else:
+            # Create new sandbox with default configuration
+            params = CreateSandboxBaseParams(language=self.language)  # type: ignore
+            self.sandbox = await self.client.create(params)  # type: ignore
+            self.sandbox_id = self.sandbox.id
+
+        # Start sandbox if needed
+        if self.auto_start and self.sandbox.state != "started":
+            await self.sandbox.start()
+
+        self._initialized = True
+
+    async def _exec(
+        self,
+        command: str,
+        timeout: int = 30,
+        cwd: str | None = None,
+        env: dict | None = None,
+    ) -> str:
+        """
+        Execute a shell command in the sandbox.
+
+        Args:
+            command: Shell command to execute
+            timeout: Timeout in seconds (None for no timeout)
+            cwd: Working directory for the command
+            env: Environment variables for the command
+
+        Returns:
+            Command output and exit code information
+        """
+        await self._ensure_initialized()
+
+        # Execute command using the process interface
+        # API: exec(command, cwd=".", env=None, timeout=None) -> ExecutionResponse
+        assert self.sandbox, "no sandbox"
+        result = await self.sandbox.process.exec(
+            command=command, cwd=cwd or ".", env=env, timeout=timeout
+        )
+
+        # ExecutionResponse has .result (output) and .exit_code
+        output = result.result or ""
+
+        # Include exit code if non-zero
+        if result.exit_code != 0:
+            output = f"[Exit code: {result.exit_code}]\n{output}"
+
+        # Limit output to last 5000 characters to avoid overwhelming the LLM
+        if len(output) > 5000:
+            output = "...[truncated]...\n" + output[-5000:]
+
+        return output or "(no output)"
+
+    async def _read_file(self, path: str, max_size: int = 50000) -> str:
+        """
+        Read a file from the sandbox.
+
+        Args:
+            path: Path to the file in the sandbox
+            max_size: Maximum file size in bytes to read
+
+        Returns:
+            File contents as string
+        """
+        await self._ensure_initialized()
+
+        # API: download_file(remote_path, timeout=1800) -> bytes
+        assert self.sandbox, "no sandbox"
+        content_bytes = await self.sandbox.fs.download_file(path)
+        content = content_bytes.decode("utf-8", errors="replace")
+
+        if len(content) > max_size:
+            return f"File too large ({len(content)} bytes). First {max_size} bytes:\n{content[:max_size]}"
+
+        return content
+
+    async def _write_file(self, path: str, content: str) -> str:
+        """
+        Write content to a file in the sandbox.
+
+        Args:
+            path: Path to the file in the sandbox
+            content: Content to write
+
+        Returns:
+            Success message
+        """
+        await self._ensure_initialized()
+        assert self.sandbox, "no sandbox"
+
+        # API: upload_file(file: bytes, remote_path: str, timeout=1800) -> None
+        content_bytes = content.encode("utf-8")
+        await self.sandbox.fs.upload_file(content_bytes, path)
+        return f"Successfully wrote {len(content)} bytes to {path}"
+
+    async def _list_files(self, path: str = ".", pattern: str | None = None) -> str:
+        """
+        List files in a directory.
+
+        Args:
+            path: Directory path to list
+            pattern: Optional glob pattern to filter files
+
+        Returns:
+            Formatted list of files
+        """
+        await self._ensure_initialized()
+        assert self.sandbox, "no sandbox"
+
+        if pattern:
+            # API: find_files(path, pattern) -> List[Match]
+            matches = await self.sandbox.fs.find_files(path=path, pattern=pattern)
+            if not matches:
+                return f"No files matching '{pattern}' found in {path}"
+
+            # Format the matches
+            files = [match.file for match in matches]
+            return "\n".join(files)
+        else:
+            # API: list_files(path) -> List[FileInfo]
+            file_infos = await self.sandbox.fs.list_files(path=path)
+
+            if not file_infos:
+                return f"No files found in {path}"
+
+            # Format the output with file info
+            lines = []
+            for info in file_infos:
+                # FileInfo has .name, .size, .mode, .is_dir, etc
+                if info.is_dir:
+                    lines.append(f"{info.name}/")
+                else:
+                    lines.append(f"{info.name} ({info.size} bytes)")
+            return "\n".join(lines)
+
+    async def _get_preview_link(self, port: int = 8080) -> str:
+        """
+        Get a preview link for exposing a port.
+
+        Args:
+            port: Port number to expose
+
+        Returns:
+            Preview URL and token information
+        """
+        await self._ensure_initialized()
+        assert self.sandbox, "no sandbox"
+        preview = await self.sandbox.get_preview_link(port)
+
+        result = f"URL: {preview.url}"
+        if hasattr(preview, "token") and preview.token:
+            result += f"\nToken: {preview.token}"
+
+        return result
+
+    async def _get_working_dir(self) -> str:
+        """Get the current working directory in the sandbox."""
+        await self._ensure_initialized()
+        assert self.sandbox, "no sandbox"
+        return await self.sandbox.get_work_dir()
+
+    async def _destroy(self):
+        """Delete the sandbox and clean up resources."""
+        if self.sandbox and not self._destroyed:
+            await self.sandbox.delete()
+            self._destroyed = True
+            self._initialized = False
+            self.sandbox = None
+
+    def get_tools(self):
+        """Return list of tools for LLM use."""
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the Daytona sandbox environment. "
+                "The command runs in a persistent Linux environment. "
+                "Provide the command as a string (e.g., 'ls -la' or 'python script.py'). "
+                "Output is truncated to the last 5000 characters if longer. "
+                "Exit codes are included in output if non-zero."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds for the command execution (default: 30)",
+                },
+                "cwd": {
+                    "type": "string",
+                    "description": "Working directory for the command (default: current directory)",
+                },
+                "env": {
+                    "type": "object",
+                    "description": "Environment variables for the command (optional)",
+                },
+            },
+            required=["command"],
+        )
+
+        read_file_tool = Tool(
+            name="read_file",
+            description=(
+                "Read the contents of a file from the sandbox filesystem. "
+                "Provide the absolute or relative path to the file. "
+                "Files larger than 50KB are truncated."
+            ),
+            run=self._read_file,
+            parameters={
+                "path": {
+                    "type": "string",
+                    "description": "Path to the file to read (e.g., '/home/user/script.py')",
+                },
+                "max_size": {
+                    "type": "integer",
+                    "description": "Maximum file size in bytes to read (default: 50000)",
+                },
+            },
+            required=["path"],
+        )
+
+        write_file_tool = Tool(
+            name="write_file",
+            description=(
+                "Write content to a file in the sandbox filesystem. "
+                "Creates the file if it doesn't exist, overwrites if it does. "
+                "Parent directories must exist."
+            ),
+            run=self._write_file,
+            parameters={
+                "path": {
+                    "type": "string",
+                    "description": "Path where to write the file (e.g., '/home/user/script.py')",
+                },
+                "content": {
+                    "type": "string",
+                    "description": "Content to write to the file",
+                },
+            },
+            required=["path", "content"],
+        )
+
+        list_files_tool = Tool(
+            name="list_files",
+            description=(
+                "List files and directories in the sandbox filesystem. "
+                "Useful for exploring the sandbox environment and finding files. "
+                "Optionally filter by glob pattern (e.g., '*.py', '**/*.txt')."
+            ),
+            run=self._list_files,
+            parameters={
+                "path": {
+                    "type": "string",
+                    "description": "Directory path to list (default: current directory)",
+                },
+                "pattern": {
+                    "type": "string",
+                    "description": "Glob pattern to filter files (e.g., '*.py', '**/*.txt')",
+                },
+            },
+            required=[],
+        )
+
+        preview_tool = Tool(
+            name="get_preview_link",
+            description=(
+                "Get a public URL to access a port in the sandbox. "
+                "Useful for exposing web servers or applications running in the sandbox. "
+                "Returns a URL and authentication token if needed."
+            ),
+            run=self._get_preview_link,
+            parameters={
+                "port": {
+                    "type": "integer",
+                    "description": "Port number to expose (default: 8080)",
+                },
+            },
+            required=[],
+        )
+
+        workdir_tool = Tool(
+            name="get_working_directory",
+            description=(
+                "Get the current working directory path in the sandbox. "
+                "Useful for understanding the sandbox environment layout."
+            ),
+            run=self._get_working_dir,
+            parameters={},
+            required=[],
+        )
+
+        return [
+            bash_tool,
+            read_file_tool,
+            write_file_tool,
+            list_files_tool,
+            preview_tool,
+            workdir_tool,
+        ]
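Likewise, a hypothetical sketch for the Daytona variant (assumes daytona_sdk is installed and DAYTONA_API_KEY is set; the underscored methods are the same callables the tools above wrap):

    import asyncio
    from lm_deluge.tool.prefab.sandbox import DaytonaSandbox

    async def main():
        async with DaytonaSandbox() as sandbox:
            await sandbox._write_file("/tmp/hello.py", "print('hi')")
            print(await sandbox._exec("python /tmp/hello.py"))

    asyncio.run(main())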
+
+
+class DockerSandbox:
+    """
+    Local Docker-based sandbox for running code in isolated containers.
+
+    Works with Docker Desktop, Colima, or any Docker-compatible runtime.
+    Each sandbox instance creates its own container.
+
+    Requires:
+        - docker package installed (pip install docker)
+        - Docker daemon running (Docker Desktop, Colima, etc.)
+
+    Example:
+        async with DockerSandbox() as sandbox:
+            tools = sandbox.get_tools()
+            # Use tools with your LLM...
+    """
+
+    # Default image - has uv pre-installed, Debian Bookworm base
+    DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"
+
+    def __init__(
+        self,
+        image: str | None = None,
+        *,
+        docker_host: str | None = None,
+        network_mode: str = "bridge",
+        mem_limit: str = "512m",
+        cpu_period: int = 100000,
+        cpu_quota: int | None = None,
+        working_dir: str = "/workspace",
+    ):
+        """
+        Initialize a Docker sandbox.
+
+        Args:
+            image: Docker image to use. Defaults to uv's Python 3.12 image.
+            docker_host: Docker socket URL. If None, auto-detects from DOCKER_HOST
+                env var or tries common socket paths.
+            network_mode: Docker network mode. "bridge" (default) for internet access,
+                "none" for full isolation.
+            mem_limit: Memory limit (e.g., "512m", "1g"). Default "512m".
+            cpu_period: CPU period in microseconds. Default 100000.
+            cpu_quota: CPU quota in microseconds. None for no limit.
+                E.g., 50000 with period 100000 = 50% of one CPU.
+            working_dir: Working directory inside container. Default "/workspace".
+        """
+        self.image = image or self.DEFAULT_IMAGE
+        self.docker_host = docker_host
+        self.network_mode = network_mode
+        self.mem_limit = mem_limit
+        self.cpu_period = cpu_period
+        self.cpu_quota = cpu_quota
+        self.working_dir = working_dir
+
+        # State
+        self.container = None
+        self._client = None
+        self._initialized = False
+        self._destroyed = False
+
+        # Process tracking for background processes
+        self.processes: dict[str, TrackedProcess] = {}
+        self.process_counter: int = 0
+
+    @property
+    def client(self):
+        """Lazy-load Docker client."""
+        if self._client is None:
+            import docker
+
+            if self.docker_host:
+                self._client = docker.DockerClient(base_url=self.docker_host)
+            else:
+                # Auto-detect socket location
+                # Try DOCKER_HOST env first, then common socket paths
+                docker_host = os.environ.get("DOCKER_HOST")
+                if not docker_host:
+                    # Common socket paths (Docker Desktop, Colima, Podman, etc.)
+                    socket_paths = [
+                        os.path.expanduser("~/.colima/default/docker.sock"),
+                        os.path.expanduser("~/.colima/docker.sock"),
+                        "/var/run/docker.sock",
+                        os.path.expanduser("~/.docker/run/docker.sock"),
+                        os.path.expanduser(
+                            "~/.local/share/containers/podman/machine/podman.sock"
+                        ),
+                    ]
+                    for path in socket_paths:
+                        if os.path.exists(path):
+                            docker_host = f"unix://{path}"
+                            break
+
+                if docker_host:
+                    self._client = docker.DockerClient(base_url=docker_host)
+                else:
+                    # Fall back to default (will likely fail but gives clear error)
+                    self._client = docker.from_env()
+        return self._client
+
+    async def __aenter__(self):
+        """Async context manager entry - initialize sandbox."""
+        await self._ensure_initialized()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - cleanup sandbox."""
+        if not self._destroyed:
+            await self._destroy()
+        return False
+
+    def __enter__(self):
+        """Sync context manager entry."""
+        import asyncio
+
+        asyncio.get_event_loop().run_until_complete(self._ensure_initialized())
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Sync context manager exit."""
+        if not self._destroyed:
+            self._destroy_sync()
+        return False
+
+    def __del__(self):
+        """Cleanup container when garbage collected (backup cleanup)."""
+        if not self._destroyed and self.container:
+            import warnings
+
+            warnings.warn(
+                "DockerSandbox was not properly cleaned up. "
+                "Use 'with DockerSandbox(...) as sandbox:' for automatic cleanup.",
+                ResourceWarning,
+                stacklevel=2,
+            )
+
+    async def _ensure_initialized(self):
+        """Lazy initialization - pull image if needed and start container."""
+        if self._initialized:
+            return
+
+        # Pull image if not present
+        await asyncio.to_thread(self._pull_image_if_needed)
+
+        # Create and start container
+        await asyncio.to_thread(self._create_container)
+
+        self._initialized = True
+
+    def _pull_image_if_needed(self):
+        """Pull the Docker image if not already present."""
+        try:
+            self.client.images.get(self.image)
+        except Exception:
+            # Image not found locally, pull it
+            self.client.images.pull(self.image)
+
+    def _create_container(self):
+        """Create and start the container."""
+        self.container = self.client.containers.run(
+            self.image,
+            command=["sleep", "infinity"],
+            detach=True,
+            remove=True,  # Auto-remove when stopped
+            network_mode=self.network_mode,
+            mem_limit=self.mem_limit,
+            cpu_period=self.cpu_period,
+            cpu_quota=self.cpu_quota,
+            working_dir=self.working_dir,
+            # Create the working directory
+            entrypoint=[
+                "/bin/sh",
+                "-c",
+                f"mkdir -p {self.working_dir} && sleep infinity",
+            ],
+        )
+
+    def _generate_process_name(self) -> str:
+        """Generate a unique process name like p1, p2, etc."""
+        self.process_counter += 1
+        return f"p{self.process_counter}"
+
+    async def _exec(
+        self,
+        command: str,
+        timeout: int = 60,
+        wait: bool = True,
+        name: str | None = None,
+    ) -> str:
+        """
+        Execute a command in the sandbox.
+
+        Args:
+            command: Shell command to execute
+            timeout: Timeout in seconds (only applies when wait=True)
+            wait: If True, wait for completion. If False, run in background.
+            name: Name for background process (auto-generated if not provided)
+
+        Returns:
+            Command output if wait=True, or status message if wait=False
+        """
+        await self._ensure_initialized()
+        assert self.container is not None, "Container not initialized"
+
+        if wait:
+            # Synchronous execution with timeout
+            try:
+                exit_code, output = await asyncio.wait_for(
+                    asyncio.to_thread(
+                        self.container.exec_run,
+                        ["sh", "-c", command],
+                        workdir=self.working_dir,
+                    ),
+                    timeout=timeout,
+                )
+            except asyncio.TimeoutError:
+                return f"[Timeout after {timeout}s]"
+
+            # Decode output
+            if isinstance(output, bytes):
+                output = output.decode("utf-8", errors="replace")
+
+            # Truncate if needed
+            if len(output) > 5000:
+                output = "...[truncated]...\n" + output[-5000:]
+
+            # Include exit code if non-zero
+            if exit_code != 0:
+                output = f"[Exit code: {exit_code}]\n{output}"
+
+            return output if output else "(no output)"
+        else:
+            # Background execution
+            exec_id = await asyncio.to_thread(
+                self.client.api.exec_create,
+                self.container.id,
+                ["sh", "-c", command],
+                workdir=self.working_dir,
+            )
+            await asyncio.to_thread(
+                self.client.api.exec_start,
+                exec_id,
+                detach=True,
+            )
+
+            proc_name = name or self._generate_process_name()
+            tracked = TrackedProcess(
+                process=exec_id,
+                name=proc_name,
+                command=command,
+            )
+            self.processes[proc_name] = tracked
+
+            return (
+                f"Started background process '{proc_name}'.\n"
+                f"Command: {command}\n"
+                f"Use list_processes() to check status."
+            )
+
+    def _check_process(self, name: str | None = None) -> str:
+        """Check status of background processes."""
+        if not self.processes:
+            return "No background processes have been started."
+
+        if name:
+            proc = self.processes.get(name)
+            if not proc:
+                available = ", ".join(self.processes.keys())
+                return f"Process '{name}' not found. Available: {available}"
+
+            # Check exec status
+            exec_info = self.client.api.exec_inspect(proc.process)
+            running = exec_info.get("Running", False)
+            exit_code = exec_info.get("ExitCode")
+
+            if running:
+                status = "running"
+            else:
+                status = f"completed (exit code: {exit_code})"
+
+            elapsed = time.time() - proc.started_at
+            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
+        else:
+            # Show all processes
+            lines = ["NAME     STATUS              COMMAND"]
+            for proc_name, proc in self.processes.items():
+                exec_info = self.client.api.exec_inspect(proc.process)
+                running = exec_info.get("Running", False)
+                exit_code = exec_info.get("ExitCode")
+
+                if running:
+                    status = "running"
+                else:
+                    status = f"exit {exit_code}"
+
+                cmd_display = (
+                    proc.command[:40] + "..."
+                    if len(proc.command) > 40
+                    else proc.command
+                )
+                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")
+
+            return "\n".join(lines)
+
+    async def _destroy(self):
+        """Stop the container and clean up."""
+        if self._destroyed:
+            return
+
+        if self.container:
+            try:
+                await asyncio.to_thread(self.container.stop, timeout=5)
+            except Exception:
+                pass  # Container might already be stopped
+
+        self._destroyed = True
+        self._initialized = False
+
+    def _destroy_sync(self):
+        """Synchronous version of destroy."""
+        if self._destroyed:
+            return
+
+        if self.container:
+            try:
+                self.container.stop(timeout=5)
+            except Exception:
+                pass
+
+        self._destroyed = True
+        self._initialized = False
+
+    def get_tools(self):
+        """Return list of tools for LLM use."""
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the Docker sandbox environment. "
+                "The sandbox has Python 3.12 and uv pre-installed. "
+                "Use 'apt-get update && apt-get install -y <package>' for system packages. "
+                "Set wait=false to run servers or long-running processes in background."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "The shell command to execute",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds (default: 60, only for wait=true)",
+                },
+                "wait": {
+                    "type": "boolean",
+                    "description": "If true (default), wait for completion. If false, run in background.",
+                },
+                "name": {
+                    "type": "string",
+                    "description": "Name for background process (e.g., 'server'). Only used with wait=false.",
+                },
+            },
+            required=["command"],
+        )
+
+        check_tool = Tool(
+            name="list_processes",
+            description="Check status of background processes started with wait=false.",
+            run=self._check_process,
+            parameters={
+                "name": {
+                    "type": "string",
+                    "description": "Process name to check, or omit to see all processes",
+                },
+            },
+            required=[],
+        )
+
+        return [bash_tool, check_tool]
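The background-process flow above (wait=False plus list_processes) in a short sketch (hypothetical usage; assumes a running Docker daemon and that python is available in the default image):

    import asyncio
    from lm_deluge.tool.prefab.sandbox import DockerSandbox

    async def main():
        async with DockerSandbox() as sandbox:
            # Start a server without blocking; it is tracked as 'server'
            print(await sandbox._exec("python -m http.server 8080", wait=False, name="server"))
            await asyncio.sleep(1)
            # Verify from inside the container, then check tracked status
            print(await sandbox._exec(
                "python -c \"import urllib.request;"
                "print(urllib.request.urlopen('http://localhost:8080').status)\""
            ))
            print(sandbox._check_process("server"))

    asyncio.run(main())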
+
+
+class FargateSandbox:
+    """
+    AWS Fargate-based sandbox for running untrusted code in isolated containers.
+
+    Requires:
+        - boto3 installed
+        - AWS credentials configured
+        - VPC with subnets that have internet access (for pulling images)
+        - Security group that allows outbound traffic
+
+    The sandbox automatically:
+        - Creates IAM roles for task execution and ECS Exec
+        - Registers a task definition with the specified image
+        - Runs a Fargate task and waits for it to be ready
+        - Executes commands via ECS Exec (SSM Session Manager)
+
+    Example:
+        async with FargateSandbox(
+            subnets=["subnet-abc123"],
+            security_groups=["sg-abc123"],
+        ) as sandbox:
+            tools = sandbox.get_tools()
+            # Use tools with your LLM...
+    """
+
+    # Default image - minimal Python with common tools
+    DEFAULT_IMAGE = "python:3.12-slim"
+
+    # IAM policy for ECS Exec (SSM Session Manager)
+    EXEC_POLICY = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Action": [
+                    "ssmmessages:CreateControlChannel",
+                    "ssmmessages:CreateDataChannel",
+                    "ssmmessages:OpenControlChannel",
+                    "ssmmessages:OpenDataChannel",
+                ],
+                "Resource": "*",
+            }
+        ],
+    }
+
+    # Trust policy for ECS tasks
+    TASK_TRUST_POLICY = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Principal": {"Service": "ecs-tasks.amazonaws.com"},
+                "Action": "sts:AssumeRole",
+            }
+        ],
+    }
+
+    def __init__(
+        self,
+        subnets: list[str],
+        security_groups: list[str],
+        *,
+        cluster: str | None = None,
+        image: str | None = None,
+        cpu: int = 256,
+        memory: int = 512,
+        region: str | None = None,
+        task_role_arn: str | None = None,
+        execution_role_arn: str | None = None,
+        assign_public_ip: bool = True,
+    ):
+        """
+        Initialize a Fargate sandbox.
+
+        Args:
+            subnets: List of VPC subnet IDs (required). Use subnets with internet
+                access (public subnets with IGW, or private with NAT).
+            security_groups: List of security group IDs (required). Must allow
+                outbound HTTPS (443) for ECS Exec to work.
+            cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
+            image: Docker image to use. Defaults to python:3.12-slim.
+            cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
+            memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
+            region: AWS region. If None, uses boto3 default.
+            task_role_arn: IAM role ARN for the task. If None, creates one with
+                minimal permissions (just SSM for ECS Exec).
+            execution_role_arn: IAM role ARN for task execution. If None, uses
+                the AWS managed ecsTaskExecutionRole.
+            assign_public_ip: Whether to assign a public IP. Required if using
+                public subnets without NAT. Default True.
+        """
+        self.subnets = subnets
+        self.security_groups = security_groups
+        self.cluster = cluster or "lm-deluge-sandbox"
+        self.image = image or self.DEFAULT_IMAGE
+        self.cpu = str(cpu)
+        self.memory = str(memory)
+        self.region = region
+        self.task_role_arn = task_role_arn
+        self.execution_role_arn = execution_role_arn
+        self.assign_public_ip = assign_public_ip
+
+        # State
+        self.task_arn: str | None = None
+        self.task_definition_arn: str | None = None
+        self._initialized = False
+        self._destroyed = False
+
+        # boto3 clients (lazy init)
+        self._ecs_client = None
+        self._iam_client = None
+
+    @property
+    def ecs(self):
+        """Lazy-load ECS client."""
+        if self._ecs_client is None:
+            import boto3
+
+            self._ecs_client = boto3.client("ecs", region_name=self.region)
+        return self._ecs_client
+
+    @property
+    def iam(self):
+        """Lazy-load IAM client."""
+        if self._iam_client is None:
+            import boto3
+
+            self._iam_client = boto3.client("iam", region_name=self.region)
+        return self._iam_client
+
+    async def __aenter__(self):
+        """Async context manager entry - initialize sandbox."""
+        await self._ensure_initialized()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - cleanup sandbox."""
+        if not self._destroyed:
+            await self._destroy()
+        return False
+
+    def __del__(self):
+        """Cleanup sandbox when garbage collected (backup cleanup)."""
+        if not self._destroyed and self.task_arn:
+            import warnings
+
+            warnings.warn(
+                "FargateSandbox was not properly cleaned up. "
+                "Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
+                ResourceWarning,
+                stacklevel=2,
+            )
+
+    async def _ensure_initialized(self):
+        """Lazy initialization - create cluster, task def, and run task."""
+        if self._initialized:
+            return
+
+        # Ensure cluster exists
+        await self._ensure_cluster()
+
+        # Ensure IAM roles exist
+        await self._ensure_roles()
+
+        # Register task definition
+        await self._register_task_definition()
+
+        # Run the task
+        await self._run_task()
+
+        # Wait for task to be running
+        await self._wait_for_task()
+
+        self._initialized = True
+
+    async def _ensure_cluster(self):
+        """Create ECS cluster if it doesn't exist."""
+        try:
+            response = await asyncio.to_thread(
+                self.ecs.describe_clusters, clusters=[self.cluster]
+            )
+            clusters = response.get("clusters", [])
+            if clusters and clusters[0].get("status") == "ACTIVE":
+                return  # Cluster exists
+        except Exception:
+            pass
+
+        # Create cluster
+        await asyncio.to_thread(
+            self.ecs.create_cluster,
+            clusterName=self.cluster,
+            settings=[
+                {"name": "containerInsights", "value": "disabled"},
+            ],
+        )
+
+    async def _ensure_roles(self):
+        """Create IAM roles if not provided."""
+        # Task role (for ECS Exec)
+        if not self.task_role_arn:
+            role_name = "lm-deluge-sandbox-task-role"
+            try:
+                response = await asyncio.to_thread(
+                    self.iam.get_role, RoleName=role_name
+                )
+                self.task_role_arn = response["Role"]["Arn"]
+            except self.iam.exceptions.NoSuchEntityException:
+                # Create the role
+                response = await asyncio.to_thread(
+                    self.iam.create_role,
+                    RoleName=role_name,
+                    AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
+                    Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
+                )
+                self.task_role_arn = response["Role"]["Arn"]
+
+                # Attach inline policy for ECS Exec
+                await asyncio.to_thread(
+                    self.iam.put_role_policy,
+                    RoleName=role_name,
+                    PolicyName="ecs-exec-policy",
+                    PolicyDocument=json.dumps(self.EXEC_POLICY),
+                )
+
+                # IAM is eventually consistent - wait a bit
+                await asyncio.sleep(5)
+
+        # Execution role (for pulling images, logs)
+        if not self.execution_role_arn:
+            role_name = "lm-deluge-sandbox-execution-role"
+            try:
+                response = await asyncio.to_thread(
+                    self.iam.get_role, RoleName=role_name
+                )
+                self.execution_role_arn = response["Role"]["Arn"]
+            except self.iam.exceptions.NoSuchEntityException:
+                # Create the role
+                response = await asyncio.to_thread(
+                    self.iam.create_role,
+                    RoleName=role_name,
+                    AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
+                    Description="Execution role for lm-deluge Fargate sandbox",
+                )
+                self.execution_role_arn = response["Role"]["Arn"]
+
+                # Attach AWS managed policy
+                await asyncio.to_thread(
+                    self.iam.attach_role_policy,
+                    RoleName=role_name,
+                    PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
+                )
+
+                # IAM is eventually consistent - wait a bit
+                await asyncio.sleep(5)
+
+    async def _register_task_definition(self):
+        """Register a task definition for the sandbox."""
+        family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"
+
+        response = await asyncio.to_thread(
+            self.ecs.register_task_definition,
+            family=family,
+            networkMode="awsvpc",
+            requiresCompatibilities=["FARGATE"],
+            cpu=self.cpu,
+            memory=self.memory,
+            taskRoleArn=self.task_role_arn,
+            executionRoleArn=self.execution_role_arn,
+            containerDefinitions=[
+                {
+                    "name": "sandbox",
+                    "image": self.image,
+                    "essential": True,
+                    # Keep container running - sleep infinity
+                    "command": ["sh", "-c", "sleep infinity"],
+                    "linuxParameters": {
+                        "initProcessEnabled": True,  # Required for ECS Exec
+                    },
+                }
+            ],
+        )
+        self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
+
+    async def _run_task(self):
+        """Run a Fargate task."""
+        response = await asyncio.to_thread(
+            self.ecs.run_task,
+            cluster=self.cluster,
+            taskDefinition=self.task_definition_arn,
+            launchType="FARGATE",
+            enableExecuteCommand=True,  # Enable ECS Exec
+            networkConfiguration={
+                "awsvpcConfiguration": {
+                    "subnets": self.subnets,
+                    "securityGroups": self.security_groups,
+                    "assignPublicIp": "ENABLED"
+                    if self.assign_public_ip
+                    else "DISABLED",
+                }
+            },
+        )
+
+        tasks = response.get("tasks", [])
+        if not tasks:
+            failures = response.get("failures", [])
+            raise RuntimeError(f"Failed to run task: {failures}")
+
+        self.task_arn = tasks[0]["taskArn"]
+
+    async def _wait_for_task(self, timeout: int = 120):
+        """Wait for task to reach RUNNING state."""
+        start = time.time()
+        while time.time() - start < timeout:
+            response = await asyncio.to_thread(
+                self.ecs.describe_tasks,
+                cluster=self.cluster,
+                tasks=[self.task_arn],
+            )
+            tasks = response.get("tasks", [])
+            if tasks:
+                status = tasks[0].get("lastStatus")
+                if status == "RUNNING":
+                    # Also check that execute command agent is running
+                    containers = tasks[0].get("containers", [])
+                    for container in containers:
+                        managed_agents = container.get("managedAgents", [])
+                        for agent in managed_agents:
+                            if agent.get("name") == "ExecuteCommandAgent":
+                                if agent.get("lastStatus") == "RUNNING":
+                                    return
+                elif status in ("STOPPED", "DEACTIVATING"):
+                    reason = tasks[0].get("stoppedReason", "Unknown")
+                    raise RuntimeError(f"Task stopped: {reason}")
+
+            await asyncio.sleep(2)
+
+        raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")
+
+    async def _exec(
+        self,
+        command: str,
+        timeout: int = 60,
+    ) -> str:
+        """
+        Execute a command in the sandbox.
+
+        Args:
+            command: Shell command to execute
+            timeout: Timeout in seconds
+
+        Returns:
+            Command output (stdout + stderr)
+        """
+        await self._ensure_initialized()
+
+        # Call ECS execute_command
+        response = await asyncio.to_thread(
+            self.ecs.execute_command,
+            cluster=self.cluster,
+            task=self.task_arn,
+            container="sandbox",
+            interactive=True,
+            command=f"/bin/sh -c {shlex.quote(command)}",
+        )
+
+        session = response.get("session", {})
+        stream_url = session.get("streamUrl")
+        token = session.get("tokenValue")
+
+        if not stream_url or not token:
+            return f"Error: Failed to get session: {response}"
+
+        # Connect to websocket and read output
+        try:
+            output = await self._read_ssm_session(stream_url, token, timeout)
+        except Exception as e:
+            return f"Error executing command: {e}"
+
+        # Truncate if needed
+        if len(output) > 5000:
+            output = "...[truncated]...\n" + output[-5000:]
+
+        return output if output else "(no output)"
+
+    async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
+        """
+        Connect to SSM session websocket and read command output.
+
+        The SSM agent uses a binary protocol:
+        - Header: 4-byte big-endian length + 32-byte null-padded message type
+        - Payload varies by message type
+
+        Note: SSM retransmits messages until ACKed. Since we're just reading
+        (not fully implementing the protocol), we deduplicate by tracking
+        seen message hashes.
+        """
+        import aiohttp
+
+        output_chunks = []
+        seen_messages: set[bytes] = set()  # Dedupe retransmissions
+
+        async with aiohttp.ClientSession() as session:
+            async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
+                # Send init message with token
+                init_message = {
+                    "MessageSchemaVersion": "1.0",
+                    "RequestId": str(uuid.uuid4()),
+                    "TokenValue": token,
+                }
+                await ws.send_str(json.dumps(init_message))
+
+                # Read messages until channel closes or timeout
+                try:
+                    async for msg in ws:
+                        if msg.type == aiohttp.WSMsgType.BINARY:
+                            # Skip duplicate messages (SSM retransmits until ACKed)
+                            msg_hash = msg.data[:116]  # Header is enough to identify
+                            if msg_hash in seen_messages:
+                                continue
+                            seen_messages.add(msg_hash)
+
+                            parsed = self._parse_ssm_message(msg.data)
+                            if parsed:
+                                msg_type, payload = parsed
+                                if "output_stream_data" in msg_type:
+                                    output_chunks.append(payload)
+                                elif "channel_closed" in msg_type:
+                                    break
+                        elif msg.type == aiohttp.WSMsgType.ERROR:
+                            break
+                        elif msg.type == aiohttp.WSMsgType.CLOSED:
+                            break
+                except asyncio.TimeoutError:
+                    pass
+
+        return "".join(output_chunks)
+
+    def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
+        """
+        Parse an SSM agent message.
+
+        Format:
+        - Bytes 0-3: Header length (big-endian uint32)
+        - Bytes 4-35: Message type (32 bytes, null-padded ASCII)
+        - After header: Payload length (4 bytes) + payload
+        """
+        if len(data) < 36:
+            return None
+
+        try:
+            header_len = struct.unpack(">I", data[0:4])[0]
+            msg_type = data[4:36].decode("ascii").rstrip("\x00")
+
+            # Payload starts after header
+            if len(data) > header_len:
+                payload_data = data[header_len:]
+                if len(payload_data) >= 4:
+                    payload_len = struct.unpack(">I", payload_data[0:4])[0]
+                    if len(payload_data) >= 4 + payload_len:
+                        payload = payload_data[4 : 4 + payload_len].decode(
+                            "utf-8", errors="replace"
+                        )
+                        return msg_type, payload
+
+            return msg_type, ""
+        except Exception:
+            return None
+
+    async def _destroy(self):
+        """Stop the task and clean up."""
+        if self._destroyed:
+            return
+
+        if self.task_arn:
+            try:
+                await asyncio.to_thread(
+                    self.ecs.stop_task,
+                    cluster=self.cluster,
+                    task=self.task_arn,
+                    reason="Sandbox destroyed",
+                )
+            except Exception:
+                pass  # Best effort
+
+        # Optionally deregister task definition
+        if self.task_definition_arn:
+            try:
+                await asyncio.to_thread(
+                    self.ecs.deregister_task_definition,
+                    taskDefinition=self.task_definition_arn,
+                )
+            except Exception:
+                pass
+
+        self._destroyed = True
+        self._initialized = False
+
+    def get_tools(self):
+        """Return list of tools for LLM use."""
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the AWS Fargate sandbox environment. "
+                "The command runs in an isolated container. "
+                "Output is truncated to the last 5000 characters if longer. "
+                "Note: This sandbox does not support background processes - "
+                "commands must complete within the timeout."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds for the command execution (default: 60)",
+                },
+            },
+            required=["command"],
+        )
+
+        return [bash_tool]
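To make the SSM framing that _read_ssm_session and _parse_ssm_message rely on concrete, here is a self-contained check against a synthetic frame (not real agent traffic; the 116-byte header length is an assumption matching the dedupe slice above):

    import struct

    header_len = 116
    frame = (
        struct.pack(">I", header_len)               # bytes 0-3: header length
        + b"output_stream_data".ljust(32, b"\x00")  # bytes 4-35: null-padded message type
        + b"\x00" * (header_len - 36)               # remainder of header (ignored by the parser)
        + struct.pack(">I", 6) + b"hello\n"         # payload length + payload
    )
    # Given any FargateSandbox instance sb:
    # sb._parse_ssm_message(frame) == ("output_stream_data", "hello\n")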