lm-deluge 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/api_requests/gemini.py +19 -7
- lm_deluge/models/google.py +13 -0
- lm_deluge/tool/prefab/__init__.py +9 -1
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +908 -0
- {lm_deluge-0.0.87.dist-info → lm_deluge-0.0.89.dist-info}/METADATA +12 -1
- {lm_deluge-0.0.87.dist-info → lm_deluge-0.0.89.dist-info}/RECORD +14 -9
- {lm_deluge-0.0.87.dist-info → lm_deluge-0.0.89.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.87.dist-info → lm_deluge-0.0.89.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.87.dist-info → lm_deluge-0.0.89.dist-info}/top_level.txt +0 -0
lm_deluge/tool/prefab/sandbox.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
1
3
|
import os
|
|
2
4
|
import secrets
|
|
3
5
|
import shlex
|
|
6
|
+
import struct
|
|
4
7
|
import time
|
|
8
|
+
import uuid
|
|
5
9
|
from dataclasses import dataclass, field
|
|
6
10
|
from typing import Any
|
|
7
11
|
|
|
@@ -711,3 +715,907 @@ class DaytonaSandbox:
|
|
|
711
715
|
preview_tool,
|
|
712
716
|
workdir_tool,
|
|
713
717
|
]
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
class DockerSandbox:
|
|
721
|
+
"""
|
|
722
|
+
Local Docker-based sandbox for running code in isolated containers.
|
|
723
|
+
|
|
724
|
+
Works with Docker Desktop, Colima, or any Docker-compatible runtime.
|
|
725
|
+
Each sandbox instance creates its own container.
|
|
726
|
+
|
|
727
|
+
Requires:
|
|
728
|
+
- docker package installed (pip install docker)
|
|
729
|
+
- Docker daemon running (Docker Desktop, Colima, etc.)
|
|
730
|
+
|
|
731
|
+
Example:
|
|
732
|
+
async with DockerSandbox() as sandbox:
|
|
733
|
+
tools = sandbox.get_tools()
|
|
734
|
+
# Use tools with your LLM...
|
|
735
|
+
"""
|
|
736
|
+
|
|
737
|
+
# Default image - has uv pre-installed, Debian Bookworm base
|
|
738
|
+
DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"
|
|
739
|
+
|
|
740
|
+
def __init__(
|
|
741
|
+
self,
|
|
742
|
+
image: str | None = None,
|
|
743
|
+
*,
|
|
744
|
+
docker_host: str | None = None,
|
|
745
|
+
network_mode: str = "bridge",
|
|
746
|
+
mem_limit: str = "512m",
|
|
747
|
+
cpu_period: int = 100000,
|
|
748
|
+
cpu_quota: int | None = None,
|
|
749
|
+
working_dir: str = "/workspace",
|
|
750
|
+
):
|
|
751
|
+
"""
|
|
752
|
+
Initialize a Docker sandbox.
|
|
753
|
+
|
|
754
|
+
Args:
|
|
755
|
+
image: Docker image to use. Defaults to uv's Python 3.12 image.
|
|
756
|
+
docker_host: Docker socket URL. If None, auto-detects from DOCKER_HOST
|
|
757
|
+
env var or tries common socket paths.
|
|
758
|
+
network_mode: Docker network mode. "bridge" (default) for internet access,
|
|
759
|
+
"none" for full isolation.
|
|
760
|
+
mem_limit: Memory limit (e.g., "512m", "1g"). Default "512m".
|
|
761
|
+
cpu_period: CPU period in microseconds. Default 100000.
|
|
762
|
+
cpu_quota: CPU quota in microseconds. None for no limit.
|
|
763
|
+
E.g., 50000 with period 100000 = 50% of one CPU.
|
|
764
|
+
working_dir: Working directory inside container. Default "/workspace".
|
|
765
|
+
"""
|
|
766
|
+
self.image = image or self.DEFAULT_IMAGE
|
|
767
|
+
self.docker_host = docker_host
|
|
768
|
+
self.network_mode = network_mode
|
|
769
|
+
self.mem_limit = mem_limit
|
|
770
|
+
self.cpu_period = cpu_period
|
|
771
|
+
self.cpu_quota = cpu_quota
|
|
772
|
+
self.working_dir = working_dir
|
|
773
|
+
|
|
774
|
+
# State
|
|
775
|
+
self.container = None
|
|
776
|
+
self._client = None
|
|
777
|
+
self._initialized = False
|
|
778
|
+
self._destroyed = False
|
|
779
|
+
|
|
780
|
+
# Process tracking for background processes
|
|
781
|
+
self.processes: dict[str, TrackedProcess] = {}
|
|
782
|
+
self.process_counter: int = 0
|
|
783
|
+
|
|
784
|
+
@property
|
|
785
|
+
def client(self):
|
|
786
|
+
"""Lazy-load Docker client."""
|
|
787
|
+
if self._client is None:
|
|
788
|
+
import docker
|
|
789
|
+
|
|
790
|
+
if self.docker_host:
|
|
791
|
+
self._client = docker.DockerClient(base_url=self.docker_host)
|
|
792
|
+
else:
|
|
793
|
+
# Auto-detect socket location
|
|
794
|
+
# Try DOCKER_HOST env first, then common socket paths
|
|
795
|
+
docker_host = os.environ.get("DOCKER_HOST")
|
|
796
|
+
if not docker_host:
|
|
797
|
+
# Common socket paths (Docker Desktop, Colima, Podman, etc.)
|
|
798
|
+
socket_paths = [
|
|
799
|
+
os.path.expanduser("~/.colima/default/docker.sock"),
|
|
800
|
+
os.path.expanduser("~/.colima/docker.sock"),
|
|
801
|
+
"/var/run/docker.sock",
|
|
802
|
+
os.path.expanduser("~/.docker/run/docker.sock"),
|
|
803
|
+
os.path.expanduser(
|
|
804
|
+
"~/.local/share/containers/podman/machine/podman.sock"
|
|
805
|
+
),
|
|
806
|
+
]
|
|
807
|
+
for path in socket_paths:
|
|
808
|
+
if os.path.exists(path):
|
|
809
|
+
docker_host = f"unix://{path}"
|
|
810
|
+
break
|
|
811
|
+
|
|
812
|
+
if docker_host:
|
|
813
|
+
self._client = docker.DockerClient(base_url=docker_host)
|
|
814
|
+
else:
|
|
815
|
+
# Fall back to default (will likely fail but gives clear error)
|
|
816
|
+
self._client = docker.from_env()
|
|
817
|
+
return self._client
|
|
818
|
+
|
|
819
|
+
async def __aenter__(self):
|
|
820
|
+
"""Async context manager entry - initialize sandbox."""
|
|
821
|
+
await self._ensure_initialized()
|
|
822
|
+
return self
|
|
823
|
+
|
|
824
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
825
|
+
"""Async context manager exit - cleanup sandbox."""
|
|
826
|
+
if not self._destroyed:
|
|
827
|
+
await self._destroy()
|
|
828
|
+
return False
|
|
829
|
+
|
|
830
|
+
def __enter__(self):
|
|
831
|
+
"""Sync context manager entry."""
|
|
832
|
+
import asyncio
|
|
833
|
+
|
|
834
|
+
asyncio.get_event_loop().run_until_complete(self._ensure_initialized())
|
|
835
|
+
return self
|
|
836
|
+
|
|
837
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
838
|
+
"""Sync context manager exit."""
|
|
839
|
+
if not self._destroyed:
|
|
840
|
+
self._destroy_sync()
|
|
841
|
+
return False
|
|
842
|
+
|
|
843
|
+
def __del__(self):
|
|
844
|
+
"""Cleanup container when garbage collected (backup cleanup)."""
|
|
845
|
+
if not self._destroyed and self.container:
|
|
846
|
+
import warnings
|
|
847
|
+
|
|
848
|
+
warnings.warn(
|
|
849
|
+
"DockerSandbox was not properly cleaned up. "
|
|
850
|
+
"Use 'with DockerSandbox(...) as sandbox:' for automatic cleanup.",
|
|
851
|
+
ResourceWarning,
|
|
852
|
+
stacklevel=2,
|
|
853
|
+
)
|
|
854
|
+
|
|
855
|
+
async def _ensure_initialized(self):
|
|
856
|
+
"""Lazy initialization - pull image if needed and start container."""
|
|
857
|
+
if self._initialized:
|
|
858
|
+
return
|
|
859
|
+
|
|
860
|
+
# Pull image if not present
|
|
861
|
+
await asyncio.to_thread(self._pull_image_if_needed)
|
|
862
|
+
|
|
863
|
+
# Create and start container
|
|
864
|
+
await asyncio.to_thread(self._create_container)
|
|
865
|
+
|
|
866
|
+
self._initialized = True
|
|
867
|
+
|
|
868
|
+
def _pull_image_if_needed(self):
|
|
869
|
+
"""Pull the Docker image if not already present."""
|
|
870
|
+
try:
|
|
871
|
+
self.client.images.get(self.image)
|
|
872
|
+
except Exception:
|
|
873
|
+
# Image not found locally, pull it
|
|
874
|
+
self.client.images.pull(self.image)
|
|
875
|
+
|
|
876
|
+
def _create_container(self):
|
|
877
|
+
"""Create and start the container."""
|
|
878
|
+
self.container = self.client.containers.run(
|
|
879
|
+
self.image,
|
|
880
|
+
command=["sleep", "infinity"],
|
|
881
|
+
detach=True,
|
|
882
|
+
remove=True, # Auto-remove when stopped
|
|
883
|
+
network_mode=self.network_mode,
|
|
884
|
+
mem_limit=self.mem_limit,
|
|
885
|
+
cpu_period=self.cpu_period,
|
|
886
|
+
cpu_quota=self.cpu_quota,
|
|
887
|
+
working_dir=self.working_dir,
|
|
888
|
+
# Create the working directory
|
|
889
|
+
entrypoint=[
|
|
890
|
+
"/bin/sh",
|
|
891
|
+
"-c",
|
|
892
|
+
f"mkdir -p {self.working_dir} && sleep infinity",
|
|
893
|
+
],
|
|
894
|
+
)
|
|
895
|
+
|
|
896
|
+
def _generate_process_name(self) -> str:
|
|
897
|
+
"""Generate a unique process name like p1, p2, etc."""
|
|
898
|
+
self.process_counter += 1
|
|
899
|
+
return f"p{self.process_counter}"
|
|
900
|
+
|
|
901
|
+
async def _exec(
|
|
902
|
+
self,
|
|
903
|
+
command: str,
|
|
904
|
+
timeout: int = 60,
|
|
905
|
+
wait: bool = True,
|
|
906
|
+
name: str | None = None,
|
|
907
|
+
) -> str:
|
|
908
|
+
"""
|
|
909
|
+
Execute a command in the sandbox.
|
|
910
|
+
|
|
911
|
+
Args:
|
|
912
|
+
command: Shell command to execute
|
|
913
|
+
timeout: Timeout in seconds (only applies when wait=True)
|
|
914
|
+
wait: If True, wait for completion. If False, run in background.
|
|
915
|
+
name: Name for background process (auto-generated if not provided)
|
|
916
|
+
|
|
917
|
+
Returns:
|
|
918
|
+
Command output if wait=True, or status message if wait=False
|
|
919
|
+
"""
|
|
920
|
+
await self._ensure_initialized()
|
|
921
|
+
assert self.container is not None, "Container not initialized"
|
|
922
|
+
|
|
923
|
+
if wait:
|
|
924
|
+
# Synchronous execution with timeout
|
|
925
|
+
try:
|
|
926
|
+
exit_code, output = await asyncio.wait_for(
|
|
927
|
+
asyncio.to_thread(
|
|
928
|
+
self.container.exec_run,
|
|
929
|
+
["sh", "-c", command],
|
|
930
|
+
workdir=self.working_dir,
|
|
931
|
+
),
|
|
932
|
+
timeout=timeout,
|
|
933
|
+
)
|
|
934
|
+
except asyncio.TimeoutError:
|
|
935
|
+
return f"[Timeout after {timeout}s]"
|
|
936
|
+
|
|
937
|
+
# Decode output
|
|
938
|
+
if isinstance(output, bytes):
|
|
939
|
+
output = output.decode("utf-8", errors="replace")
|
|
940
|
+
|
|
941
|
+
# Truncate if needed
|
|
942
|
+
if len(output) > 5000:
|
|
943
|
+
output = "...[truncated]...\n" + output[-5000:]
|
|
944
|
+
|
|
945
|
+
# Include exit code if non-zero
|
|
946
|
+
if exit_code != 0:
|
|
947
|
+
output = f"[Exit code: {exit_code}]\n{output}"
|
|
948
|
+
|
|
949
|
+
return output if output else "(no output)"
|
|
950
|
+
else:
|
|
951
|
+
# Background execution
|
|
952
|
+
exec_id = await asyncio.to_thread(
|
|
953
|
+
self.client.api.exec_create,
|
|
954
|
+
self.container.id,
|
|
955
|
+
["sh", "-c", command],
|
|
956
|
+
workdir=self.working_dir,
|
|
957
|
+
)
|
|
958
|
+
await asyncio.to_thread(
|
|
959
|
+
self.client.api.exec_start,
|
|
960
|
+
exec_id,
|
|
961
|
+
detach=True,
|
|
962
|
+
)
|
|
963
|
+
|
|
964
|
+
proc_name = name or self._generate_process_name()
|
|
965
|
+
tracked = TrackedProcess(
|
|
966
|
+
process=exec_id,
|
|
967
|
+
name=proc_name,
|
|
968
|
+
command=command,
|
|
969
|
+
)
|
|
970
|
+
self.processes[proc_name] = tracked
|
|
971
|
+
|
|
972
|
+
return (
|
|
973
|
+
f"Started background process '{proc_name}'.\n"
|
|
974
|
+
f"Command: {command}\n"
|
|
975
|
+
f"Use list_processes() to check status."
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
def _check_process(self, name: str | None = None) -> str:
|
|
979
|
+
"""Check status of background processes."""
|
|
980
|
+
if not self.processes:
|
|
981
|
+
return "No background processes have been started."
|
|
982
|
+
|
|
983
|
+
if name:
|
|
984
|
+
proc = self.processes.get(name)
|
|
985
|
+
if not proc:
|
|
986
|
+
available = ", ".join(self.processes.keys())
|
|
987
|
+
return f"Process '{name}' not found. Available: {available}"
|
|
988
|
+
|
|
989
|
+
# Check exec status
|
|
990
|
+
exec_info = self.client.api.exec_inspect(proc.process)
|
|
991
|
+
running = exec_info.get("Running", False)
|
|
992
|
+
exit_code = exec_info.get("ExitCode")
|
|
993
|
+
|
|
994
|
+
if running:
|
|
995
|
+
status = "running"
|
|
996
|
+
else:
|
|
997
|
+
status = f"completed (exit code: {exit_code})"
|
|
998
|
+
|
|
999
|
+
elapsed = time.time() - proc.started_at
|
|
1000
|
+
return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
|
|
1001
|
+
else:
|
|
1002
|
+
# Show all processes
|
|
1003
|
+
lines = ["NAME STATUS COMMAND"]
|
|
1004
|
+
for proc_name, proc in self.processes.items():
|
|
1005
|
+
exec_info = self.client.api.exec_inspect(proc.process)
|
|
1006
|
+
running = exec_info.get("Running", False)
|
|
1007
|
+
exit_code = exec_info.get("ExitCode")
|
|
1008
|
+
|
|
1009
|
+
if running:
|
|
1010
|
+
status = "running"
|
|
1011
|
+
else:
|
|
1012
|
+
status = f"exit {exit_code}"
|
|
1013
|
+
|
|
1014
|
+
cmd_display = (
|
|
1015
|
+
proc.command[:40] + "..."
|
|
1016
|
+
if len(proc.command) > 40
|
|
1017
|
+
else proc.command
|
|
1018
|
+
)
|
|
1019
|
+
lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")
|
|
1020
|
+
|
|
1021
|
+
return "\n".join(lines)
|
|
1022
|
+
|
|
1023
|
+
async def _destroy(self):
|
|
1024
|
+
"""Stop the container and clean up."""
|
|
1025
|
+
if self._destroyed:
|
|
1026
|
+
return
|
|
1027
|
+
|
|
1028
|
+
if self.container:
|
|
1029
|
+
try:
|
|
1030
|
+
await asyncio.to_thread(self.container.stop, timeout=5)
|
|
1031
|
+
except Exception:
|
|
1032
|
+
pass # Container might already be stopped
|
|
1033
|
+
|
|
1034
|
+
self._destroyed = True
|
|
1035
|
+
self._initialized = False
|
|
1036
|
+
|
|
1037
|
+
def _destroy_sync(self):
|
|
1038
|
+
"""Synchronous version of destroy."""
|
|
1039
|
+
if self._destroyed:
|
|
1040
|
+
return
|
|
1041
|
+
|
|
1042
|
+
if self.container:
|
|
1043
|
+
try:
|
|
1044
|
+
self.container.stop(timeout=5)
|
|
1045
|
+
except Exception:
|
|
1046
|
+
pass
|
|
1047
|
+
|
|
1048
|
+
self._destroyed = True
|
|
1049
|
+
self._initialized = False
|
|
1050
|
+
|
|
1051
|
+
def get_tools(self):
|
|
1052
|
+
"""Return list of tools for LLM use."""
|
|
1053
|
+
bash_tool = Tool(
|
|
1054
|
+
name="bash",
|
|
1055
|
+
description=(
|
|
1056
|
+
"Execute a bash command in the Docker sandbox environment. "
|
|
1057
|
+
"The sandbox has Python 3.12 and uv pre-installed. "
|
|
1058
|
+
"Use 'apt-get update && apt-get install -y <package>' for system packages. "
|
|
1059
|
+
"Set wait=false to run servers or long-running processes in background."
|
|
1060
|
+
),
|
|
1061
|
+
run=self._exec,
|
|
1062
|
+
parameters={
|
|
1063
|
+
"command": {
|
|
1064
|
+
"type": "string",
|
|
1065
|
+
"description": "The shell command to execute",
|
|
1066
|
+
},
|
|
1067
|
+
"timeout": {
|
|
1068
|
+
"type": "integer",
|
|
1069
|
+
"description": "Timeout in seconds (default: 60, only for wait=true)",
|
|
1070
|
+
},
|
|
1071
|
+
"wait": {
|
|
1072
|
+
"type": "boolean",
|
|
1073
|
+
"description": "If true (default), wait for completion. If false, run in background.",
|
|
1074
|
+
},
|
|
1075
|
+
"name": {
|
|
1076
|
+
"type": "string",
|
|
1077
|
+
"description": "Name for background process (e.g., 'server'). Only used with wait=false.",
|
|
1078
|
+
},
|
|
1079
|
+
},
|
|
1080
|
+
required=["command"],
|
|
1081
|
+
)
|
|
1082
|
+
|
|
1083
|
+
check_tool = Tool(
|
|
1084
|
+
name="list_processes",
|
|
1085
|
+
description="Check status of background processes started with wait=false.",
|
|
1086
|
+
run=self._check_process,
|
|
1087
|
+
parameters={
|
|
1088
|
+
"name": {
|
|
1089
|
+
"type": "string",
|
|
1090
|
+
"description": "Process name to check, or omit to see all processes",
|
|
1091
|
+
},
|
|
1092
|
+
},
|
|
1093
|
+
required=[],
|
|
1094
|
+
)
|
|
1095
|
+
|
|
1096
|
+
return [bash_tool, check_tool]
|
|
1097
|
+
|
|
1098
|
+
|
|
1099
|
+
class FargateSandbox:
|
|
1100
|
+
"""
|
|
1101
|
+
AWS Fargate-based sandbox for running untrusted code in isolated containers.
|
|
1102
|
+
|
|
1103
|
+
Requires:
|
|
1104
|
+
- boto3 installed
|
|
1105
|
+
- AWS credentials configured
|
|
1106
|
+
- VPC with subnets that have internet access (for pulling images)
|
|
1107
|
+
- Security group that allows outbound traffic
|
|
1108
|
+
|
|
1109
|
+
The sandbox automatically:
|
|
1110
|
+
- Creates IAM roles for task execution and ECS Exec
|
|
1111
|
+
- Registers a task definition with the specified image
|
|
1112
|
+
- Runs a Fargate task and waits for it to be ready
|
|
1113
|
+
- Executes commands via ECS Exec (SSM Session Manager)
|
|
1114
|
+
|
|
1115
|
+
Example:
|
|
1116
|
+
async with FargateSandbox(
|
|
1117
|
+
subnets=["subnet-abc123"],
|
|
1118
|
+
security_groups=["sg-abc123"],
|
|
1119
|
+
) as sandbox:
|
|
1120
|
+
tools = sandbox.get_tools()
|
|
1121
|
+
# Use tools with your LLM...
|
|
1122
|
+
"""
|
|
1123
|
+
|
|
1124
|
+
# Default image - minimal Python with common tools
|
|
1125
|
+
DEFAULT_IMAGE = "python:3.12-slim"
|
|
1126
|
+
|
|
1127
|
+
# IAM policy for ECS Exec (SSM Session Manager)
|
|
1128
|
+
EXEC_POLICY = {
|
|
1129
|
+
"Version": "2012-10-17",
|
|
1130
|
+
"Statement": [
|
|
1131
|
+
{
|
|
1132
|
+
"Effect": "Allow",
|
|
1133
|
+
"Action": [
|
|
1134
|
+
"ssmmessages:CreateControlChannel",
|
|
1135
|
+
"ssmmessages:CreateDataChannel",
|
|
1136
|
+
"ssmmessages:OpenControlChannel",
|
|
1137
|
+
"ssmmessages:OpenDataChannel",
|
|
1138
|
+
],
|
|
1139
|
+
"Resource": "*",
|
|
1140
|
+
}
|
|
1141
|
+
],
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
# Trust policy for ECS tasks
|
|
1145
|
+
TASK_TRUST_POLICY = {
|
|
1146
|
+
"Version": "2012-10-17",
|
|
1147
|
+
"Statement": [
|
|
1148
|
+
{
|
|
1149
|
+
"Effect": "Allow",
|
|
1150
|
+
"Principal": {"Service": "ecs-tasks.amazonaws.com"},
|
|
1151
|
+
"Action": "sts:AssumeRole",
|
|
1152
|
+
}
|
|
1153
|
+
],
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
def __init__(
|
|
1157
|
+
self,
|
|
1158
|
+
subnets: list[str],
|
|
1159
|
+
security_groups: list[str],
|
|
1160
|
+
*,
|
|
1161
|
+
cluster: str | None = None,
|
|
1162
|
+
image: str | None = None,
|
|
1163
|
+
cpu: int = 256,
|
|
1164
|
+
memory: int = 512,
|
|
1165
|
+
region: str | None = None,
|
|
1166
|
+
task_role_arn: str | None = None,
|
|
1167
|
+
execution_role_arn: str | None = None,
|
|
1168
|
+
assign_public_ip: bool = True,
|
|
1169
|
+
):
|
|
1170
|
+
"""
|
|
1171
|
+
Initialize a Fargate sandbox.
|
|
1172
|
+
|
|
1173
|
+
Args:
|
|
1174
|
+
subnets: List of VPC subnet IDs (required). Use subnets with internet
|
|
1175
|
+
access (public subnets with IGW, or private with NAT).
|
|
1176
|
+
security_groups: List of security group IDs (required). Must allow
|
|
1177
|
+
outbound HTTPS (443) for ECS Exec to work.
|
|
1178
|
+
cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
|
|
1179
|
+
image: Docker image to use. Defaults to python:3.12-slim.
|
|
1180
|
+
cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
|
|
1181
|
+
memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
|
|
1182
|
+
region: AWS region. If None, uses boto3 default.
|
|
1183
|
+
task_role_arn: IAM role ARN for the task. If None, creates one with
|
|
1184
|
+
minimal permissions (just SSM for ECS Exec).
|
|
1185
|
+
execution_role_arn: IAM role ARN for task execution. If None, uses
|
|
1186
|
+
the AWS managed ecsTaskExecutionRole.
|
|
1187
|
+
assign_public_ip: Whether to assign a public IP. Required if using
|
|
1188
|
+
public subnets without NAT. Default True.
|
|
1189
|
+
"""
|
|
1190
|
+
self.subnets = subnets
|
|
1191
|
+
self.security_groups = security_groups
|
|
1192
|
+
self.cluster = cluster or "lm-deluge-sandbox"
|
|
1193
|
+
self.image = image or self.DEFAULT_IMAGE
|
|
1194
|
+
self.cpu = str(cpu)
|
|
1195
|
+
self.memory = str(memory)
|
|
1196
|
+
self.region = region
|
|
1197
|
+
self.task_role_arn = task_role_arn
|
|
1198
|
+
self.execution_role_arn = execution_role_arn
|
|
1199
|
+
self.assign_public_ip = assign_public_ip
|
|
1200
|
+
|
|
1201
|
+
# State
|
|
1202
|
+
self.task_arn: str | None = None
|
|
1203
|
+
self.task_definition_arn: str | None = None
|
|
1204
|
+
self._initialized = False
|
|
1205
|
+
self._destroyed = False
|
|
1206
|
+
|
|
1207
|
+
# boto3 clients (lazy init)
|
|
1208
|
+
self._ecs_client = None
|
|
1209
|
+
self._iam_client = None
|
|
1210
|
+
|
|
1211
|
+
@property
|
|
1212
|
+
def ecs(self):
|
|
1213
|
+
"""Lazy-load ECS client."""
|
|
1214
|
+
if self._ecs_client is None:
|
|
1215
|
+
import boto3
|
|
1216
|
+
|
|
1217
|
+
self._ecs_client = boto3.client("ecs", region_name=self.region)
|
|
1218
|
+
return self._ecs_client
|
|
1219
|
+
|
|
1220
|
+
@property
|
|
1221
|
+
def iam(self):
|
|
1222
|
+
"""Lazy-load IAM client."""
|
|
1223
|
+
if self._iam_client is None:
|
|
1224
|
+
import boto3
|
|
1225
|
+
|
|
1226
|
+
self._iam_client = boto3.client("iam", region_name=self.region)
|
|
1227
|
+
return self._iam_client
|
|
1228
|
+
|
|
1229
|
+
async def __aenter__(self):
|
|
1230
|
+
"""Async context manager entry - initialize sandbox."""
|
|
1231
|
+
await self._ensure_initialized()
|
|
1232
|
+
return self
|
|
1233
|
+
|
|
1234
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
1235
|
+
"""Async context manager exit - cleanup sandbox."""
|
|
1236
|
+
if not self._destroyed:
|
|
1237
|
+
await self._destroy()
|
|
1238
|
+
return False
|
|
1239
|
+
|
|
1240
|
+
def __del__(self):
|
|
1241
|
+
"""Cleanup sandbox when garbage collected (backup cleanup)."""
|
|
1242
|
+
if not self._destroyed and self.task_arn:
|
|
1243
|
+
import warnings
|
|
1244
|
+
|
|
1245
|
+
warnings.warn(
|
|
1246
|
+
"FargateSandbox was not properly cleaned up. "
|
|
1247
|
+
"Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
|
|
1248
|
+
ResourceWarning,
|
|
1249
|
+
stacklevel=2,
|
|
1250
|
+
)
|
|
1251
|
+
|
|
1252
|
+
async def _ensure_initialized(self):
|
|
1253
|
+
"""Lazy initialization - create cluster, task def, and run task."""
|
|
1254
|
+
if self._initialized:
|
|
1255
|
+
return
|
|
1256
|
+
|
|
1257
|
+
# Ensure cluster exists
|
|
1258
|
+
await self._ensure_cluster()
|
|
1259
|
+
|
|
1260
|
+
# Ensure IAM roles exist
|
|
1261
|
+
await self._ensure_roles()
|
|
1262
|
+
|
|
1263
|
+
# Register task definition
|
|
1264
|
+
await self._register_task_definition()
|
|
1265
|
+
|
|
1266
|
+
# Run the task
|
|
1267
|
+
await self._run_task()
|
|
1268
|
+
|
|
1269
|
+
# Wait for task to be running
|
|
1270
|
+
await self._wait_for_task()
|
|
1271
|
+
|
|
1272
|
+
self._initialized = True
|
|
1273
|
+
|
|
1274
|
+
async def _ensure_cluster(self):
|
|
1275
|
+
"""Create ECS cluster if it doesn't exist."""
|
|
1276
|
+
try:
|
|
1277
|
+
response = await asyncio.to_thread(
|
|
1278
|
+
self.ecs.describe_clusters, clusters=[self.cluster]
|
|
1279
|
+
)
|
|
1280
|
+
clusters = response.get("clusters", [])
|
|
1281
|
+
if clusters and clusters[0].get("status") == "ACTIVE":
|
|
1282
|
+
return # Cluster exists
|
|
1283
|
+
except Exception:
|
|
1284
|
+
pass
|
|
1285
|
+
|
|
1286
|
+
# Create cluster
|
|
1287
|
+
await asyncio.to_thread(
|
|
1288
|
+
self.ecs.create_cluster,
|
|
1289
|
+
clusterName=self.cluster,
|
|
1290
|
+
settings=[
|
|
1291
|
+
{"name": "containerInsights", "value": "disabled"},
|
|
1292
|
+
],
|
|
1293
|
+
)
|
|
1294
|
+
|
|
1295
|
+
async def _ensure_roles(self):
|
|
1296
|
+
"""Create IAM roles if not provided."""
|
|
1297
|
+
# Task role (for ECS Exec)
|
|
1298
|
+
if not self.task_role_arn:
|
|
1299
|
+
role_name = "lm-deluge-sandbox-task-role"
|
|
1300
|
+
try:
|
|
1301
|
+
response = await asyncio.to_thread(
|
|
1302
|
+
self.iam.get_role, RoleName=role_name
|
|
1303
|
+
)
|
|
1304
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
1305
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
1306
|
+
# Create the role
|
|
1307
|
+
response = await asyncio.to_thread(
|
|
1308
|
+
self.iam.create_role,
|
|
1309
|
+
RoleName=role_name,
|
|
1310
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
1311
|
+
Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
|
|
1312
|
+
)
|
|
1313
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
1314
|
+
|
|
1315
|
+
# Attach inline policy for ECS Exec
|
|
1316
|
+
await asyncio.to_thread(
|
|
1317
|
+
self.iam.put_role_policy,
|
|
1318
|
+
RoleName=role_name,
|
|
1319
|
+
PolicyName="ecs-exec-policy",
|
|
1320
|
+
PolicyDocument=json.dumps(self.EXEC_POLICY),
|
|
1321
|
+
)
|
|
1322
|
+
|
|
1323
|
+
# IAM is eventually consistent - wait a bit
|
|
1324
|
+
await asyncio.sleep(5)
|
|
1325
|
+
|
|
1326
|
+
# Execution role (for pulling images, logs)
|
|
1327
|
+
if not self.execution_role_arn:
|
|
1328
|
+
role_name = "lm-deluge-sandbox-execution-role"
|
|
1329
|
+
try:
|
|
1330
|
+
response = await asyncio.to_thread(
|
|
1331
|
+
self.iam.get_role, RoleName=role_name
|
|
1332
|
+
)
|
|
1333
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
1334
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
1335
|
+
# Create the role
|
|
1336
|
+
response = await asyncio.to_thread(
|
|
1337
|
+
self.iam.create_role,
|
|
1338
|
+
RoleName=role_name,
|
|
1339
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
1340
|
+
Description="Execution role for lm-deluge Fargate sandbox",
|
|
1341
|
+
)
|
|
1342
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
1343
|
+
|
|
1344
|
+
# Attach AWS managed policy
|
|
1345
|
+
await asyncio.to_thread(
|
|
1346
|
+
self.iam.attach_role_policy,
|
|
1347
|
+
RoleName=role_name,
|
|
1348
|
+
PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
|
|
1349
|
+
)
|
|
1350
|
+
|
|
1351
|
+
# IAM is eventually consistent - wait a bit
|
|
1352
|
+
await asyncio.sleep(5)
|
|
1353
|
+
|
|
1354
|
+
async def _register_task_definition(self):
|
|
1355
|
+
"""Register a task definition for the sandbox."""
|
|
1356
|
+
family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"
|
|
1357
|
+
|
|
1358
|
+
response = await asyncio.to_thread(
|
|
1359
|
+
self.ecs.register_task_definition,
|
|
1360
|
+
family=family,
|
|
1361
|
+
networkMode="awsvpc",
|
|
1362
|
+
requiresCompatibilities=["FARGATE"],
|
|
1363
|
+
cpu=self.cpu,
|
|
1364
|
+
memory=self.memory,
|
|
1365
|
+
taskRoleArn=self.task_role_arn,
|
|
1366
|
+
executionRoleArn=self.execution_role_arn,
|
|
1367
|
+
containerDefinitions=[
|
|
1368
|
+
{
|
|
1369
|
+
"name": "sandbox",
|
|
1370
|
+
"image": self.image,
|
|
1371
|
+
"essential": True,
|
|
1372
|
+
# Keep container running - sleep infinity
|
|
1373
|
+
"command": ["sh", "-c", "sleep infinity"],
|
|
1374
|
+
"linuxParameters": {
|
|
1375
|
+
"initProcessEnabled": True, # Required for ECS Exec
|
|
1376
|
+
},
|
|
1377
|
+
}
|
|
1378
|
+
],
|
|
1379
|
+
)
|
|
1380
|
+
self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
|
|
1381
|
+
|
|
1382
|
+
async def _run_task(self):
|
|
1383
|
+
"""Run a Fargate task."""
|
|
1384
|
+
response = await asyncio.to_thread(
|
|
1385
|
+
self.ecs.run_task,
|
|
1386
|
+
cluster=self.cluster,
|
|
1387
|
+
taskDefinition=self.task_definition_arn,
|
|
1388
|
+
launchType="FARGATE",
|
|
1389
|
+
enableExecuteCommand=True, # Enable ECS Exec
|
|
1390
|
+
networkConfiguration={
|
|
1391
|
+
"awsvpcConfiguration": {
|
|
1392
|
+
"subnets": self.subnets,
|
|
1393
|
+
"securityGroups": self.security_groups,
|
|
1394
|
+
"assignPublicIp": "ENABLED"
|
|
1395
|
+
if self.assign_public_ip
|
|
1396
|
+
else "DISABLED",
|
|
1397
|
+
}
|
|
1398
|
+
},
|
|
1399
|
+
)
|
|
1400
|
+
|
|
1401
|
+
tasks = response.get("tasks", [])
|
|
1402
|
+
if not tasks:
|
|
1403
|
+
failures = response.get("failures", [])
|
|
1404
|
+
raise RuntimeError(f"Failed to run task: {failures}")
|
|
1405
|
+
|
|
1406
|
+
self.task_arn = tasks[0]["taskArn"]
|
|
1407
|
+
|
|
1408
|
+
async def _wait_for_task(self, timeout: int = 120):
|
|
1409
|
+
"""Wait for task to reach RUNNING state."""
|
|
1410
|
+
start = time.time()
|
|
1411
|
+
while time.time() - start < timeout:
|
|
1412
|
+
response = await asyncio.to_thread(
|
|
1413
|
+
self.ecs.describe_tasks,
|
|
1414
|
+
cluster=self.cluster,
|
|
1415
|
+
tasks=[self.task_arn],
|
|
1416
|
+
)
|
|
1417
|
+
tasks = response.get("tasks", [])
|
|
1418
|
+
if tasks:
|
|
1419
|
+
status = tasks[0].get("lastStatus")
|
|
1420
|
+
if status == "RUNNING":
|
|
1421
|
+
# Also check that execute command agent is running
|
|
1422
|
+
containers = tasks[0].get("containers", [])
|
|
1423
|
+
for container in containers:
|
|
1424
|
+
managed_agents = container.get("managedAgents", [])
|
|
1425
|
+
for agent in managed_agents:
|
|
1426
|
+
if agent.get("name") == "ExecuteCommandAgent":
|
|
1427
|
+
if agent.get("lastStatus") == "RUNNING":
|
|
1428
|
+
return
|
|
1429
|
+
elif status in ("STOPPED", "DEACTIVATING"):
|
|
1430
|
+
reason = tasks[0].get("stoppedReason", "Unknown")
|
|
1431
|
+
raise RuntimeError(f"Task stopped: {reason}")
|
|
1432
|
+
|
|
1433
|
+
await asyncio.sleep(2)
|
|
1434
|
+
|
|
1435
|
+
raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")
|
|
1436
|
+
|
|
1437
|
+
async def _exec(
|
|
1438
|
+
self,
|
|
1439
|
+
command: str,
|
|
1440
|
+
timeout: int = 60,
|
|
1441
|
+
) -> str:
|
|
1442
|
+
"""
|
|
1443
|
+
Execute a command in the sandbox.
|
|
1444
|
+
|
|
1445
|
+
Args:
|
|
1446
|
+
command: Shell command to execute
|
|
1447
|
+
timeout: Timeout in seconds
|
|
1448
|
+
|
|
1449
|
+
Returns:
|
|
1450
|
+
Command output (stdout + stderr)
|
|
1451
|
+
"""
|
|
1452
|
+
await self._ensure_initialized()
|
|
1453
|
+
|
|
1454
|
+
# Call ECS execute_command
|
|
1455
|
+
response = await asyncio.to_thread(
|
|
1456
|
+
self.ecs.execute_command,
|
|
1457
|
+
cluster=self.cluster,
|
|
1458
|
+
task=self.task_arn,
|
|
1459
|
+
container="sandbox",
|
|
1460
|
+
interactive=True,
|
|
1461
|
+
command=f"/bin/sh -c {shlex.quote(command)}",
|
|
1462
|
+
)
|
|
1463
|
+
|
|
1464
|
+
session = response.get("session", {})
|
|
1465
|
+
stream_url = session.get("streamUrl")
|
|
1466
|
+
token = session.get("tokenValue")
|
|
1467
|
+
|
|
1468
|
+
if not stream_url or not token:
|
|
1469
|
+
return f"Error: Failed to get session: {response}"
|
|
1470
|
+
|
|
1471
|
+
# Connect to websocket and read output
|
|
1472
|
+
try:
|
|
1473
|
+
output = await self._read_ssm_session(stream_url, token, timeout)
|
|
1474
|
+
except Exception as e:
|
|
1475
|
+
return f"Error executing command: {e}"
|
|
1476
|
+
|
|
1477
|
+
# Truncate if needed
|
|
1478
|
+
if len(output) > 5000:
|
|
1479
|
+
output = "...[truncated]...\n" + output[-5000:]
|
|
1480
|
+
|
|
1481
|
+
return output if output else "(no output)"
|
|
1482
|
+
|
|
1483
|
+
async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
|
|
1484
|
+
"""
|
|
1485
|
+
Connect to SSM session websocket and read command output.
|
|
1486
|
+
|
|
1487
|
+
The SSM agent uses a binary protocol:
|
|
1488
|
+
- Header: 4-byte big-endian length + 32-byte null-padded message type
|
|
1489
|
+
- Payload varies by message type
|
|
1490
|
+
|
|
1491
|
+
Note: SSM retransmits messages until ACKed. Since we're just reading
|
|
1492
|
+
(not fully implementing the protocol), we deduplicate by tracking
|
|
1493
|
+
seen message hashes.
|
|
1494
|
+
"""
|
|
1495
|
+
import aiohttp
|
|
1496
|
+
|
|
1497
|
+
output_chunks = []
|
|
1498
|
+
seen_messages: set[bytes] = set() # Dedupe retransmissions
|
|
1499
|
+
|
|
1500
|
+
async with aiohttp.ClientSession() as session:
|
|
1501
|
+
async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
|
|
1502
|
+
# Send init message with token
|
|
1503
|
+
init_message = {
|
|
1504
|
+
"MessageSchemaVersion": "1.0",
|
|
1505
|
+
"RequestId": str(uuid.uuid4()),
|
|
1506
|
+
"TokenValue": token,
|
|
1507
|
+
}
|
|
1508
|
+
await ws.send_str(json.dumps(init_message))
|
|
1509
|
+
|
|
1510
|
+
# Read messages until channel closes or timeout
|
|
1511
|
+
try:
|
|
1512
|
+
async for msg in ws:
|
|
1513
|
+
if msg.type == aiohttp.WSMsgType.BINARY:
|
|
1514
|
+
# Skip duplicate messages (SSM retransmits until ACKed)
|
|
1515
|
+
msg_hash = msg.data[:116] # Header is enough to identify
|
|
1516
|
+
if msg_hash in seen_messages:
|
|
1517
|
+
continue
|
|
1518
|
+
seen_messages.add(msg_hash)
|
|
1519
|
+
|
|
1520
|
+
parsed = self._parse_ssm_message(msg.data)
|
|
1521
|
+
if parsed:
|
|
1522
|
+
msg_type, payload = parsed
|
|
1523
|
+
if "output_stream_data" in msg_type:
|
|
1524
|
+
output_chunks.append(payload)
|
|
1525
|
+
elif "channel_closed" in msg_type:
|
|
1526
|
+
break
|
|
1527
|
+
elif msg.type == aiohttp.WSMsgType.ERROR:
|
|
1528
|
+
break
|
|
1529
|
+
elif msg.type == aiohttp.WSMsgType.CLOSED:
|
|
1530
|
+
break
|
|
1531
|
+
except asyncio.TimeoutError:
|
|
1532
|
+
pass
|
|
1533
|
+
|
|
1534
|
+
return "".join(output_chunks)
|
|
1535
|
+
|
|
1536
|
+
def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
|
|
1537
|
+
"""
|
|
1538
|
+
Parse an SSM agent message.
|
|
1539
|
+
|
|
1540
|
+
Format:
|
|
1541
|
+
- Bytes 0-3: Header length (big-endian uint32)
|
|
1542
|
+
- Bytes 4-35: Message type (32 bytes, null-padded ASCII)
|
|
1543
|
+
- After header: Payload length (4 bytes) + payload
|
|
1544
|
+
"""
|
|
1545
|
+
if len(data) < 36:
|
|
1546
|
+
return None
|
|
1547
|
+
|
|
1548
|
+
try:
|
|
1549
|
+
header_len = struct.unpack(">I", data[0:4])[0]
|
|
1550
|
+
msg_type = data[4:36].decode("ascii").rstrip("\x00")
|
|
1551
|
+
|
|
1552
|
+
# Payload starts after header
|
|
1553
|
+
if len(data) > header_len:
|
|
1554
|
+
payload_data = data[header_len:]
|
|
1555
|
+
if len(payload_data) >= 4:
|
|
1556
|
+
payload_len = struct.unpack(">I", payload_data[0:4])[0]
|
|
1557
|
+
if len(payload_data) >= 4 + payload_len:
|
|
1558
|
+
payload = payload_data[4 : 4 + payload_len].decode(
|
|
1559
|
+
"utf-8", errors="replace"
|
|
1560
|
+
)
|
|
1561
|
+
return msg_type, payload
|
|
1562
|
+
|
|
1563
|
+
return msg_type, ""
|
|
1564
|
+
except Exception:
|
|
1565
|
+
return None
|
|
1566
|
+
|
|
1567
|
+
async def _destroy(self):
|
|
1568
|
+
"""Stop the task and clean up."""
|
|
1569
|
+
if self._destroyed:
|
|
1570
|
+
return
|
|
1571
|
+
|
|
1572
|
+
if self.task_arn:
|
|
1573
|
+
try:
|
|
1574
|
+
await asyncio.to_thread(
|
|
1575
|
+
self.ecs.stop_task,
|
|
1576
|
+
cluster=self.cluster,
|
|
1577
|
+
task=self.task_arn,
|
|
1578
|
+
reason="Sandbox destroyed",
|
|
1579
|
+
)
|
|
1580
|
+
except Exception:
|
|
1581
|
+
pass # Best effort
|
|
1582
|
+
|
|
1583
|
+
# Optionally deregister task definition
|
|
1584
|
+
if self.task_definition_arn:
|
|
1585
|
+
try:
|
|
1586
|
+
await asyncio.to_thread(
|
|
1587
|
+
self.ecs.deregister_task_definition,
|
|
1588
|
+
taskDefinition=self.task_definition_arn,
|
|
1589
|
+
)
|
|
1590
|
+
except Exception:
|
|
1591
|
+
pass
|
|
1592
|
+
|
|
1593
|
+
self._destroyed = True
|
|
1594
|
+
self._initialized = False
|
|
1595
|
+
|
|
1596
|
+
def get_tools(self):
|
|
1597
|
+
"""Return list of tools for LLM use."""
|
|
1598
|
+
bash_tool = Tool(
|
|
1599
|
+
name="bash",
|
|
1600
|
+
description=(
|
|
1601
|
+
"Execute a bash command in the AWS Fargate sandbox environment. "
|
|
1602
|
+
"The command runs in an isolated container. "
|
|
1603
|
+
"Output is truncated to the last 5000 characters if longer. "
|
|
1604
|
+
"Note: This sandbox does not support background processes - "
|
|
1605
|
+
"commands must complete within the timeout."
|
|
1606
|
+
),
|
|
1607
|
+
run=self._exec,
|
|
1608
|
+
parameters={
|
|
1609
|
+
"command": {
|
|
1610
|
+
"type": "string",
|
|
1611
|
+
"description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
|
|
1612
|
+
},
|
|
1613
|
+
"timeout": {
|
|
1614
|
+
"type": "integer",
|
|
1615
|
+
"description": "Timeout in seconds for the command execution (default: 60)",
|
|
1616
|
+
},
|
|
1617
|
+
},
|
|
1618
|
+
required=["command"],
|
|
1619
|
+
)
|
|
1620
|
+
|
|
1621
|
+
return [bash_tool]
|