lm-deluge 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -1,7 +1,11 @@
+import asyncio
+import json
 import os
 import secrets
 import shlex
+import struct
 import time
+import uuid
 from dataclasses import dataclass, field
 from typing import Any
 
@@ -711,3 +715,907 @@ class DaytonaSandbox:
             preview_tool,
             workdir_tool,
         ]
+
+
+class DockerSandbox:
+    """
+    Local Docker-based sandbox for running code in isolated containers.
+
+    Works with Docker Desktop, Colima, or any Docker-compatible runtime.
+    Each sandbox instance creates its own container.
+
+    Requires:
+        - docker package installed (pip install docker)
+        - Docker daemon running (Docker Desktop, Colima, etc.)
+
+    Example:
+        async with DockerSandbox() as sandbox:
+            tools = sandbox.get_tools()
+            # Use tools with your LLM...
+    """
+
+    # Default image - has uv pre-installed, Debian Bookworm base
+    DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"
+
+    def __init__(
+        self,
+        image: str | None = None,
+        *,
+        docker_host: str | None = None,
+        network_mode: str = "bridge",
+        mem_limit: str = "512m",
+        cpu_period: int = 100000,
+        cpu_quota: int | None = None,
+        working_dir: str = "/workspace",
+    ):
+        """
+        Initialize a Docker sandbox.
+
+        Args:
+            image: Docker image to use. Defaults to uv's Python 3.12 image.
+            docker_host: Docker socket URL. If None, auto-detects from the
+                DOCKER_HOST env var or tries common socket paths.
+            network_mode: Docker network mode. "bridge" (default) for internet
+                access, "none" for full isolation.
+            mem_limit: Memory limit (e.g., "512m", "1g"). Default "512m".
+            cpu_period: CPU period in microseconds. Default 100000.
+            cpu_quota: CPU quota in microseconds. None for no limit.
+                E.g., 50000 with period 100000 = 50% of one CPU.
+            working_dir: Working directory inside container. Default "/workspace".
+        """
+        self.image = image or self.DEFAULT_IMAGE
+        self.docker_host = docker_host
+        self.network_mode = network_mode
+        self.mem_limit = mem_limit
+        self.cpu_period = cpu_period
+        self.cpu_quota = cpu_quota
+        self.working_dir = working_dir
+
+        # State
+        self.container = None
+        self._client = None
+        self._initialized = False
+        self._destroyed = False
+
+        # Process tracking for background processes
+        self.processes: dict[str, TrackedProcess] = {}
+        self.process_counter: int = 0
+
+    @property
+    def client(self):
+        """Lazy-load the Docker client."""
+        if self._client is None:
+            import docker
+
+            if self.docker_host:
+                self._client = docker.DockerClient(base_url=self.docker_host)
+            else:
+                # Auto-detect socket location:
+                # try the DOCKER_HOST env var first, then common socket paths
+                docker_host = os.environ.get("DOCKER_HOST")
+                if not docker_host:
+                    # Common socket paths (Docker Desktop, Colima, Podman, etc.)
+                    socket_paths = [
+                        os.path.expanduser("~/.colima/default/docker.sock"),
+                        os.path.expanduser("~/.colima/docker.sock"),
+                        "/var/run/docker.sock",
+                        os.path.expanduser("~/.docker/run/docker.sock"),
+                        os.path.expanduser(
+                            "~/.local/share/containers/podman/machine/podman.sock"
+                        ),
+                    ]
+                    for path in socket_paths:
+                        if os.path.exists(path):
+                            docker_host = f"unix://{path}"
+                            break
+
+                if docker_host:
+                    self._client = docker.DockerClient(base_url=docker_host)
+                else:
+                    # Fall back to the default (will likely fail, but gives a clear error)
+                    self._client = docker.from_env()
+        return self._client
+
+    async def __aenter__(self):
+        """Async context manager entry - initialize sandbox."""
+        await self._ensure_initialized()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - clean up sandbox."""
+        if not self._destroyed:
+            await self._destroy()
+        return False
+
+    def __enter__(self):
+        """Sync context manager entry."""
+        import asyncio
+
+        asyncio.get_event_loop().run_until_complete(self._ensure_initialized())
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Sync context manager exit."""
+        if not self._destroyed:
+            self._destroy_sync()
+        return False
+
+    def __del__(self):
+        """Warn if the sandbox was garbage collected without cleanup (backup safety net)."""
+        if not self._destroyed and self.container:
+            import warnings
+
+            warnings.warn(
+                "DockerSandbox was not properly cleaned up. "
+                "Use 'with DockerSandbox(...) as sandbox:' for automatic cleanup.",
+                ResourceWarning,
+                stacklevel=2,
+            )
+
+    async def _ensure_initialized(self):
+        """Lazy initialization - pull the image if needed and start the container."""
+        if self._initialized:
+            return
+
+        # Pull image if not present
+        await asyncio.to_thread(self._pull_image_if_needed)
+
+        # Create and start container
+        await asyncio.to_thread(self._create_container)
+
+        self._initialized = True
+
+    def _pull_image_if_needed(self):
+        """Pull the Docker image if not already present."""
+        try:
+            self.client.images.get(self.image)
+        except Exception:
+            # Image not found locally, pull it
+            self.client.images.pull(self.image)
+
+    def _create_container(self):
+        """Create and start the container."""
+        self.container = self.client.containers.run(
+            self.image,
+            command=["sleep", "infinity"],
+            detach=True,
+            remove=True,  # Auto-remove when stopped
+            network_mode=self.network_mode,
+            mem_limit=self.mem_limit,
+            cpu_period=self.cpu_period,
+            cpu_quota=self.cpu_quota,
+            working_dir=self.working_dir,
+            # Create the working directory
+            entrypoint=[
+                "/bin/sh",
+                "-c",
+                f"mkdir -p {self.working_dir} && sleep infinity",
+            ],
+        )
+
+    def _generate_process_name(self) -> str:
+        """Generate a unique process name like p1, p2, etc."""
+        self.process_counter += 1
+        return f"p{self.process_counter}"
+
+    async def _exec(
+        self,
+        command: str,
+        timeout: int = 60,
+        wait: bool = True,
+        name: str | None = None,
+    ) -> str:
+        """
+        Execute a command in the sandbox.
+
+        Args:
+            command: Shell command to execute
+            timeout: Timeout in seconds (only applies when wait=True)
+            wait: If True, wait for completion. If False, run in background.
+            name: Name for background process (auto-generated if not provided)
+
+        Returns:
+            Command output if wait=True, or a status message if wait=False
+        """
+        await self._ensure_initialized()
+        assert self.container is not None, "Container not initialized"
+
+        if wait:
+            # Synchronous execution with timeout
+            try:
+                exit_code, output = await asyncio.wait_for(
+                    asyncio.to_thread(
+                        self.container.exec_run,
+                        ["sh", "-c", command],
+                        workdir=self.working_dir,
+                    ),
+                    timeout=timeout,
+                )
+            except asyncio.TimeoutError:
+                return f"[Timeout after {timeout}s]"
+
+            # Decode output
+            if isinstance(output, bytes):
+                output = output.decode("utf-8", errors="replace")
+
+            # Truncate if needed
+            if len(output) > 5000:
+                output = "...[truncated]...\n" + output[-5000:]
+
+            # Include exit code if non-zero
+            if exit_code != 0:
+                output = f"[Exit code: {exit_code}]\n{output}"
+
+            return output if output else "(no output)"
+        else:
+            # Background execution
+            exec_id = await asyncio.to_thread(
+                self.client.api.exec_create,
+                self.container.id,
+                ["sh", "-c", command],
+                workdir=self.working_dir,
+            )
+            await asyncio.to_thread(
+                self.client.api.exec_start,
+                exec_id,
+                detach=True,
+            )
+
+            proc_name = name or self._generate_process_name()
+            tracked = TrackedProcess(
+                process=exec_id,
+                name=proc_name,
+                command=command,
+            )
+            self.processes[proc_name] = tracked
+
+            return (
+                f"Started background process '{proc_name}'.\n"
+                f"Command: {command}\n"
+                f"Use list_processes() to check status."
+            )
+
+    def _check_process(self, name: str | None = None) -> str:
+        """Check the status of background processes."""
+        if not self.processes:
+            return "No background processes have been started."
+
+        if name:
+            proc = self.processes.get(name)
+            if not proc:
+                available = ", ".join(self.processes.keys())
+                return f"Process '{name}' not found. Available: {available}"
+
+            # Check exec status
+            exec_info = self.client.api.exec_inspect(proc.process)
+            running = exec_info.get("Running", False)
+            exit_code = exec_info.get("ExitCode")
+
+            if running:
+                status = "running"
+            else:
+                status = f"completed (exit code: {exit_code})"
+
+            elapsed = time.time() - proc.started_at
+            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
+        else:
+            # Show all processes
+            lines = ["NAME     STATUS              COMMAND"]
+            for proc_name, proc in self.processes.items():
+                exec_info = self.client.api.exec_inspect(proc.process)
+                running = exec_info.get("Running", False)
+                exit_code = exec_info.get("ExitCode")
+
+                if running:
+                    status = "running"
+                else:
+                    status = f"exit {exit_code}"
+
+                cmd_display = (
+                    proc.command[:40] + "..."
+                    if len(proc.command) > 40
+                    else proc.command
+                )
+                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")
+
+            return "\n".join(lines)
+
+    async def _destroy(self):
+        """Stop the container and clean up."""
+        if self._destroyed:
+            return
+
+        if self.container:
+            try:
+                await asyncio.to_thread(self.container.stop, timeout=5)
+            except Exception:
+                pass  # Container might already be stopped
+
+        self._destroyed = True
+        self._initialized = False
+
+    def _destroy_sync(self):
+        """Synchronous version of destroy."""
+        if self._destroyed:
+            return
+
+        if self.container:
+            try:
+                self.container.stop(timeout=5)
+            except Exception:
+                pass
+
+        self._destroyed = True
+        self._initialized = False
+
+    def get_tools(self):
+        """Return the list of tools for LLM use."""
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the Docker sandbox environment. "
+                "The sandbox has Python 3.12 and uv pre-installed. "
+                "Use 'apt-get update && apt-get install -y <package>' for system packages. "
+                "Set wait=false to run servers or long-running processes in background."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "The shell command to execute",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds (default: 60, only for wait=true)",
+                },
+                "wait": {
+                    "type": "boolean",
+                    "description": "If true (default), wait for completion. If false, run in background.",
+                },
+                "name": {
+                    "type": "string",
+                    "description": "Name for background process (e.g., 'server'). Only used with wait=false.",
+                },
+            },
+            required=["command"],
+        )
+
+        check_tool = Tool(
+            name="list_processes",
+            description="Check status of background processes started with wait=false.",
+            run=self._check_process,
+            parameters={
+                "name": {
+                    "type": "string",
+                    "description": "Process name to check, or omit to see all processes",
+                },
+            },
+            required=[],
+        )
+
+        return [bash_tool, check_tool]
+
+
+class FargateSandbox:
+    """
+    AWS Fargate-based sandbox for running untrusted code in isolated containers.
+
+    Requires:
+        - boto3 installed
+        - AWS credentials configured
+        - VPC with subnets that have internet access (for pulling images)
+        - Security group that allows outbound traffic
+
+    The sandbox automatically:
+        - Creates IAM roles for task execution and ECS Exec
+        - Registers a task definition with the specified image
+        - Runs a Fargate task and waits for it to be ready
+        - Executes commands via ECS Exec (SSM Session Manager)
+
+    Example:
+        async with FargateSandbox(
+            subnets=["subnet-abc123"],
+            security_groups=["sg-abc123"],
+        ) as sandbox:
+            tools = sandbox.get_tools()
+            # Use tools with your LLM...
+    """
+
+    # Default image - minimal Python with common tools
+    DEFAULT_IMAGE = "python:3.12-slim"
+
+    # IAM policy for ECS Exec (SSM Session Manager)
+    EXEC_POLICY = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Action": [
+                    "ssmmessages:CreateControlChannel",
+                    "ssmmessages:CreateDataChannel",
+                    "ssmmessages:OpenControlChannel",
+                    "ssmmessages:OpenDataChannel",
+                ],
+                "Resource": "*",
+            }
+        ],
+    }
+
+    # Trust policy for ECS tasks
+    TASK_TRUST_POLICY = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Principal": {"Service": "ecs-tasks.amazonaws.com"},
+                "Action": "sts:AssumeRole",
+            }
+        ],
+    }
+
+    def __init__(
+        self,
+        subnets: list[str],
+        security_groups: list[str],
+        *,
+        cluster: str | None = None,
+        image: str | None = None,
+        cpu: int = 256,
+        memory: int = 512,
+        region: str | None = None,
+        task_role_arn: str | None = None,
+        execution_role_arn: str | None = None,
+        assign_public_ip: bool = True,
+    ):
+        """
+        Initialize a Fargate sandbox.
+
+        Args:
+            subnets: List of VPC subnet IDs (required). Use subnets with internet
+                access (public subnets with IGW, or private with NAT).
+            security_groups: List of security group IDs (required). Must allow
+                outbound HTTPS (443) for ECS Exec to work.
+            cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
+            image: Docker image to use. Defaults to python:3.12-slim.
+            cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
+            memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
+            region: AWS region. If None, uses the boto3 default.
+            task_role_arn: IAM role ARN for the task. If None, creates one with
+                minimal permissions (just SSM for ECS Exec).
+            execution_role_arn: IAM role ARN for task execution. If None, uses
+                the AWS managed ecsTaskExecutionRole.
+            assign_public_ip: Whether to assign a public IP. Required if using
+                public subnets without NAT. Default True.
+        """
+        self.subnets = subnets
+        self.security_groups = security_groups
+        self.cluster = cluster or "lm-deluge-sandbox"
+        self.image = image or self.DEFAULT_IMAGE
+        self.cpu = str(cpu)
+        self.memory = str(memory)
+        self.region = region
+        self.task_role_arn = task_role_arn
+        self.execution_role_arn = execution_role_arn
+        self.assign_public_ip = assign_public_ip
+
+        # State
+        self.task_arn: str | None = None
+        self.task_definition_arn: str | None = None
+        self._initialized = False
+        self._destroyed = False
+
+        # boto3 clients (lazy init)
+        self._ecs_client = None
+        self._iam_client = None
+
+    @property
+    def ecs(self):
+        """Lazy-load the ECS client."""
+        if self._ecs_client is None:
+            import boto3
+
+            self._ecs_client = boto3.client("ecs", region_name=self.region)
+        return self._ecs_client
+
+    @property
+    def iam(self):
+        """Lazy-load the IAM client."""
+        if self._iam_client is None:
+            import boto3
+
+            self._iam_client = boto3.client("iam", region_name=self.region)
+        return self._iam_client
+
+    async def __aenter__(self):
+        """Async context manager entry - initialize sandbox."""
+        await self._ensure_initialized()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - clean up sandbox."""
+        if not self._destroyed:
+            await self._destroy()
+        return False
+
+    def __del__(self):
+        """Warn if the sandbox was garbage collected without cleanup (backup safety net)."""
+        if not self._destroyed and self.task_arn:
+            import warnings
+
+            warnings.warn(
+                "FargateSandbox was not properly cleaned up. "
+                "Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
+                ResourceWarning,
+                stacklevel=2,
+            )
+
+    async def _ensure_initialized(self):
+        """Lazy initialization - create the cluster and task definition, then run the task."""
+        if self._initialized:
+            return
+
+        # Ensure cluster exists
+        await self._ensure_cluster()
+
+        # Ensure IAM roles exist
+        await self._ensure_roles()
+
+        # Register task definition
+        await self._register_task_definition()
+
+        # Run the task
+        await self._run_task()
+
+        # Wait for task to be running
+        await self._wait_for_task()
+
+        self._initialized = True
+
+    async def _ensure_cluster(self):
+        """Create the ECS cluster if it doesn't exist."""
+        try:
+            response = await asyncio.to_thread(
+                self.ecs.describe_clusters, clusters=[self.cluster]
+            )
+            clusters = response.get("clusters", [])
+            if clusters and clusters[0].get("status") == "ACTIVE":
+                return  # Cluster exists
+        except Exception:
+            pass
+
+        # Create cluster
+        await asyncio.to_thread(
+            self.ecs.create_cluster,
+            clusterName=self.cluster,
+            settings=[
+                {"name": "containerInsights", "value": "disabled"},
+            ],
+        )
+
+    async def _ensure_roles(self):
+        """Create IAM roles if not provided."""
+        # Task role (for ECS Exec)
+        if not self.task_role_arn:
+            role_name = "lm-deluge-sandbox-task-role"
+            try:
+                response = await asyncio.to_thread(
+                    self.iam.get_role, RoleName=role_name
+                )
+                self.task_role_arn = response["Role"]["Arn"]
+            except self.iam.exceptions.NoSuchEntityException:
+                # Create the role
+                response = await asyncio.to_thread(
+                    self.iam.create_role,
+                    RoleName=role_name,
+                    AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
+                    Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
+                )
+                self.task_role_arn = response["Role"]["Arn"]
+
+                # Attach inline policy for ECS Exec
+                await asyncio.to_thread(
+                    self.iam.put_role_policy,
+                    RoleName=role_name,
+                    PolicyName="ecs-exec-policy",
+                    PolicyDocument=json.dumps(self.EXEC_POLICY),
+                )
+
+                # IAM is eventually consistent - wait a bit
+                await asyncio.sleep(5)
+
+        # Execution role (for pulling images, logs)
+        if not self.execution_role_arn:
+            role_name = "lm-deluge-sandbox-execution-role"
+            try:
+                response = await asyncio.to_thread(
+                    self.iam.get_role, RoleName=role_name
+                )
+                self.execution_role_arn = response["Role"]["Arn"]
+            except self.iam.exceptions.NoSuchEntityException:
+                # Create the role
+                response = await asyncio.to_thread(
+                    self.iam.create_role,
+                    RoleName=role_name,
+                    AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
+                    Description="Execution role for lm-deluge Fargate sandbox",
+                )
+                self.execution_role_arn = response["Role"]["Arn"]
+
+                # Attach AWS managed policy
+                await asyncio.to_thread(
+                    self.iam.attach_role_policy,
+                    RoleName=role_name,
+                    PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
+                )
+
+                # IAM is eventually consistent - wait a bit
+                await asyncio.sleep(5)
+
+    async def _register_task_definition(self):
+        """Register a task definition for the sandbox."""
+        family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"
+
+        response = await asyncio.to_thread(
+            self.ecs.register_task_definition,
+            family=family,
+            networkMode="awsvpc",
+            requiresCompatibilities=["FARGATE"],
+            cpu=self.cpu,
+            memory=self.memory,
+            taskRoleArn=self.task_role_arn,
+            executionRoleArn=self.execution_role_arn,
+            containerDefinitions=[
+                {
+                    "name": "sandbox",
+                    "image": self.image,
+                    "essential": True,
+                    # Keep container running - sleep infinity
+                    "command": ["sh", "-c", "sleep infinity"],
+                    "linuxParameters": {
+                        "initProcessEnabled": True,  # Required for ECS Exec
+                    },
+                }
+            ],
+        )
+        self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
+
+    async def _run_task(self):
+        """Run a Fargate task."""
+        response = await asyncio.to_thread(
+            self.ecs.run_task,
+            cluster=self.cluster,
+            taskDefinition=self.task_definition_arn,
+            launchType="FARGATE",
+            enableExecuteCommand=True,  # Enable ECS Exec
+            networkConfiguration={
+                "awsvpcConfiguration": {
+                    "subnets": self.subnets,
+                    "securityGroups": self.security_groups,
+                    "assignPublicIp": "ENABLED"
+                    if self.assign_public_ip
+                    else "DISABLED",
+                }
+            },
+        )
+
+        tasks = response.get("tasks", [])
+        if not tasks:
+            failures = response.get("failures", [])
+            raise RuntimeError(f"Failed to run task: {failures}")
+
+        self.task_arn = tasks[0]["taskArn"]
+
+    async def _wait_for_task(self, timeout: int = 120):
+        """Wait for the task to reach the RUNNING state."""
+        start = time.time()
+        while time.time() - start < timeout:
+            response = await asyncio.to_thread(
+                self.ecs.describe_tasks,
+                cluster=self.cluster,
+                tasks=[self.task_arn],
+            )
+            tasks = response.get("tasks", [])
+            if tasks:
+                status = tasks[0].get("lastStatus")
+                if status == "RUNNING":
+                    # Also check that the execute command agent is running
+                    containers = tasks[0].get("containers", [])
+                    for container in containers:
+                        managed_agents = container.get("managedAgents", [])
+                        for agent in managed_agents:
+                            if agent.get("name") == "ExecuteCommandAgent":
+                                if agent.get("lastStatus") == "RUNNING":
+                                    return
+                elif status in ("STOPPED", "DEACTIVATING"):
+                    reason = tasks[0].get("stoppedReason", "Unknown")
+                    raise RuntimeError(f"Task stopped: {reason}")
+
+            await asyncio.sleep(2)
+
+        raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")
+
+    async def _exec(
+        self,
+        command: str,
+        timeout: int = 60,
+    ) -> str:
+        """
+        Execute a command in the sandbox.
+
+        Args:
+            command: Shell command to execute
+            timeout: Timeout in seconds
+
+        Returns:
+            Command output (stdout + stderr)
+        """
+        await self._ensure_initialized()
+
+        # Call ECS execute_command
+        response = await asyncio.to_thread(
+            self.ecs.execute_command,
+            cluster=self.cluster,
+            task=self.task_arn,
+            container="sandbox",
+            interactive=True,
+            command=f"/bin/sh -c {shlex.quote(command)}",
+        )
+
+        session = response.get("session", {})
+        stream_url = session.get("streamUrl")
+        token = session.get("tokenValue")
+
+        if not stream_url or not token:
+            return f"Error: Failed to get session: {response}"
+
+        # Connect to the websocket and read output
+        try:
+            output = await self._read_ssm_session(stream_url, token, timeout)
+        except Exception as e:
+            return f"Error executing command: {e}"
+
+        # Truncate if needed
+        if len(output) > 5000:
+            output = "...[truncated]...\n" + output[-5000:]
+
+        return output if output else "(no output)"
+
+    async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
+        """
+        Connect to the SSM session websocket and read command output.
+
+        The SSM agent uses a binary protocol:
+            - Header: 4-byte big-endian length + 32-byte null-padded message type
+            - Payload varies by message type
+
+        Note: SSM retransmits messages until ACKed. Since we're just reading
+        (not fully implementing the protocol), we deduplicate by tracking
+        seen message hashes.
+        """
+        import aiohttp
+
+        output_chunks = []
+        seen_messages: set[bytes] = set()  # Dedupe retransmissions
+
+        async with aiohttp.ClientSession() as session:
+            async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
+                # Send init message with token
+                init_message = {
+                    "MessageSchemaVersion": "1.0",
+                    "RequestId": str(uuid.uuid4()),
+                    "TokenValue": token,
+                }
+                await ws.send_str(json.dumps(init_message))
+
+                # Read messages until the channel closes or times out
+                try:
+                    async for msg in ws:
+                        if msg.type == aiohttp.WSMsgType.BINARY:
+                            # Skip duplicate messages (SSM retransmits until ACKed)
+                            msg_hash = msg.data[:116]  # Header is enough to identify
+                            if msg_hash in seen_messages:
+                                continue
+                            seen_messages.add(msg_hash)
+
+                            parsed = self._parse_ssm_message(msg.data)
+                            if parsed:
+                                msg_type, payload = parsed
+                                if "output_stream_data" in msg_type:
+                                    output_chunks.append(payload)
+                                elif "channel_closed" in msg_type:
+                                    break
+                        elif msg.type == aiohttp.WSMsgType.ERROR:
+                            break
+                        elif msg.type == aiohttp.WSMsgType.CLOSED:
+                            break
+                except asyncio.TimeoutError:
+                    pass
+
+        return "".join(output_chunks)
+
+    def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
+        """
+        Parse an SSM agent message.
+
+        Format:
+            - Bytes 0-3: Header length (big-endian uint32)
+            - Bytes 4-35: Message type (32 bytes, null-padded ASCII)
+            - After header: Payload length (4 bytes) + payload
+        """
+        if len(data) < 36:
+            return None
+
+        try:
+            header_len = struct.unpack(">I", data[0:4])[0]
+            msg_type = data[4:36].decode("ascii").rstrip("\x00")
+
+            # Payload starts after the header
+            if len(data) > header_len:
+                payload_data = data[header_len:]
+                if len(payload_data) >= 4:
+                    payload_len = struct.unpack(">I", payload_data[0:4])[0]
+                    if len(payload_data) >= 4 + payload_len:
+                        payload = payload_data[4 : 4 + payload_len].decode(
+                            "utf-8", errors="replace"
+                        )
+                        return msg_type, payload
+
+            return msg_type, ""
+        except Exception:
+            return None
+
+    async def _destroy(self):
+        """Stop the task and clean up."""
+        if self._destroyed:
+            return
+
+        if self.task_arn:
+            try:
+                await asyncio.to_thread(
+                    self.ecs.stop_task,
+                    cluster=self.cluster,
+                    task=self.task_arn,
+                    reason="Sandbox destroyed",
+                )
+            except Exception:
+                pass  # Best effort
+
+        # Optionally deregister the task definition
+        if self.task_definition_arn:
+            try:
+                await asyncio.to_thread(
+                    self.ecs.deregister_task_definition,
+                    taskDefinition=self.task_definition_arn,
+                )
+            except Exception:
+                pass
+
+        self._destroyed = True
+        self._initialized = False
+
+    def get_tools(self):
+        """Return the list of tools for LLM use."""
+        bash_tool = Tool(
+            name="bash",
+            description=(
+                "Execute a bash command in the AWS Fargate sandbox environment. "
+                "The command runs in an isolated container. "
+                "Output is truncated to the last 5000 characters if longer. "
+                "Note: This sandbox does not support background processes - "
+                "commands must complete within the timeout."
+            ),
+            run=self._exec,
+            parameters={
+                "command": {
+                    "type": "string",
+                    "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "description": "Timeout in seconds for the command execution (default: 60)",
+                },
+            },
+            required=["command"],
+        )
+
+        return [bash_tool]
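
For readers evaluating the new DockerSandbox, the pieces above compose into a short driver loop. The sketch below is illustrative only: it assumes DockerSandbox is importable from the package root (the module path is not shown in this diff), and it calls _exec and _check_process directly, which are the same callables the bash and list_processes tools wrap.

import asyncio

from lm_deluge import DockerSandbox  # assumed import path


async def main():
    async with DockerSandbox(network_mode="none") as sandbox:
        tools = sandbox.get_tools()  # [bash_tool, check_tool]
        # Foreground command: waits up to the timeout, returns output.
        print(await sandbox._exec("python -V", timeout=30))
        # Background command: returns immediately with a tracked name.
        print(await sandbox._exec("sleep 30", wait=False, name="napper"))
        # Poll the background process by name.
        print(sandbox._check_process("napper"))


asyncio.run(main())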
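The binary framing handled by _read_ssm_session and _parse_ssm_message can also be checked without a live session. This is a minimal sketch under the same layout assumptions the parser makes (a 116-byte header, matching the slice used for deduplication above); the message type is real, but the payload text and header padding are synthetic.

import struct

msg_type = b"output_stream_data".ljust(32, b"\x00")  # 32-byte null-padded type
payload = "hello from the task\n".encode()

header = struct.pack(">I", 116) + msg_type  # bytes 0-3: header length; 4-35: type
header = header.ljust(116, b"\x00")         # pad out the remaining header fields
frame = header + struct.pack(">I", len(payload)) + payload

# The same unpacking _parse_ssm_message performs:
header_len = struct.unpack(">I", frame[0:4])[0]           # 116
parsed_type = frame[4:36].decode("ascii").rstrip("\x00")  # "output_stream_data"
payload_len = struct.unpack(">I", frame[header_len : header_len + 4])[0]
text = frame[header_len + 4 : header_len + 4 + payload_len].decode("utf-8")

assert (parsed_type, text) == ("output_stream_data", "hello from the task\n")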
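On the FargateSandbox side, the __init__ docstring notes that memory "must be compatible with CPU" but does not list the accepted pairs. A hypothetical pre-flight check is sketched below; it is not part of the package, and the table reflects the commonly documented Fargate task sizes for the smaller CPU tiers, which should be verified against current AWS documentation.

# Hypothetical helper, not part of lm-deluge: reject cpu/memory pairs
# Fargate will not accept. Values taken from AWS's published task-size
# table (verify against current docs before relying on them).
VALID_FARGATE_MEMORY_MB = {
    256: {512, 1024, 2048},
    512: {1024, 2048, 3072, 4096},
    1024: {2048, 3072, 4096, 5120, 6144, 7168, 8192},
}


def check_task_size(cpu: int, memory: int) -> None:
    allowed = VALID_FARGATE_MEMORY_MB.get(cpu)
    if allowed is None or memory not in allowed:
        raise ValueError(f"Fargate rejects cpu={cpu} with memory={memory} MB")


check_task_size(256, 512)  # the FargateSandbox defaults - accepted
# check_task_size(256, 768)  # would raise ValueError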