lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +1 -2
- lm_deluge/api_requests/anthropic.py +117 -22
- lm_deluge/api_requests/base.py +84 -11
- lm_deluge/api_requests/bedrock.py +30 -6
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +166 -20
- lm_deluge/api_requests/openai.py +145 -25
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +309 -50
- lm_deluge/config.py +15 -3
- lm_deluge/models/__init__.py +14 -1
- lm_deluge/models/anthropic.py +29 -14
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +42 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +18 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +133 -7
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +50 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +705 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +537 -88
- lm_deluge/request_context.py +7 -2
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/tool/__init__.py +1130 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
- lm_deluge-0.0.90.dist-info/RECORD +132 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
- /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,546 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import secrets
|
|
4
|
+
import shlex
|
|
5
|
+
import struct
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from lm_deluge.tool import Tool
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class TrackedProcess:
    """Tracks a process running in the sandbox."""

    # NOTE(review): the original comment labeled this "Modal's ContainerProcess",
    # which looks copy-pasted from the Modal sandbox module; in this Fargate
    # module it would be an opaque process handle. Also note this dataclass is
    # not referenced anywhere else in this file - confirm it is still needed.
    process: Any
    # Human-readable identifier for the tracked process.
    name: str
    # Shell command that launched the process.
    command: str
    # Epoch seconds when tracking began (defaults to time of construction).
    started_at: float = field(default_factory=time.time)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FargateSandbox:
    """
    AWS Fargate-based sandbox for running untrusted code in isolated containers.

    Requires:
    - boto3 installed
    - AWS credentials configured
    - VPC with subnets that have internet access (for pulling images)
    - Security group that allows outbound traffic

    The sandbox automatically:
    - Creates IAM roles for task execution and ECS Exec
    - Registers a task definition with the specified image
    - Runs a Fargate task and waits for it to be ready
    - Executes commands via ECS Exec (SSM Session Manager)

    Example:
        async with FargateSandbox(
            subnets=["subnet-abc123"],
            security_groups=["sg-abc123"],
        ) as sandbox:
            tools = sandbox.get_tools()
            # Use tools with your LLM...
    """

    # Default image - minimal Python with common tools
    DEFAULT_IMAGE = "python:3.12-slim"

    # IAM policy for ECS Exec (SSM Session Manager)
    # These four ssmmessages actions are the minimum the task role needs for
    # the ECS Exec agent to open its SSM control/data channels.
    EXEC_POLICY = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "ssmmessages:CreateControlChannel",
                    "ssmmessages:CreateDataChannel",
                    "ssmmessages:OpenControlChannel",
                    "ssmmessages:OpenDataChannel",
                ],
                "Resource": "*",
            }
        ],
    }

    # Trust policy for ECS tasks
    # Shared by both the task role and the execution role: each must be
    # assumable by the ECS tasks service principal.
    TASK_TRUST_POLICY = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"Service": "ecs-tasks.amazonaws.com"},
                "Action": "sts:AssumeRole",
            }
        ],
    }
|
|
80
|
+
|
|
81
|
+
def __init__(
|
|
82
|
+
self,
|
|
83
|
+
subnets: list[str],
|
|
84
|
+
security_groups: list[str],
|
|
85
|
+
*,
|
|
86
|
+
cluster: str | None = None,
|
|
87
|
+
image: str | None = None,
|
|
88
|
+
cpu: int = 256,
|
|
89
|
+
memory: int = 512,
|
|
90
|
+
region: str | None = None,
|
|
91
|
+
task_role_arn: str | None = None,
|
|
92
|
+
execution_role_arn: str | None = None,
|
|
93
|
+
assign_public_ip: bool = True,
|
|
94
|
+
):
|
|
95
|
+
"""
|
|
96
|
+
Initialize a Fargate sandbox.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
subnets: List of VPC subnet IDs (required). Use subnets with internet
|
|
100
|
+
access (public subnets with IGW, or private with NAT).
|
|
101
|
+
security_groups: List of security group IDs (required). Must allow
|
|
102
|
+
outbound HTTPS (443) for ECS Exec to work.
|
|
103
|
+
cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
|
|
104
|
+
image: Docker image to use. Defaults to python:3.12-slim.
|
|
105
|
+
cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
|
|
106
|
+
memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
|
|
107
|
+
region: AWS region. If None, uses boto3 default.
|
|
108
|
+
task_role_arn: IAM role ARN for the task. If None, creates one with
|
|
109
|
+
minimal permissions (just SSM for ECS Exec).
|
|
110
|
+
execution_role_arn: IAM role ARN for task execution. If None, uses
|
|
111
|
+
the AWS managed ecsTaskExecutionRole.
|
|
112
|
+
assign_public_ip: Whether to assign a public IP. Required if using
|
|
113
|
+
public subnets without NAT. Default True.
|
|
114
|
+
"""
|
|
115
|
+
self.subnets = subnets
|
|
116
|
+
self.security_groups = security_groups
|
|
117
|
+
self.cluster = cluster or "lm-deluge-sandbox"
|
|
118
|
+
self.image = image or self.DEFAULT_IMAGE
|
|
119
|
+
self.cpu = str(cpu)
|
|
120
|
+
self.memory = str(memory)
|
|
121
|
+
self.region = region
|
|
122
|
+
self.task_role_arn = task_role_arn
|
|
123
|
+
self.execution_role_arn = execution_role_arn
|
|
124
|
+
self.assign_public_ip = assign_public_ip
|
|
125
|
+
|
|
126
|
+
# State
|
|
127
|
+
self.task_arn: str | None = None
|
|
128
|
+
self.task_definition_arn: str | None = None
|
|
129
|
+
self._initialized = False
|
|
130
|
+
self._destroyed = False
|
|
131
|
+
|
|
132
|
+
# boto3 clients (lazy init)
|
|
133
|
+
self._ecs_client = None
|
|
134
|
+
self._iam_client = None
|
|
135
|
+
|
|
136
|
+
@property
|
|
137
|
+
def ecs(self):
|
|
138
|
+
"""Lazy-load ECS client."""
|
|
139
|
+
if self._ecs_client is None:
|
|
140
|
+
import boto3
|
|
141
|
+
|
|
142
|
+
self._ecs_client = boto3.client("ecs", region_name=self.region)
|
|
143
|
+
return self._ecs_client
|
|
144
|
+
|
|
145
|
+
@property
|
|
146
|
+
def iam(self):
|
|
147
|
+
"""Lazy-load IAM client."""
|
|
148
|
+
if self._iam_client is None:
|
|
149
|
+
import boto3
|
|
150
|
+
|
|
151
|
+
self._iam_client = boto3.client("iam", region_name=self.region)
|
|
152
|
+
return self._iam_client
|
|
153
|
+
|
|
154
|
+
    async def __aenter__(self):
        """Async context manager entry - initialize sandbox.

        Spins up the full stack (cluster, IAM roles, task definition,
        Fargate task) before returning self.
        """
        await self._ensure_initialized()
        return self
|
|
158
|
+
|
|
159
|
+
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup sandbox."""
        if not self._destroyed:
            await self._destroy()
        # Returning False propagates any exception raised inside the block.
        return False
|
|
164
|
+
|
|
165
|
+
    def __del__(self):
        """Warn when the sandbox is garbage-collected without cleanup.

        This does NOT stop the Fargate task - async cleanup cannot run
        reliably from __del__, so the method only emits a ResourceWarning.
        A task leaked this way keeps billing until stopped manually; use
        `async with` for deterministic teardown.
        """
        if not self._destroyed and self.task_arn:
            import warnings

            warnings.warn(
                "FargateSandbox was not properly cleaned up. "
                "Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
                ResourceWarning,
                stacklevel=2,
            )
|
|
176
|
+
|
|
177
|
+
    async def _ensure_initialized(self):
        """Lazy initialization - create cluster, task def, and run task.

        Idempotent once complete. NOTE(review): there is no lock here, so two
        overlapping calls before `_initialized` flips could each launch a
        task - confirm callers serialize first use.
        """
        if self._initialized:
            return

        # Ensure cluster exists
        await self._ensure_cluster()

        # Ensure IAM roles exist (roles must exist before the task definition
        # references their ARNs)
        await self._ensure_roles()

        # Register task definition
        await self._register_task_definition()

        # Run the task
        await self._run_task()

        # Wait for task to be running (including the ECS Exec agent)
        await self._wait_for_task()

        self._initialized = True
|
|
198
|
+
|
|
199
|
+
    async def _ensure_cluster(self):
        """Create ECS cluster if it doesn't exist.

        boto3 calls are blocking, so they are pushed to a worker thread via
        asyncio.to_thread throughout this class.
        """
        try:
            response = await asyncio.to_thread(
                self.ecs.describe_clusters, clusters=[self.cluster]
            )
            clusters = response.get("clusters", [])
            if clusters and clusters[0].get("status") == "ACTIVE":
                return  # Cluster exists
        except Exception:
            # Deliberate best-effort: any describe failure falls through to
            # create_cluster, which will surface a real error if there is one.
            pass

        # Create cluster
        await asyncio.to_thread(
            self.ecs.create_cluster,
            clusterName=self.cluster,
            settings=[
                {"name": "containerInsights", "value": "disabled"},
            ],
        )
|
|
219
|
+
|
|
220
|
+
async def _ensure_roles(self):
|
|
221
|
+
"""Create IAM roles if not provided."""
|
|
222
|
+
# Task role (for ECS Exec)
|
|
223
|
+
if not self.task_role_arn:
|
|
224
|
+
role_name = "lm-deluge-sandbox-task-role"
|
|
225
|
+
try:
|
|
226
|
+
response = await asyncio.to_thread(
|
|
227
|
+
self.iam.get_role, RoleName=role_name
|
|
228
|
+
)
|
|
229
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
230
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
231
|
+
# Create the role
|
|
232
|
+
response = await asyncio.to_thread(
|
|
233
|
+
self.iam.create_role,
|
|
234
|
+
RoleName=role_name,
|
|
235
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
236
|
+
Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
|
|
237
|
+
)
|
|
238
|
+
self.task_role_arn = response["Role"]["Arn"]
|
|
239
|
+
|
|
240
|
+
# Attach inline policy for ECS Exec
|
|
241
|
+
await asyncio.to_thread(
|
|
242
|
+
self.iam.put_role_policy,
|
|
243
|
+
RoleName=role_name,
|
|
244
|
+
PolicyName="ecs-exec-policy",
|
|
245
|
+
PolicyDocument=json.dumps(self.EXEC_POLICY),
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
# IAM is eventually consistent - wait a bit
|
|
249
|
+
await asyncio.sleep(5)
|
|
250
|
+
|
|
251
|
+
# Execution role (for pulling images, logs)
|
|
252
|
+
if not self.execution_role_arn:
|
|
253
|
+
role_name = "lm-deluge-sandbox-execution-role"
|
|
254
|
+
try:
|
|
255
|
+
response = await asyncio.to_thread(
|
|
256
|
+
self.iam.get_role, RoleName=role_name
|
|
257
|
+
)
|
|
258
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
259
|
+
except self.iam.exceptions.NoSuchEntityException:
|
|
260
|
+
# Create the role
|
|
261
|
+
response = await asyncio.to_thread(
|
|
262
|
+
self.iam.create_role,
|
|
263
|
+
RoleName=role_name,
|
|
264
|
+
AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
|
|
265
|
+
Description="Execution role for lm-deluge Fargate sandbox",
|
|
266
|
+
)
|
|
267
|
+
self.execution_role_arn = response["Role"]["Arn"]
|
|
268
|
+
|
|
269
|
+
# Attach AWS managed policy
|
|
270
|
+
await asyncio.to_thread(
|
|
271
|
+
self.iam.attach_role_policy,
|
|
272
|
+
RoleName=role_name,
|
|
273
|
+
PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# IAM is eventually consistent - wait a bit
|
|
277
|
+
await asyncio.sleep(5)
|
|
278
|
+
|
|
279
|
+
    async def _register_task_definition(self):
        """Register a task definition for the sandbox.

        Uses a random family suffix - presumably so concurrent sandboxes get
        independent task-definition families rather than piling revisions
        onto one shared family (TODO confirm intent).
        """
        family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"

        response = await asyncio.to_thread(
            self.ecs.register_task_definition,
            family=family,
            networkMode="awsvpc",
            requiresCompatibilities=["FARGATE"],
            cpu=self.cpu,
            memory=self.memory,
            taskRoleArn=self.task_role_arn,
            executionRoleArn=self.execution_role_arn,
            containerDefinitions=[
                {
                    "name": "sandbox",
                    "image": self.image,
                    "essential": True,
                    # Keep container running - sleep infinity
                    "command": ["sh", "-c", "sleep infinity"],
                    "linuxParameters": {
                        "initProcessEnabled": True,  # Required for ECS Exec
                    },
                }
            ],
        )
        self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
|
|
306
|
+
|
|
307
|
+
    async def _run_task(self):
        """Run a Fargate task.

        Stores the launched task's ARN in self.task_arn.

        Raises:
            RuntimeError: if ECS reports no launched task (run_task returns
                per-task failures in the response instead of raising).
        """
        response = await asyncio.to_thread(
            self.ecs.run_task,
            cluster=self.cluster,
            taskDefinition=self.task_definition_arn,
            launchType="FARGATE",
            enableExecuteCommand=True,  # Enable ECS Exec
            networkConfiguration={
                "awsvpcConfiguration": {
                    "subnets": self.subnets,
                    "securityGroups": self.security_groups,
                    "assignPublicIp": "ENABLED"
                    if self.assign_public_ip
                    else "DISABLED",
                }
            },
        )

        tasks = response.get("tasks", [])
        if not tasks:
            failures = response.get("failures", [])
            raise RuntimeError(f"Failed to run task: {failures}")

        self.task_arn = tasks[0]["taskArn"]
|
|
332
|
+
|
|
333
|
+
    async def _wait_for_task(self, timeout: int = 120):
        """Wait for task to reach RUNNING state.

        Polls describe_tasks every 2 seconds. Only returns once the task is
        RUNNING *and* the "ExecuteCommandAgent" managed agent is RUNNING,
        because execute_command fails until that agent is up.

        Raises:
            RuntimeError: if the task stops (or starts deactivating) first.
            TimeoutError: if the task is not ready within `timeout` seconds.
        """
        start = time.time()
        while time.time() - start < timeout:
            response = await asyncio.to_thread(
                self.ecs.describe_tasks,
                cluster=self.cluster,
                tasks=[self.task_arn],
            )
            tasks = response.get("tasks", [])
            if tasks:
                status = tasks[0].get("lastStatus")
                if status == "RUNNING":
                    # Also check that execute command agent is running
                    containers = tasks[0].get("containers", [])
                    for container in containers:
                        managed_agents = container.get("managedAgents", [])
                        for agent in managed_agents:
                            if agent.get("name") == "ExecuteCommandAgent":
                                if agent.get("lastStatus") == "RUNNING":
                                    return
                elif status in ("STOPPED", "DEACTIVATING"):
                    reason = tasks[0].get("stoppedReason", "Unknown")
                    raise RuntimeError(f"Task stopped: {reason}")

            await asyncio.sleep(2)

        raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")
|
|
361
|
+
|
|
362
|
+
    async def _exec(
        self,
        command: str,
        timeout: int = 60,
    ) -> str:
        """
        Execute a command in the sandbox.

        Initializes the sandbox on first use, starts an ECS Exec session,
        and reads its output over the SSM websocket. Failures are returned
        as "Error ..." strings rather than raised, so an LLM tool call
        always receives readable text.

        Args:
            command: Shell command to execute
            timeout: Timeout in seconds

        Returns:
            Command output (stdout + stderr), truncated to the last 5000
            characters, or "(no output)" when empty.
        """
        await self._ensure_initialized()

        # Call ECS execute_command. shlex.quote keeps arbitrary shell text
        # intact as the single argument to /bin/sh -c.
        response = await asyncio.to_thread(
            self.ecs.execute_command,
            cluster=self.cluster,
            task=self.task_arn,
            container="sandbox",
            interactive=True,
            command=f"/bin/sh -c {shlex.quote(command)}",
        )

        # execute_command hands back an SSM session: websocket URL + token.
        session = response.get("session", {})
        stream_url = session.get("streamUrl")
        token = session.get("tokenValue")

        if not stream_url or not token:
            return f"Error: Failed to get session: {response}"

        # Connect to websocket and read output
        try:
            output = await self._read_ssm_session(stream_url, token, timeout)
        except Exception as e:
            return f"Error executing command: {e}"

        # Truncate if needed
        if len(output) > 5000:
            output = "...[truncated]...\n" + output[-5000:]

        return output if output else "(no output)"
|
|
407
|
+
|
|
408
|
+
    async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
        """
        Connect to SSM session websocket and read command output.

        The SSM agent uses a binary protocol:
        - Header: 4-byte big-endian length + 32-byte null-padded message type
        - Payload varies by message type

        Note: SSM retransmits messages until ACKed. Since we're just reading
        (not fully implementing the protocol), we deduplicate by tracking
        seen message hashes. Reading stops on a channel_closed message, a
        websocket error/close, or the receive timeout.
        """
        import aiohttp

        output_chunks = []
        seen_messages: set[bytes] = set()  # Dedupe retransmissions

        async with aiohttp.ClientSession() as session:
            async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
                # Send init message with token (authenticates this websocket
                # against the session returned by execute_command)
                init_message = {
                    "MessageSchemaVersion": "1.0",
                    "RequestId": str(uuid.uuid4()),
                    "TokenValue": token,
                }
                await ws.send_str(json.dumps(init_message))

                # Read messages until channel closes or timeout
                try:
                    async for msg in ws:
                        if msg.type == aiohttp.WSMsgType.BINARY:
                            # Skip duplicate messages (SSM retransmits until ACKed).
                            # The first 116 bytes are the fixed-size header,
                            # which uniquely identifies a message.
                            msg_hash = msg.data[:116]  # Header is enough to identify
                            if msg_hash in seen_messages:
                                continue
                            seen_messages.add(msg_hash)

                            parsed = self._parse_ssm_message(msg.data)
                            if parsed:
                                msg_type, payload = parsed
                                if "output_stream_data" in msg_type:
                                    output_chunks.append(payload)
                                elif "channel_closed" in msg_type:
                                    break
                        elif msg.type == aiohttp.WSMsgType.ERROR:
                            break
                        elif msg.type == aiohttp.WSMsgType.CLOSED:
                            break
                except asyncio.TimeoutError:
                    # Receive timeout: return whatever output arrived so far.
                    pass

        return "".join(output_chunks)
|
|
460
|
+
|
|
461
|
+
def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
|
|
462
|
+
"""
|
|
463
|
+
Parse an SSM agent message.
|
|
464
|
+
|
|
465
|
+
Format:
|
|
466
|
+
- Bytes 0-3: Header length (big-endian uint32)
|
|
467
|
+
- Bytes 4-35: Message type (32 bytes, null-padded ASCII)
|
|
468
|
+
- After header: Payload length (4 bytes) + payload
|
|
469
|
+
"""
|
|
470
|
+
if len(data) < 36:
|
|
471
|
+
return None
|
|
472
|
+
|
|
473
|
+
try:
|
|
474
|
+
header_len = struct.unpack(">I", data[0:4])[0]
|
|
475
|
+
msg_type = data[4:36].decode("ascii").rstrip("\x00")
|
|
476
|
+
|
|
477
|
+
# Payload starts after header
|
|
478
|
+
if len(data) > header_len:
|
|
479
|
+
payload_data = data[header_len:]
|
|
480
|
+
if len(payload_data) >= 4:
|
|
481
|
+
payload_len = struct.unpack(">I", payload_data[0:4])[0]
|
|
482
|
+
if len(payload_data) >= 4 + payload_len:
|
|
483
|
+
payload = payload_data[4 : 4 + payload_len].decode(
|
|
484
|
+
"utf-8", errors="replace"
|
|
485
|
+
)
|
|
486
|
+
return msg_type, payload
|
|
487
|
+
|
|
488
|
+
return msg_type, ""
|
|
489
|
+
except Exception:
|
|
490
|
+
return None
|
|
491
|
+
|
|
492
|
+
    async def _destroy(self):
        """Stop the task and clean up.

        Both AWS calls are best-effort: failures are swallowed so teardown
        never raises (it may run from __aexit__ during exception handling).
        Idempotent via the _destroyed flag.
        """
        if self._destroyed:
            return

        if self.task_arn:
            try:
                await asyncio.to_thread(
                    self.ecs.stop_task,
                    cluster=self.cluster,
                    task=self.task_arn,
                    reason="Sandbox destroyed",
                )
            except Exception:
                pass  # Best effort

        # Optionally deregister task definition
        if self.task_definition_arn:
            try:
                await asyncio.to_thread(
                    self.ecs.deregister_task_definition,
                    taskDefinition=self.task_definition_arn,
                )
            except Exception:
                # Best effort - a leaked inactive task definition is harmless.
                pass

        self._destroyed = True
        self._initialized = False
|
|
520
|
+
|
|
521
|
+
def get_tools(self):
|
|
522
|
+
"""Return list of tools for LLM use."""
|
|
523
|
+
bash_tool = Tool(
|
|
524
|
+
name="bash",
|
|
525
|
+
description=(
|
|
526
|
+
"Execute a bash command in the AWS Fargate sandbox environment. "
|
|
527
|
+
"The command runs in an isolated container. "
|
|
528
|
+
"Output is truncated to the last 5000 characters if longer. "
|
|
529
|
+
"Note: This sandbox does not support background processes - "
|
|
530
|
+
"commands must complete within the timeout."
|
|
531
|
+
),
|
|
532
|
+
run=self._exec,
|
|
533
|
+
parameters={
|
|
534
|
+
"command": {
|
|
535
|
+
"type": "string",
|
|
536
|
+
"description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
|
|
537
|
+
},
|
|
538
|
+
"timeout": {
|
|
539
|
+
"type": "integer",
|
|
540
|
+
"description": "Timeout in seconds for the command execution (default: 60)",
|
|
541
|
+
},
|
|
542
|
+
},
|
|
543
|
+
required=["command"],
|
|
544
|
+
)
|
|
545
|
+
|
|
546
|
+
return [bash_tool]
|