lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +0 -24
- lm_deluge/api_requests/anthropic.py +25 -5
- lm_deluge/api_requests/base.py +37 -0
- lm_deluge/api_requests/bedrock.py +23 -2
- lm_deluge/api_requests/gemini.py +36 -10
- lm_deluge/api_requests/openai.py +31 -4
- lm_deluge/batches.py +15 -45
- lm_deluge/client.py +27 -1
- lm_deluge/models/__init__.py +2 -0
- lm_deluge/models/anthropic.py +12 -12
- lm_deluge/models/google.py +13 -0
- lm_deluge/models/minimax.py +9 -1
- lm_deluge/models/openrouter.py +48 -0
- lm_deluge/models/zai.py +50 -1
- lm_deluge/pipelines/gepa/docs/samples.py +19 -10
- lm_deluge/prompt.py +333 -68
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/skills/anthropic.py +0 -0
- lm_deluge/skills/compat.py +0 -0
- lm_deluge/tool/__init__.py +13 -1
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
- lm_deluge/mock_openai.py +0 -643
- lm_deluge/tool/prefab/sandbox.py +0 -1621
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
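The removal of lm_deluge/tool/prefab/sandbox.py (-1621 lines) and the five new modules under lm_deluge/tool/prefab/sandbox/ replace the single-file sandbox implementation with one module per backend (Daytona, Docker, Fargate, Modal, macOS seatbelt). The full hunk below is the new Docker backend. A minimal import sketch, assuming the new sandbox/__init__.py (+19 lines) re-exports the backend classes (the diff does not show its contents):

    # Hypothetical: assumes sandbox/__init__.py re-exports the backend classes.
    from lm_deluge.tool.prefab.sandbox import DockerSandbox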
lm_deluge/tool/prefab/sandbox/docker_sandbox.py (new file, +609 lines)
@@ -0,0 +1,609 @@
import asyncio
import os
import secrets
import struct
import time
from dataclasses import dataclass, field
from typing import Any

from lm_deluge.tool import Tool


@dataclass
class TrackedProcess:
    """Tracks a process running in the sandbox."""

    process: Any
    name: str
    command: str
    started_at: float = field(default_factory=time.time)


class DockerSandbox:
    """
    Local Docker-based sandbox for running code in isolated containers.

    Works with Docker Desktop, Colima, or any Docker-compatible runtime.
    Each sandbox instance creates its own container.

    Requires:
        - docker package installed (pip install docker)
        - Docker daemon running (Docker Desktop, Colima, etc.)

    Example:
        async with DockerSandbox() as sandbox:
            tools = sandbox.get_tools()
            # Use tools with your LLM...
    """

    # Default image - has uv pre-installed, Debian Bookworm base
    DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"

    def __init__(
        self,
        image: str | None = None,
        *,
        docker_host: str | None = None,
        network_mode: str = "bridge",
        mem_limit: str = "512m",
        cpu_period: int = 100000,
        cpu_quota: int | None = None,
        working_dir: str = "/workspace",
        stateful: bool = False,
    ):
        """
        Initialize a Docker sandbox.

        Args:
            image: Docker image to use. Defaults to uv's Python 3.12 image.
            docker_host: Docker socket URL. If None, auto-detects from DOCKER_HOST
                env var or tries common socket paths.
            network_mode: Docker network mode. "bridge" (default) for internet access,
                "none" for full isolation.
            mem_limit: Memory limit (e.g., "512m", "1g"). Default "512m".
            cpu_period: CPU period in microseconds. Default 100000.
            cpu_quota: CPU quota in microseconds. None for no limit.
                E.g., 50000 with period 100000 = 50% of one CPU.
            working_dir: Working directory inside container. Default "/workspace".
            stateful: If True, shell state (variables, cd, functions) persists between commands.
        """
        self.image = image or self.DEFAULT_IMAGE
        self.docker_host = docker_host
        self.network_mode = network_mode
        self.mem_limit = mem_limit
        self.cpu_period = cpu_period
        self.cpu_quota = cpu_quota
        self.working_dir = working_dir
        self.stateful = stateful

        # State
        self.container = None
        self._client = None
        self._initialized = False
        self._destroyed = False

        # Process tracking for background processes
        self.processes: dict[str, TrackedProcess] = {}
        self.process_counter: int = 0

        # Stateful mode: persistent shell
        self._shell_socket: Any | None = None
        self._shell_exec_id: Any | None = None
        self._shell_initialized = False
        self._delimiter = f"__DELIM_{secrets.token_hex(8)}__"
        self._output_buffer = b""

    @property
    def client(self):
        """Lazy-load Docker client."""
        if self._client is None:
            import docker

            if self.docker_host:
                self._client = docker.DockerClient(base_url=self.docker_host)
            else:
                # Auto-detect socket location
                # Try DOCKER_HOST env first, then common socket paths
                docker_host = os.environ.get("DOCKER_HOST")
                if not docker_host:
                    # Common socket paths (Docker Desktop, Colima, Podman, etc.)
                    socket_paths = [
                        os.path.expanduser("~/.colima/default/docker.sock"),
                        os.path.expanduser("~/.colima/docker.sock"),
                        "/var/run/docker.sock",
                        os.path.expanduser("~/.docker/run/docker.sock"),
                        os.path.expanduser(
                            "~/.local/share/containers/podman/machine/podman.sock"
                        ),
                    ]
                    for path in socket_paths:
                        if os.path.exists(path):
                            docker_host = f"unix://{path}"
                            break

                if docker_host:
                    self._client = docker.DockerClient(base_url=docker_host)
                else:
                    # Fall back to default (will likely fail but gives clear error)
                    self._client = docker.from_env()
        return self._client

    async def __aenter__(self):
        """Async context manager entry - initialize sandbox."""
        await self._ensure_initialized()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup sandbox."""
        if not self._destroyed:
            await self._destroy()
        return False

    def __enter__(self):
        """Sync context manager entry."""
        import asyncio

        asyncio.get_event_loop().run_until_complete(self._ensure_initialized())
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Sync context manager exit."""
        if not self._destroyed:
            self._destroy_sync()
        return False

    def __del__(self):
        """Cleanup container when garbage collected (backup cleanup)."""
        if not self._destroyed and self.container:
            import warnings

            warnings.warn(
                "DockerSandbox was not properly cleaned up. "
                "Use 'with DockerSandbox(...) as sandbox:' for automatic cleanup.",
                ResourceWarning,
                stacklevel=2,
            )

    async def _ensure_initialized(self):
        """Lazy initialization - pull image if needed and start container."""
        if self._initialized:
            return

        # Pull image if not present
        await asyncio.to_thread(self._pull_image_if_needed)

        # Create and start container
        await asyncio.to_thread(self._create_container)

        self._initialized = True

    def _pull_image_if_needed(self):
        """Pull the Docker image if not already present."""
        try:
            self.client.images.get(self.image)
        except Exception:
            # Image not found locally, pull it
            self.client.images.pull(self.image)

    def _create_container(self):
        """Create and start the container."""
        self.container = self.client.containers.run(
            self.image,
            command=["sleep", "infinity"],
            detach=True,
            remove=True,  # Auto-remove when stopped
            network_mode=self.network_mode,
            mem_limit=self.mem_limit,
            cpu_period=self.cpu_period,
            cpu_quota=self.cpu_quota,
            working_dir=self.working_dir,
            # Create the working directory
            entrypoint=[
                "/bin/sh",
                "-c",
                f"mkdir -p {self.working_dir} && sleep infinity",
            ],
        )

    def _generate_process_name(self) -> str:
        """Generate a unique process name like p1, p2, etc."""
        self.process_counter += 1
        return f"p{self.process_counter}"

    def _ensure_shell_started(self):
        """Start the persistent shell for stateful mode if not already running."""
        if self._shell_initialized:
            return

        assert self.container is not None, "Container not initialized"

        # Create exec with stdin enabled
        self._shell_exec_id = self.client.api.exec_create(
            self.container.id,
            ["bash"],
            stdin=True,
            tty=False,  # No TTY to avoid escape codes
            workdir=self.working_dir,
        )

        # Start and get socket
        self._shell_socket = self.client.api.exec_start(
            self._shell_exec_id,
            socket=True,
            demux=False,
        )

        self._shell_initialized = True
        self._output_buffer = b""

    def _parse_docker_stream(self, data: bytes) -> bytes:
        """Parse Docker's multiplexed stream format and extract content."""
        result = b""
        pos = 0

        while pos < len(data):
            if pos + 8 > len(data):
                # Incomplete header, keep remainder in buffer
                break

            # Docker stream header: 8 bytes
            # Byte 0: stream type (1=stdout, 2=stderr)
            # Bytes 1-3: reserved
            # Bytes 4-7: payload size (big-endian)
            header = data[pos : pos + 8]
            payload_size = struct.unpack(">I", header[4:8])[0]

            if pos + 8 + payload_size > len(data):
                # Incomplete payload, keep remainder in buffer
                break

            payload = data[pos + 8 : pos + 8 + payload_size]
            result += payload
            pos += 8 + payload_size

        return result

    def _read_until_delimiter(self, timeout: float | None = None) -> tuple[str, int]:
        """
        Read from shell socket until we see the delimiter.

        Returns:
            Tuple of (output, exit_code)
        """
        import select

        end_marker = f"{self._delimiter}:END:".encode()
        assert self._shell_socket
        sock = self._shell_socket._sock

        start_time = time.time()

        while True:
            # Check timeout
            if timeout and (time.time() - start_time) > timeout:
                output = self._parse_docker_stream(self._output_buffer).decode(
                    "utf-8", errors="replace"
                )
                self._output_buffer = b""
                return output + "\n[Command timed out]", -1

            # Wait for data with timeout
            remaining = timeout - (time.time() - start_time) if timeout else 1.0
            ready, _, _ = select.select([sock], [], [], max(0.1, remaining))

            if ready:
                try:
                    chunk = sock.recv(4096)
                    if not chunk:
                        # Socket closed
                        output = self._parse_docker_stream(self._output_buffer).decode(
                            "utf-8", errors="replace"
                        )
                        self._output_buffer = b""
                        return output, -1
                    self._output_buffer += chunk
                except Exception:
                    output = self._parse_docker_stream(self._output_buffer).decode(
                        "utf-8", errors="replace"
                    )
                    self._output_buffer = b""
                    return output, -1

            # Parse what we have so far
            parsed = self._parse_docker_stream(self._output_buffer)

            # Check if we have the marker
            if end_marker in parsed:
                # Find the marker and extract output + exit code
                marker_idx = parsed.find(end_marker)
                output = parsed[:marker_idx].decode("utf-8", errors="replace")

                # Parse exit code from after marker
                after_marker = parsed[marker_idx + len(end_marker) :]
                exit_code = 0
                exit_line = after_marker.split(b"\n")[0]
                if exit_line.isdigit():
                    exit_code = int(exit_line)
                elif exit_line.lstrip(b"-").isdigit():
                    exit_code = int(exit_line)

                # Keep anything after the exit code line for next command
                newline_idx = after_marker.find(b"\n")
                if newline_idx >= 0:
                    # Reconstruct buffer with unparsed data
                    self._output_buffer = after_marker[newline_idx + 1 :]
                else:
                    self._output_buffer = b""

                return output, exit_code

    def _exec_stateful_sync(self, command: str, timeout: float | None = None) -> str:
        """Execute a command in the persistent shell (stateful mode) - sync version."""
        self._ensure_shell_started()

        # Send the command followed by a marker that includes the exit code
        wrapped_cmd = f"{command}; echo '{self._delimiter}:END:'$?\n"
        assert self._shell_socket
        self._shell_socket._sock.sendall(wrapped_cmd.encode())

        # Read output until delimiter
        output, exit_code = self._read_until_delimiter(timeout=timeout)

        # Clean up output
        output = output.strip()

        # Truncate if needed
        if len(output) > 5000:
            output = "...[truncated]...\n" + output[-5000:]

        # Include exit code if non-zero
        if exit_code != 0:
            output = f"[Exit code: {exit_code}]\n{output}"

        return output if output else "(no output)"

    async def _exec_stateful(self, command: str, timeout: float | None = None) -> str:
        """Execute a command in the persistent shell (stateful mode)."""
        return await asyncio.to_thread(self._exec_stateful_sync, command, timeout)

    async def _exec(
        self,
        command: str,
        timeout: int | None = 120000,
        run_in_background: bool = False,
        name: str | None = None,
        description: str | None = None,
    ) -> str:
        """
        Execute a command in the sandbox.

        Args:
            command: Shell command to execute
            timeout: Timeout in milliseconds (default: 120000 = 2 minutes, max: 600000)
            run_in_background: If True, run in background and return immediately.
            name: Name for background process (auto-generated if not provided)
            description: Short description of what this command does (for logging)

        Returns:
            Command output if foreground, or status message if background
        """
        await self._ensure_initialized()
        assert self.container is not None, "Container not initialized"

        # Convert timeout from milliseconds to seconds
        timeout_seconds: float | None = None
        if timeout is not None and not run_in_background:
            timeout_seconds = min(timeout / 1000, 600)  # Cap at 10 minutes

        # Use stateful mode for foreground commands when enabled
        if self.stateful and not run_in_background:
            return await self._exec_stateful(command, timeout=timeout_seconds)

        if not run_in_background:
            # Synchronous execution with timeout
            try:
                exit_code, output = await asyncio.wait_for(
                    asyncio.to_thread(
                        self.container.exec_run,
                        ["sh", "-c", command],
                        workdir=self.working_dir,
                    ),
                    timeout=timeout_seconds,
                )
            except asyncio.TimeoutError:
                return f"[Timeout after {timeout_seconds:.0f}s]"

            # Decode output
            if isinstance(output, bytes):
                output = output.decode("utf-8", errors="replace")

            # Truncate if needed
            if len(output) > 5000:
                output = "...[truncated]...\n" + output[-5000:]

            # Include exit code if non-zero
            if exit_code != 0:
                output = f"[Exit code: {exit_code}]\n{output}"

            return output if output else "(no output)"
        else:
            # Background execution
            exec_id = await asyncio.to_thread(
                self.client.api.exec_create,
                self.container.id,
                ["sh", "-c", command],
                workdir=self.working_dir,
            )
            await asyncio.to_thread(
                self.client.api.exec_start,
                exec_id,
                detach=True,
            )

            proc_name = name or self._generate_process_name()
            tracked = TrackedProcess(
                process=exec_id,
                name=proc_name,
                command=command,
            )
            self.processes[proc_name] = tracked

            return (
                f"Started background process '{proc_name}'.\n"
                f"Command: {command}\n"
                f"Use list_processes() to check status."
            )

    def _check_process(self, name: str | None = None) -> str:
        """Check status of background processes."""
        if not self.processes:
            return "No background processes have been started."

        if name:
            proc = self.processes.get(name)
            if not proc:
                available = ", ".join(self.processes.keys())
                return f"Process '{name}' not found. Available: {available}"

            # Check exec status
            exec_info = self.client.api.exec_inspect(proc.process)
            running = exec_info.get("Running", False)
            exit_code = exec_info.get("ExitCode")

            if running:
                status = "running"
            else:
                status = f"completed (exit code: {exit_code})"

            elapsed = time.time() - proc.started_at
            return f"Process: {name}\nCommand: {proc.command}\nStatus: {status}\nRunning for: {elapsed:.1f}s"
        else:
            # Show all processes
            lines = ["NAME     STATUS              COMMAND"]
            for proc_name, proc in self.processes.items():
                exec_info = self.client.api.exec_inspect(proc.process)
                running = exec_info.get("Running", False)
                exit_code = exec_info.get("ExitCode")

                if running:
                    status = "running"
                else:
                    status = f"exit {exit_code}"

                cmd_display = (
                    proc.command[:40] + "..."
                    if len(proc.command) > 40
                    else proc.command
                )
                lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")

            return "\n".join(lines)

    async def _destroy(self):
        """Stop the container and clean up."""
        if self._destroyed:
            return

        # Clean up shell socket if in stateful mode
        if self._shell_socket is not None:
            try:
                self._shell_socket.close()
            except Exception:
                pass
            self._shell_socket = None
            self._shell_initialized = False

        if self.container:
            try:
                await asyncio.to_thread(self.container.stop, timeout=5)
            except Exception:
                pass  # Container might already be stopped

        self._destroyed = True
        self._initialized = False

    def _destroy_sync(self):
        """Synchronous version of destroy."""
        if self._destroyed:
            return

        # Clean up shell socket if in stateful mode
        if self._shell_socket is not None:
            try:
                self._shell_socket.close()
            except Exception:
                pass
            self._shell_socket = None
            self._shell_initialized = False

        if self.container:
            try:
                self.container.stop(timeout=5)
            except Exception:
                pass

        self._destroyed = True
        self._initialized = False

    def get_tools(self):
        """Return list of tools for LLM use."""
        if self.stateful:
            bash_description = (
                "Execute a bash command in the Docker sandbox environment. "
                "This sandbox maintains state between commands - shell variables, "
                "working directory (cd), and functions persist across calls. "
                "The sandbox has Python 3.12 and uv pre-installed. "
                "Set run_in_background=true to run servers or long-running processes "
                "(background processes run independently and don't share state)."
            )
        else:
            bash_description = (
                "Execute a bash command in the Docker sandbox environment. "
                "Each command runs in a fresh shell (no state persistence between commands). "
                "The sandbox has Python 3.12 and uv pre-installed. "
                "Set run_in_background=true to run servers or long-running processes."
            )

        bash_tool = Tool(
            name="bash",
            description=bash_description,
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "Shell command to execute (e.g., 'ls -la', 'python script.py')",
                },
                "description": {
                    "type": "string",
                    "description": "Short description of what this command does (5-10 words)",
                },
                "run_in_background": {
                    "type": "boolean",
                    "description": "If true, run in background without waiting. Default: false.",
                },
                "name": {
                    "type": "string",
                    "description": "Name for background process (e.g., 'server'). Only used with run_in_background=true.",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds (default: 120000, max: 600000)",
                },
            },
            required=["command"],
        )

        check_tool = Tool(
            name="list_processes",
            description="Check status of background processes. Shows whether each process is running or has exited.",
            run=self._check_process,
            parameters={
                "name": {
                    "type": "string",
                    "description": "Process name to check, or omit to see all processes",
                },
            },
            required=[],
        )

        return [bash_tool, check_tool]
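Usage follows the docstring's own example: construct the sandbox, enter the async context, and hand get_tools() to a model. A minimal sketch under stated assumptions: it calls the private _exec method directly (the same callable the bash tool wraps via Tool(run=self._exec)), and it assumes the docker package is installed and a daemon is reachable:

    import asyncio

    from lm_deluge.tool.prefab.sandbox.docker_sandbox import DockerSandbox

    async def main():
        # stateful=True keeps one bash process alive, so cd and variables persist
        async with DockerSandbox(stateful=True, mem_limit="1g") as sandbox:
            bash_tool, list_processes = sandbox.get_tools()
            print(await sandbox._exec("python -V && uv --version"))
            print(await sandbox._exec("cd /tmp && pwd"))  # persists only when stateful

    asyncio.run(main())

The 8-byte header that _parse_docker_stream consumes is Docker's attach framing: one stream-type byte, three reserved bytes, then a big-endian payload length. A self-contained sketch of the same parse, useful for checking the framing logic without a daemon:

    import struct

    def parse(data: bytes) -> bytes:
        # Same framing as _parse_docker_stream: type byte, 3 reserved bytes,
        # 4-byte big-endian payload size, then the payload itself.
        out, pos = b"", 0
        while pos + 8 <= len(data):
            size = struct.unpack(">I", data[pos + 4 : pos + 8])[0]
            if pos + 8 + size > len(data):
                break  # incomplete payload; the real method buffers the remainder
            out += data[pos + 8 : pos + 8 + size]
            pos += 8 + size
        return out

    frame = struct.pack(">BxxxI", 1, 5) + b"hello"  # stream type 1 = stdout
    assert parse(frame * 2) == b"hellohello"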