lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

Files changed (41) hide show
  1. lm_deluge/__init__.py +0 -24
  2. lm_deluge/api_requests/anthropic.py +25 -5
  3. lm_deluge/api_requests/base.py +37 -0
  4. lm_deluge/api_requests/bedrock.py +23 -2
  5. lm_deluge/api_requests/gemini.py +36 -10
  6. lm_deluge/api_requests/openai.py +31 -4
  7. lm_deluge/batches.py +15 -45
  8. lm_deluge/client.py +27 -1
  9. lm_deluge/models/__init__.py +2 -0
  10. lm_deluge/models/anthropic.py +12 -12
  11. lm_deluge/models/google.py +13 -0
  12. lm_deluge/models/minimax.py +9 -1
  13. lm_deluge/models/openrouter.py +48 -0
  14. lm_deluge/models/zai.py +50 -1
  15. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  16. lm_deluge/prompt.py +333 -68
  17. lm_deluge/server/__init__.py +24 -0
  18. lm_deluge/server/__main__.py +144 -0
  19. lm_deluge/server/adapters.py +369 -0
  20. lm_deluge/server/app.py +388 -0
  21. lm_deluge/server/auth.py +71 -0
  22. lm_deluge/server/model_policy.py +215 -0
  23. lm_deluge/server/models_anthropic.py +172 -0
  24. lm_deluge/server/models_openai.py +175 -0
  25. lm_deluge/skills/anthropic.py +0 -0
  26. lm_deluge/skills/compat.py +0 -0
  27. lm_deluge/tool/__init__.py +13 -1
  28. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  29. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  30. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  31. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  32. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  33. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  34. lm_deluge/tool/prefab/skills.py +0 -0
  35. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
  36. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
  37. lm_deluge/mock_openai.py +0 -643
  38. lm_deluge/tool/prefab/sandbox.py +0 -1621
  39. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  40. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  41. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,827 @@
1
+ """
2
+ Lightweight macOS sandbox using Apple's sandbox-exec (Seatbelt).
3
+
4
+ This provides process isolation without requiring Docker or VMs.
5
+ Uses Apple's built-in sandboxing framework for security.
6
+
7
+ Based on OpenAI Codex's implementation:
8
+ https://github.com/openai/codex/tree/main/codex-rs/core/src
9
+
10
+ Requires macOS. Will raise an error on other platforms.
11
+ """
12
+
13
+ import asyncio
14
+ import os
15
+ import subprocess
16
+ import sys
17
+ import time
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ from lm_deluge.tool import Tool
24
+
25
# Absolute path to Apple's sandbox-exec binary.
# Only use /usr/bin/sandbox-exec to prevent PATH injection attacks: the path is
# hard-coded rather than looked up on PATH, so a directory earlier on PATH
# cannot substitute a different executable.
SEATBELT_EXECUTABLE = "/usr/bin/sandbox-exec"
27
+
28
+
29
class SandboxMode(str, Enum):
    """Filesystem restriction level applied to sandboxed commands.

    Members are plain strings (the class mixes in ``str``), so a member
    compares equal to its value, e.g. ``SandboxMode.READ_ONLY == "read_only"``.
    """

    # --- Restricted modes: reads are limited to the workspace ---------------

    # Can only read workspace directories, cannot write anywhere.
    # Most restrictive.
    WORKSPACE_READ_ONLY = "workspace_read_only"

    # Can only read workspace directories, can write to workspace.
    # Tight sandbox.
    WORKSPACE_READ_WRITE = "workspace_read_write"

    # --- Permissive modes: the entire filesystem is readable ----------------

    # Can read entire filesystem, cannot write anywhere.
    READ_ONLY = "read_only"

    # Can read entire filesystem, can write only to specified directories.
    WORKSPACE_WRITE = "workspace_write"

    # No filesystem restrictions (still uses sandbox for other protections).
    FULL_ACCESS = "full_access"
48
+
49
+
50
# Base policy inspired by Chrome's sandbox and Codex's implementation
# Starts with deny-all, then whitelists specific operations
#
# This is Seatbelt profile source text handed to sandbox-exec via "-p".
# It is always the first fragment of the policy assembled by
# SeatbeltSandbox._build_policy(); file read/write rules and the optional
# network fragment are appended after it. It takes no (param ...) values.
SEATBELT_BASE_POLICY = """\
(version 1)

; Start with closed-by-default
(deny default)

; Child processes inherit the policy of their parent
(allow process-exec)
(allow process-fork)
(allow signal (target same-sandbox))

; Allow cf prefs to work
(allow user-preference-read)

; process-info
(allow process-info* (target same-sandbox))

(allow file-write-data
(require-all
(path "/dev/null")
(vnode-type CHARACTER-DEVICE)))

; sysctls permitted
(allow sysctl-read
(sysctl-name "hw.activecpu")
(sysctl-name "hw.busfrequency_compat")
(sysctl-name "hw.byteorder")
(sysctl-name "hw.cacheconfig")
(sysctl-name "hw.cachelinesize_compat")
(sysctl-name "hw.cpufamily")
(sysctl-name "hw.cpufrequency_compat")
(sysctl-name "hw.cputype")
(sysctl-name "hw.l1dcachesize_compat")
(sysctl-name "hw.l1icachesize_compat")
(sysctl-name "hw.l2cachesize_compat")
(sysctl-name "hw.l3cachesize_compat")
(sysctl-name "hw.logicalcpu_max")
(sysctl-name "hw.machine")
(sysctl-name "hw.memsize")
(sysctl-name "hw.ncpu")
(sysctl-name "hw.nperflevels")
(sysctl-name-prefix "hw.optional.arm.")
(sysctl-name-prefix "hw.optional.armv8_")
(sysctl-name "hw.packages")
(sysctl-name "hw.pagesize_compat")
(sysctl-name "hw.pagesize")
(sysctl-name "hw.physicalcpu")
(sysctl-name "hw.physicalcpu_max")
(sysctl-name "hw.tbfrequency_compat")
(sysctl-name "hw.vectorunit")
(sysctl-name "kern.argmax")
(sysctl-name "kern.hostname")
(sysctl-name "kern.maxfilesperproc")
(sysctl-name "kern.maxproc")
(sysctl-name "kern.osproductversion")
(sysctl-name "kern.osrelease")
(sysctl-name "kern.ostype")
(sysctl-name "kern.osvariant_status")
(sysctl-name "kern.osversion")
(sysctl-name "kern.secure_kernel")
(sysctl-name "kern.usrstack64")
(sysctl-name "kern.version")
(sysctl-name "sysctl.proc_cputype")
(sysctl-name "vm.loadavg")
(sysctl-name-prefix "hw.perflevel")
(sysctl-name-prefix "kern.proc.pgrp.")
(sysctl-name-prefix "kern.proc.pid.")
(sysctl-name-prefix "net.routetable.")
)

; Allow Java to read some CPU info
(allow sysctl-write
(sysctl-name "kern.grade_cputype"))

; IOKit
(allow iokit-open
(iokit-registry-entry-class "RootDomainUserClient")
)

; Needed to look up user info
(allow mach-lookup
(global-name "com.apple.system.opendirectoryd.libinfo")
)

; Needed for python multiprocessing on MacOS for the SemLock
(allow ipc-posix-sem)

(allow mach-lookup
(global-name "com.apple.PowerManagement.control")
)

; Allow openpty()
(allow pseudo-tty)
(allow file-read* file-write* file-ioctl (literal "/dev/ptmx"))
(allow file-read* file-write*
(require-all
(regex #"^/dev/ttys[0-9]+")
(extension "com.apple.sandbox.pty")))
; PTYs created before entering seatbelt may lack the extension
(allow file-ioctl (regex #"^/dev/ttys[0-9]+"))
"""
153
+
154
# Network policy - added when network access is enabled
#
# Appended to the policy by SeatbeltSandbox._build_policy() only when
# network_access is true. The final rule references the profile parameter
# DARWIN_USER_CACHE_DIR, so the caller must pass a matching
# "-DDARWIN_USER_CACHE_DIR=<path>" argument whenever this fragment is used.
SEATBELT_NETWORK_POLICY = """\
; Network access policies
(allow network-outbound)
(allow network-inbound)
(allow system-socket)

(allow mach-lookup
; Used to look up the _CS_DARWIN_USER_CACHE_DIR in the sandbox
(global-name "com.apple.bsd.dirhelper")
(global-name "com.apple.system.opendirectoryd.membership")

; Communicate with the security server for TLS certificate information
(global-name "com.apple.SecurityServer")
(global-name "com.apple.networkd")
(global-name "com.apple.ocspd")
(global-name "com.apple.trustd.agent")

; Read network configuration
(global-name "com.apple.SystemConfiguration.DNSConfiguration")
(global-name "com.apple.SystemConfiguration.configd")
)

(allow sysctl-read
(sysctl-name-regex #"^net.routetable")
)

(allow file-write*
(subpath (param "DARWIN_USER_CACHE_DIR"))
)
"""
185
+
186
+
187
@dataclass
class WritableRoot:
    """One writable directory tree plus the subtrees inside it that stay read-only."""

    # Top of the tree the sandboxed process may write to.
    root: Path
    # Paths under ``root`` that must remain non-writable; empty by default.
    read_only_subpaths: list[Path] = field(default_factory=list)
193
+
194
+
195
@dataclass
class TrackedProcess:
    """Bookkeeping record for a process launched inside the sandbox."""

    # Handle of the launched process.
    process: subprocess.Popen[bytes]
    # Key under which the process is tracked.
    name: str
    # Shell command line that was started.
    command: str
    # Wall-clock start time (``time.time()``), captured when the record is created.
    started_at: float = field(default_factory=time.time)
203
+
204
+
205
+ def _get_darwin_user_cache_dir() -> Path | None:
206
+ """Get the Darwin user cache directory (like confstr in C)."""
207
+ try:
208
+ import ctypes
209
+
210
+ libc = ctypes.CDLL("/usr/lib/libc.dylib")
211
+ buf = ctypes.create_string_buffer(1024)
212
+ # _CS_DARWIN_USER_CACHE_DIR = 65538
213
+ result = libc.confstr(65538, buf, 1024)
214
+ if result > 0:
215
+ path = Path(buf.value.decode("utf-8"))
216
+ try:
217
+ return path.resolve()
218
+ except OSError:
219
+ return path
220
+ except Exception:
221
+ pass
222
+ return None
223
+
224
+
225
class SeatbeltSandbox:
    """
    Lightweight macOS sandbox using Apple's sandbox-exec (Seatbelt).

    Provides process isolation without Docker or VMs by using Apple's
    built-in sandboxing framework. Commands run directly on your machine
    but are restricted in what they can access.

    Features:
        - Configurable filesystem access (see SandboxMode)
        - Optional network access
        - Protected subdirectories (.git, .codex, .claude by default) that
          stay read-only inside every writable root that contains them
        - Efficient - no container overhead

    Limitations:
        - macOS only
        - Less isolation than Docker (shares kernel, users)
        - Cannot limit CPU/memory like containers

    Example:
        async with SeatbeltSandbox(
            working_dir="/tmp/sandbox",
            network_access=True
        ) as sandbox:
            tools = sandbox.get_tools()
            # Use tools with your LLM...

        # Or with stricter isolation:
        async with SeatbeltSandbox(
            mode=SandboxMode.READ_ONLY,
            network_access=False
        ) as sandbox:
            # Can only read files, no network
            ...
    """

    def __init__(
        self,
        *,
        mode: SandboxMode = SandboxMode.WORKSPACE_READ_WRITE,
        working_dir: str | Path | None = None,
        network_access: bool = True,
        additional_writable_roots: list[str | Path] | None = None,
        protected_subpaths: list[str] | None = None,
        include_tmp: bool = True,
        include_tmpdir: bool = True,
        stateful: bool = False,
    ):
        """
        Initialize a Seatbelt sandbox.

        Args:
            mode: Sandbox restriction level. Default WORKSPACE_READ_WRITE.
            working_dir: Working directory for commands; created if missing.
                When None, a fresh temp directory is created and removed
                again on cleanup. Writable in the WORKSPACE_WRITE and
                WORKSPACE_READ_WRITE modes.
            network_access: If True, allow network access. Default True.
            additional_writable_roots: Extra directories. Writable in the
                WORKSPACE_WRITE and WORKSPACE_READ_WRITE modes; also
                re-allowed for reading in the workspace-restricted modes.
            protected_subpaths: Directory names to keep read-only inside
                writable roots. None or an empty list selects the default
                [".git", ".codex", ".claude"].
            include_tmp: Include /tmp as a temp root. Default True.
            include_tmpdir: Include $TMPDIR as a temp root. Default True.
            stateful: Stored on the instance; this class does not currently
                act on it.

        Raises:
            RuntimeError: When not running on macOS, or when sandbox-exec is
                missing from its expected location.
        """
        if sys.platform != "darwin":
            raise RuntimeError(
                "SeatbeltSandbox is only available on macOS. "
                f"Current platform: {sys.platform}"
            )

        if not os.path.exists(SEATBELT_EXECUTABLE):
            raise RuntimeError(
                f"sandbox-exec not found at {SEATBELT_EXECUTABLE}. "
                "This is a macOS system binary that should be present on all macOS systems."
            )

        self.mode = mode
        self.network_access = network_access
        self.include_tmp = include_tmp
        self.include_tmpdir = include_tmpdir
        self.stateful = stateful

        # Set up working directory
        if working_dir is None:
            # Create a temp directory for this sandbox
            import tempfile

            self._temp_dir = tempfile.mkdtemp(prefix="seatbelt_sandbox_")
            self.working_dir = Path(self._temp_dir)
        else:
            self._temp_dir = None
            self.working_dir = Path(working_dir)
            self.working_dir.mkdir(parents=True, exist_ok=True)

        # Protected subpaths - directories that are read-only within writable roots
        self.protected_subpaths = protected_subpaths or [".git", ".codex", ".claude"]

        # Additional writable roots
        self.additional_writable_roots = [
            Path(p) for p in (additional_writable_roots or [])
        ]

        # State
        self._initialized = False
        self._destroyed = False

        # Process tracking
        self.processes: dict[str, TrackedProcess] = {}
        self.process_counter: int = 0

        # Stateful shell (reserved; not used by the methods of this class)
        self._shell_process: subprocess.Popen[bytes] | None = None
        self._shell_initialized = False

    # ------------------------------------------------------------------
    # Path helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _safe_resolve(path: Path) -> Path:
        """Return ``path.resolve()``, or ``path`` unchanged if resolution fails."""
        try:
            return path.resolve()
        except (OSError, RuntimeError):
            return path

    def _temp_dir_candidates(self) -> list[Path]:
        """Return the enabled temp directories (/tmp, $TMPDIR) that exist."""
        candidates: list[Path] = []
        if self.include_tmp:
            candidates.append(Path("/tmp"))
        if self.include_tmpdir:
            tmpdir = os.environ.get("TMPDIR")
            if tmpdir:
                candidates.append(Path(tmpdir))
        return [p for p in candidates if p.exists()]

    def _existing_additional_roots(self) -> list[Path]:
        """Return the configured additional roots that currently exist."""
        return [p for p in self.additional_writable_roots if p.exists()]

    def _unique_resolved(self, candidates: list[Path]) -> list[Path]:
        """Resolve each candidate and drop duplicates, preserving order."""
        unique: list[Path] = []
        for candidate in candidates:
            resolved = self._safe_resolve(candidate)
            if resolved not in unique:
                unique.append(resolved)
        return unique

    def _get_readable_roots(self) -> list[Path]:
        """Get list of readable root directories for restricted modes.

        Order: working directory, enabled temp directories, then the
        additional roots that exist. Paths are resolved and de-duplicated.
        """
        return self._unique_resolved(
            [
                self.working_dir,
                *self._temp_dir_candidates(),
                *self._existing_additional_roots(),
            ]
        )

    def _get_writable_roots(self) -> list[WritableRoot]:
        """Get list of writable roots with their protected subpaths.

        Returns an empty list unless the mode allows workspace writes.

        Protected subpaths are looked up under the working directory and
        each additional root. Every protected path is then excluded from
        *every* writable root that strictly contains it. Without this, a
        workspace located under /tmp or $TMPDIR would have its ".git" (etc.)
        writable through the broader temp-directory rule, because the write
        rule allows a path as soon as any one of its filters matches.
        """
        if self.mode not in (
            SandboxMode.WORKSPACE_WRITE,
            SandboxMode.WORKSPACE_READ_WRITE,
        ):
            return []

        workspace_bases = [self.working_dir, *self._existing_additional_roots()]

        # Existing protected directories, resolved and de-duplicated.
        protected = self._unique_resolved(
            [
                base / name
                for base in workspace_bases
                for name in self.protected_subpaths
                if (base / name).exists()
            ]
        )

        # Working dir first, then temp dirs, then additional roots.
        root_paths = self._unique_resolved(
            [
                self.working_dir,
                *self._temp_dir_candidates(),
                *self._existing_additional_roots(),
            ]
        )

        return [
            WritableRoot(
                root=root,
                # Strict containment only: a root explicitly configured at a
                # protected path itself remains writable, as before.
                read_only_subpaths=[p for p in protected if root in p.parents],
            )
            for root in root_paths
        ]

    # ------------------------------------------------------------------
    # Policy construction
    # ------------------------------------------------------------------

    def _build_policy(self) -> tuple[str, list[tuple[str, str]]]:
        """
        Build the sandbox policy string and parameters.

        The policy is the base policy followed by file-read rules, file-write
        rules (mode dependent) and, when enabled, the network policy.

        Returns:
            Tuple of (policy_string, [(param_name, param_value), ...])
        """
        params: list[tuple[str, str]] = []

        # Start with base policy
        parts = [SEATBELT_BASE_POLICY]

        # File read policy
        if self.mode in (
            SandboxMode.WORKSPACE_READ_ONLY,
            SandboxMode.WORKSPACE_READ_WRITE,
        ):
            # Restricted read using deny-based approach:
            # 1. Allow reading everything (needed for system libs/binaries)
            # 2. Deny reading user home directories
            # 3. Re-allow reading specific workspace directories
            # Order matters in seatbelt - last matching rule wins
            parts.append("; Allow read access to system files")
            parts.append('(allow file-read* (subpath "/"))')

            parts.append("; Block access to user home directories")
            parts.append('(deny file-read* (subpath "/Users"))')

            # Re-allow workspace directories
            readable_roots = self._get_readable_roots()
            if readable_roots:
                allow_parts = []
                for idx, root in enumerate(readable_roots):
                    root_param = f"READABLE_ROOT_{idx}"
                    params.append((root_param, str(root)))
                    allow_parts.append(f'(subpath (param "{root_param}"))')

                parts.append("; But allow reading workspace directories")
                parts.append(f"(allow file-read*\n {' '.join(allow_parts)}\n)")
        else:
            # Permissive read - entire filesystem
            parts.append("; Allow read-only file operations")
            parts.append("(allow file-read*)")

        # File write policy
        if self.mode == SandboxMode.FULL_ACCESS:
            parts.append("; Allow full file write access")
            parts.append('(allow file-write* (regex #"^/"))')
        elif self.mode in (
            SandboxMode.WORKSPACE_WRITE,
            SandboxMode.WORKSPACE_READ_WRITE,
        ):
            writable_roots = self._get_writable_roots()
            if writable_roots:
                write_policies = []
                for idx, wr in enumerate(writable_roots):
                    root_param = f"WRITABLE_ROOT_{idx}"
                    params.append((root_param, str(self._safe_resolve(wr.root))))

                    if not wr.read_only_subpaths:
                        write_policies.append(f'(subpath (param "{root_param}"))')
                        continue

                    # Build require-all with require-not for protected paths
                    require_parts = [f'(subpath (param "{root_param}"))']
                    for subidx, ro in enumerate(wr.read_only_subpaths):
                        ro_param = f"WRITABLE_ROOT_{idx}_RO_{subidx}"
                        require_parts.append(
                            f'(require-not (subpath (param "{ro_param}")))'
                        )
                        params.append((ro_param, str(self._safe_resolve(ro))))
                    write_policies.append(f"(require-all {' '.join(require_parts)})")

                parts.append("; Allow write access to specific directories")
                parts.append(f"(allow file-write*\n {' '.join(write_policies)}\n)")

        # Network policy
        if self.network_access:
            # The network policy references the DARWIN_USER_CACHE_DIR param.
            cache_dir = _get_darwin_user_cache_dir()
            if cache_dir:
                params.append(("DARWIN_USER_CACHE_DIR", str(cache_dir)))
            else:
                # Fallback to a reasonable default
                params.append(
                    ("DARWIN_USER_CACHE_DIR", os.path.expanduser("~/Library/Caches"))
                )
            parts.append(SEATBELT_NETWORK_POLICY)

        return "\n".join(parts), params

    def _build_command_args(self, command: list[str]) -> list[str]:
        """Build the full sandbox-exec command.

        Layout: executable, ``-p <policy>``, one ``-D<name>=<value>`` per
        policy parameter, ``--``, then the command to run.
        """
        policy, params = self._build_policy()

        args = [SEATBELT_EXECUTABLE, "-p", policy]
        args.extend(f"-D{key}={value}" for key, value in params)
        args.append("--")
        args.extend(command)
        return args

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def __aenter__(self):
        """Async context manager entry."""
        await self._ensure_initialized()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if not self._destroyed:
            await self._destroy()
        return False

    def __enter__(self):
        """Sync context manager entry.

        Initialisation awaits nothing, so it runs directly instead of going
        through an event loop; this also works inside a running loop.
        """
        self._initialize_sync()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Sync context manager exit."""
        if not self._destroyed:
            self._destroy_sync()
        return False

    def __del__(self):
        """Cleanup when garbage collected."""
        # __init__ may have raised before _destroyed was assigned (e.g. on a
        # non-macOS platform); treat such an instance as nothing to clean up.
        if getattr(self, "_destroyed", True):
            return
        try:
            self._destroy_sync()
        except Exception:
            # Finalizers must not raise; cleanup here is best-effort.
            pass

    def _initialize_sync(self) -> None:
        """Create the working directory and mark the sandbox initialized."""
        if self._initialized:
            return

        # Ensure working directory exists
        self.working_dir.mkdir(parents=True, exist_ok=True)
        self._initialized = True

    async def _ensure_initialized(self):
        """Initialize the sandbox."""
        self._initialize_sync()

    def _generate_process_name(self) -> str:
        """Generate a unique process name."""
        self.process_counter += 1
        return f"p{self.process_counter}"

    # ------------------------------------------------------------------
    # Command execution
    # ------------------------------------------------------------------

    @staticmethod
    def _format_result(result: "subprocess.CompletedProcess[bytes]") -> str:
        """Turn a finished foreground command into the text returned to the caller."""
        output = result.stdout.decode("utf-8", errors="replace")
        stderr = result.stderr.decode("utf-8", errors="replace")

        # Combine stdout and stderr
        if stderr:
            output = output + "\n" + stderr if output else stderr

        # Truncate if needed, keeping the tail of the output
        if len(output) > 5000:
            output = "...[truncated]...\n" + output[-5000:]

        # Include exit code if non-zero
        if result.returncode != 0:
            output = f"[Exit code: {result.returncode}]\n{output}"

        stripped = output.strip()
        return stripped if stripped else "(no output)"

    def _start_background(
        self, sandboxed_args: list[str], command: str, name: str | None
    ) -> str:
        """Launch a background process, track it, and return a status message."""
        if name:
            proc_name = name
            existing = self.processes.get(proc_name)
            if existing is not None and existing.process.poll() is None:
                # Overwriting the entry would orphan a live process that
                # cleanup could no longer terminate.
                return (
                    f"[Error: a background process named '{proc_name}' is "
                    f"still running. Choose a different name.]"
                )
        else:
            proc_name = self._generate_process_name()
            # Skip generated names already taken by a caller-supplied name.
            while proc_name in self.processes:
                proc_name = self._generate_process_name()

        try:
            # NOTE(review): stdout/stderr are pipes that nothing in this class
            # reads; a chatty process can block once the pipe buffer fills.
            process = subprocess.Popen(
                sandboxed_args,
                cwd=str(self.working_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as e:
            # Report launch failures the same way foreground execution does.
            return f"[Error: {e}]"

        self.processes[proc_name] = TrackedProcess(
            process=process,
            name=proc_name,
            command=command,
        )

        return (
            f"Started background process '{proc_name}'.\n"
            f"Command: {command}\n"
            f"Use list_processes() to check status."
        )

    async def _exec(
        self,
        command: str,
        timeout: int | None = 120000,
        run_in_background: bool = False,
        name: str | None = None,
        description: str | None = None,
    ) -> str:
        """
        Execute a command in the sandbox.

        The command is run as ``bash -c <command>`` under sandbox-exec with
        the working directory as cwd.

        Args:
            command: Shell command to execute
            timeout: Timeout in milliseconds (default: 120000 = 2 minutes, max: 600000).
                Ignored for background processes.
            run_in_background: If True, run in background and return immediately
            name: Name for background process (auto-generated if not provided)
            description: Short description of what this command does
                (accepted for the tool schema; not used here)

        Returns:
            Command output if foreground, or status message if background.
            Failures are reported as bracketed text ("[Timeout ...]",
            "[Error: ...]") rather than raised.
        """
        await self._ensure_initialized()

        # Build the sandboxed command
        sandboxed_args = self._build_command_args(["bash", "-c", command])

        if run_in_background:
            return self._start_background(sandboxed_args, command, name)

        # Convert timeout from milliseconds to seconds
        timeout_seconds: float | None = None
        if timeout is not None:
            timeout_seconds = min(timeout / 1000, 600)  # Cap at 10 minutes

        try:
            result = await asyncio.wait_for(
                asyncio.to_thread(
                    subprocess.run,
                    sandboxed_args,
                    cwd=str(self.working_dir),
                    capture_output=True,
                    timeout=timeout_seconds,
                ),
                # Outer guard with slack so subprocess.run's own timeout
                # normally fires first.
                timeout=timeout_seconds + 5 if timeout_seconds else None,
            )
        except (subprocess.TimeoutExpired, asyncio.TimeoutError):
            return f"[Timeout after {timeout_seconds:.0f}s]"
        except Exception as e:
            return f"[Error: {e}]"

        return self._format_result(result)

    async def _check_process(self, name: str | None = None) -> str:
        """Check status of background processes.

        Args:
            name: Process name to report on; when omitted, all tracked
                processes are listed in a table.

        Returns:
            A human-readable status report.
        """
        if not self.processes:
            return "No background processes have been started."

        if name:
            proc = self.processes.get(name)
            if not proc:
                available = ", ".join(self.processes.keys())
                return f"Process '{name}' not found. Available: {available}"

            # Check process status
            poll_result = proc.process.poll()
            if poll_result is None:
                status = "running"
            else:
                status = f"completed (exit code: {poll_result})"

            elapsed = time.time() - proc.started_at
            return (
                f"Process: {name}\n"
                f"Command: {proc.command}\n"
                f"Status: {status}\n"
                f"Running for: {elapsed:.1f}s"
            )

        # Show all processes; the header uses the same column widths as the rows.
        lines = [f"{'NAME':<8} {'STATUS':<19} COMMAND"]
        for proc_name, proc in self.processes.items():
            poll_result = proc.process.poll()
            status = "running" if poll_result is None else f"exit {poll_result}"

            cmd_display = (
                proc.command[:40] + "..." if len(proc.command) > 40 else proc.command
            )
            lines.append(f"{proc_name:<8} {status:<19} {cmd_display}")

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    async def _destroy(self):
        """Clean up the sandbox."""
        self._destroy_sync()

    def _destroy_sync(self):
        """Synchronous cleanup.

        Terminates background processes that are still running (escalating to
        kill if they do not exit within 5 seconds) and removes the temp
        working directory if this instance created one. Safe to call twice.
        """
        if self._destroyed:
            return

        # Kill any running background processes
        for proc in self.processes.values():
            if proc.process.poll() is not None:
                continue
            try:
                proc.process.terminate()
                proc.process.wait(timeout=5)
            except Exception:
                try:
                    proc.process.kill()
                    # Reap the killed child so it does not linger as a zombie.
                    proc.process.wait(timeout=1)
                except Exception:
                    pass

        # Clean up temp directory if we created one
        if self._temp_dir and os.path.exists(self._temp_dir):
            import shutil

            shutil.rmtree(self._temp_dir, ignore_errors=True)

        self._destroyed = True
        self._initialized = False

    # ------------------------------------------------------------------
    # Tool export
    # ------------------------------------------------------------------

    def get_tools(self) -> list[Any]:
        """Return list of tools for LLM use.

        Returns:
            Two Tool objects: "bash" (runs commands via ``_exec``) and
            "list_processes" (reports background process status via
            ``_check_process``).
        """
        mode_desc = {
            SandboxMode.WORKSPACE_READ_ONLY: f"workspace-read-only (can only read {self.working_dir}, cannot write)",
            SandboxMode.WORKSPACE_READ_WRITE: f"workspace-read-write (can only access {self.working_dir})",
            SandboxMode.READ_ONLY: "read-only (can read all files, cannot write anywhere)",
            SandboxMode.WORKSPACE_WRITE: f"workspace-write (can read all, write to {self.working_dir} and temp dirs)",
            SandboxMode.FULL_ACCESS: "full access (unrestricted filesystem)",
        }
        network_desc = (
            "with network access" if self.network_access else "without network access"
        )

        bash_description = (
            f"Execute a bash command in a macOS sandboxed environment. "
            f"This sandbox is {mode_desc[self.mode]}, {network_desc}. "
            f"Commands run directly on macOS with Apple sandbox-exec restrictions. "
            f"Working directory: {self.working_dir}. "
            f"Set run_in_background=true to run servers or long-running processes."
        )

        bash_tool = Tool(
            name="bash",
            description=bash_description,
            run=self._exec,
            parameters={
                "command": {
                    "type": "string",
                    "description": "Shell command to execute",
                },
                "description": {
                    "type": "string",
                    "description": "Short description of what this command does (5-10 words)",
                },
                "run_in_background": {
                    "type": "boolean",
                    "description": "If true, run in background without waiting. Default: false.",
                },
                "name": {
                    "type": "string",
                    "description": "Name for background process (e.g., 'server'). Only used with run_in_background=true.",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds (default: 120000, max: 600000)",
                },
            },
            required=["command"],
        )

        check_tool = Tool(
            name="list_processes",
            description="Check status of background processes. Shows whether each process is running or has exited.",
            run=self._check_process,
            parameters={
                "name": {
                    "type": "string",
                    "description": "Process name to check, or omit to see all processes",
                },
            },
            required=[],
        )

        return [bash_tool, check_tool]