lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

Files changed (41):
  1. lm_deluge/__init__.py +0 -24
  2. lm_deluge/api_requests/anthropic.py +25 -5
  3. lm_deluge/api_requests/base.py +37 -0
  4. lm_deluge/api_requests/bedrock.py +23 -2
  5. lm_deluge/api_requests/gemini.py +36 -10
  6. lm_deluge/api_requests/openai.py +31 -4
  7. lm_deluge/batches.py +15 -45
  8. lm_deluge/client.py +27 -1
  9. lm_deluge/models/__init__.py +2 -0
  10. lm_deluge/models/anthropic.py +12 -12
  11. lm_deluge/models/google.py +13 -0
  12. lm_deluge/models/minimax.py +9 -1
  13. lm_deluge/models/openrouter.py +48 -0
  14. lm_deluge/models/zai.py +50 -1
  15. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  16. lm_deluge/prompt.py +333 -68
  17. lm_deluge/server/__init__.py +24 -0
  18. lm_deluge/server/__main__.py +144 -0
  19. lm_deluge/server/adapters.py +369 -0
  20. lm_deluge/server/app.py +388 -0
  21. lm_deluge/server/auth.py +71 -0
  22. lm_deluge/server/model_policy.py +215 -0
  23. lm_deluge/server/models_anthropic.py +172 -0
  24. lm_deluge/server/models_openai.py +175 -0
  25. lm_deluge/skills/anthropic.py +0 -0
  26. lm_deluge/skills/compat.py +0 -0
  27. lm_deluge/tool/__init__.py +13 -1
  28. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  29. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  30. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  31. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  32. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  33. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  34. lm_deluge/tool/prefab/skills.py +0 -0
  35. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
  36. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
  37. lm_deluge/mock_openai.py +0 -643
  38. lm_deluge/tool/prefab/sandbox.py +0 -1621
  39. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  40. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  41. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,546 @@
1
+ import asyncio
2
+ import json
3
+ import secrets
4
+ import shlex
5
+ import struct
6
+ import time
7
+ import uuid
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ from lm_deluge.tool import Tool
12
+
13
+
14
@dataclass
class TrackedProcess:
    """Tracks a long-running process started inside a sandbox.

    Attributes:
        process: Opaque handle to the underlying process object.
            NOTE(review): the original comment said "Modal's ContainerProcess",
            which looks like copy-paste residue from modal_sandbox; the Fargate
            backend has no such handle type — confirm intended usage.
        name: Human-readable name assigned to the process.
        command: The shell command that was launched.
        started_at: Unix timestamp recorded when this record was created.
    """

    process: Any
    name: str
    command: str
    started_at: float = field(default_factory=time.time)
22
+
23
+
24
+ class FargateSandbox:
25
+ """
26
+ AWS Fargate-based sandbox for running untrusted code in isolated containers.
27
+
28
+ Requires:
29
+ - boto3 installed
30
+ - AWS credentials configured
31
+ - VPC with subnets that have internet access (for pulling images)
32
+ - Security group that allows outbound traffic
33
+
34
+ The sandbox automatically:
35
+ - Creates IAM roles for task execution and ECS Exec
36
+ - Registers a task definition with the specified image
37
+ - Runs a Fargate task and waits for it to be ready
38
+ - Executes commands via ECS Exec (SSM Session Manager)
39
+
40
+ Example:
41
+ async with FargateSandbox(
42
+ subnets=["subnet-abc123"],
43
+ security_groups=["sg-abc123"],
44
+ ) as sandbox:
45
+ tools = sandbox.get_tools()
46
+ # Use tools with your LLM...
47
+ """
48
+
49
+ # Default image - minimal Python with common tools
50
+ DEFAULT_IMAGE = "python:3.12-slim"
51
+
52
+ # IAM policy for ECS Exec (SSM Session Manager)
53
+ EXEC_POLICY = {
54
+ "Version": "2012-10-17",
55
+ "Statement": [
56
+ {
57
+ "Effect": "Allow",
58
+ "Action": [
59
+ "ssmmessages:CreateControlChannel",
60
+ "ssmmessages:CreateDataChannel",
61
+ "ssmmessages:OpenControlChannel",
62
+ "ssmmessages:OpenDataChannel",
63
+ ],
64
+ "Resource": "*",
65
+ }
66
+ ],
67
+ }
68
+
69
+ # Trust policy for ECS tasks
70
+ TASK_TRUST_POLICY = {
71
+ "Version": "2012-10-17",
72
+ "Statement": [
73
+ {
74
+ "Effect": "Allow",
75
+ "Principal": {"Service": "ecs-tasks.amazonaws.com"},
76
+ "Action": "sts:AssumeRole",
77
+ }
78
+ ],
79
+ }
80
+
81
+ def __init__(
82
+ self,
83
+ subnets: list[str],
84
+ security_groups: list[str],
85
+ *,
86
+ cluster: str | None = None,
87
+ image: str | None = None,
88
+ cpu: int = 256,
89
+ memory: int = 512,
90
+ region: str | None = None,
91
+ task_role_arn: str | None = None,
92
+ execution_role_arn: str | None = None,
93
+ assign_public_ip: bool = True,
94
+ ):
95
+ """
96
+ Initialize a Fargate sandbox.
97
+
98
+ Args:
99
+ subnets: List of VPC subnet IDs (required). Use subnets with internet
100
+ access (public subnets with IGW, or private with NAT).
101
+ security_groups: List of security group IDs (required). Must allow
102
+ outbound HTTPS (443) for ECS Exec to work.
103
+ cluster: ECS cluster name. If None, uses "lm-deluge-sandbox" (created if missing).
104
+ image: Docker image to use. Defaults to python:3.12-slim.
105
+ cpu: Fargate CPU units (256, 512, 1024, 2048, 4096). Default 256.
106
+ memory: Fargate memory in MB. Must be compatible with CPU. Default 512.
107
+ region: AWS region. If None, uses boto3 default.
108
+ task_role_arn: IAM role ARN for the task. If None, creates one with
109
+ minimal permissions (just SSM for ECS Exec).
110
+ execution_role_arn: IAM role ARN for task execution. If None, uses
111
+ the AWS managed ecsTaskExecutionRole.
112
+ assign_public_ip: Whether to assign a public IP. Required if using
113
+ public subnets without NAT. Default True.
114
+ """
115
+ self.subnets = subnets
116
+ self.security_groups = security_groups
117
+ self.cluster = cluster or "lm-deluge-sandbox"
118
+ self.image = image or self.DEFAULT_IMAGE
119
+ self.cpu = str(cpu)
120
+ self.memory = str(memory)
121
+ self.region = region
122
+ self.task_role_arn = task_role_arn
123
+ self.execution_role_arn = execution_role_arn
124
+ self.assign_public_ip = assign_public_ip
125
+
126
+ # State
127
+ self.task_arn: str | None = None
128
+ self.task_definition_arn: str | None = None
129
+ self._initialized = False
130
+ self._destroyed = False
131
+
132
+ # boto3 clients (lazy init)
133
+ self._ecs_client = None
134
+ self._iam_client = None
135
+
136
+ @property
137
+ def ecs(self):
138
+ """Lazy-load ECS client."""
139
+ if self._ecs_client is None:
140
+ import boto3
141
+
142
+ self._ecs_client = boto3.client("ecs", region_name=self.region)
143
+ return self._ecs_client
144
+
145
+ @property
146
+ def iam(self):
147
+ """Lazy-load IAM client."""
148
+ if self._iam_client is None:
149
+ import boto3
150
+
151
+ self._iam_client = boto3.client("iam", region_name=self.region)
152
+ return self._iam_client
153
+
154
+ async def __aenter__(self):
155
+ """Async context manager entry - initialize sandbox."""
156
+ await self._ensure_initialized()
157
+ return self
158
+
159
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
160
+ """Async context manager exit - cleanup sandbox."""
161
+ if not self._destroyed:
162
+ await self._destroy()
163
+ return False
164
+
165
+ def __del__(self):
166
+ """Cleanup sandbox when garbage collected (backup cleanup)."""
167
+ if not self._destroyed and self.task_arn:
168
+ import warnings
169
+
170
+ warnings.warn(
171
+ "FargateSandbox was not properly cleaned up. "
172
+ "Use 'async with FargateSandbox(...) as sandbox:' for automatic cleanup.",
173
+ ResourceWarning,
174
+ stacklevel=2,
175
+ )
176
+
177
+ async def _ensure_initialized(self):
178
+ """Lazy initialization - create cluster, task def, and run task."""
179
+ if self._initialized:
180
+ return
181
+
182
+ # Ensure cluster exists
183
+ await self._ensure_cluster()
184
+
185
+ # Ensure IAM roles exist
186
+ await self._ensure_roles()
187
+
188
+ # Register task definition
189
+ await self._register_task_definition()
190
+
191
+ # Run the task
192
+ await self._run_task()
193
+
194
+ # Wait for task to be running
195
+ await self._wait_for_task()
196
+
197
+ self._initialized = True
198
+
199
+ async def _ensure_cluster(self):
200
+ """Create ECS cluster if it doesn't exist."""
201
+ try:
202
+ response = await asyncio.to_thread(
203
+ self.ecs.describe_clusters, clusters=[self.cluster]
204
+ )
205
+ clusters = response.get("clusters", [])
206
+ if clusters and clusters[0].get("status") == "ACTIVE":
207
+ return # Cluster exists
208
+ except Exception:
209
+ pass
210
+
211
+ # Create cluster
212
+ await asyncio.to_thread(
213
+ self.ecs.create_cluster,
214
+ clusterName=self.cluster,
215
+ settings=[
216
+ {"name": "containerInsights", "value": "disabled"},
217
+ ],
218
+ )
219
+
220
+ async def _ensure_roles(self):
221
+ """Create IAM roles if not provided."""
222
+ # Task role (for ECS Exec)
223
+ if not self.task_role_arn:
224
+ role_name = "lm-deluge-sandbox-task-role"
225
+ try:
226
+ response = await asyncio.to_thread(
227
+ self.iam.get_role, RoleName=role_name
228
+ )
229
+ self.task_role_arn = response["Role"]["Arn"]
230
+ except self.iam.exceptions.NoSuchEntityException:
231
+ # Create the role
232
+ response = await asyncio.to_thread(
233
+ self.iam.create_role,
234
+ RoleName=role_name,
235
+ AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
236
+ Description="Task role for lm-deluge Fargate sandbox (ECS Exec)",
237
+ )
238
+ self.task_role_arn = response["Role"]["Arn"]
239
+
240
+ # Attach inline policy for ECS Exec
241
+ await asyncio.to_thread(
242
+ self.iam.put_role_policy,
243
+ RoleName=role_name,
244
+ PolicyName="ecs-exec-policy",
245
+ PolicyDocument=json.dumps(self.EXEC_POLICY),
246
+ )
247
+
248
+ # IAM is eventually consistent - wait a bit
249
+ await asyncio.sleep(5)
250
+
251
+ # Execution role (for pulling images, logs)
252
+ if not self.execution_role_arn:
253
+ role_name = "lm-deluge-sandbox-execution-role"
254
+ try:
255
+ response = await asyncio.to_thread(
256
+ self.iam.get_role, RoleName=role_name
257
+ )
258
+ self.execution_role_arn = response["Role"]["Arn"]
259
+ except self.iam.exceptions.NoSuchEntityException:
260
+ # Create the role
261
+ response = await asyncio.to_thread(
262
+ self.iam.create_role,
263
+ RoleName=role_name,
264
+ AssumeRolePolicyDocument=json.dumps(self.TASK_TRUST_POLICY),
265
+ Description="Execution role for lm-deluge Fargate sandbox",
266
+ )
267
+ self.execution_role_arn = response["Role"]["Arn"]
268
+
269
+ # Attach AWS managed policy
270
+ await asyncio.to_thread(
271
+ self.iam.attach_role_policy,
272
+ RoleName=role_name,
273
+ PolicyArn="arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
274
+ )
275
+
276
+ # IAM is eventually consistent - wait a bit
277
+ await asyncio.sleep(5)
278
+
279
+ async def _register_task_definition(self):
280
+ """Register a task definition for the sandbox."""
281
+ family = f"lm-deluge-sandbox-{secrets.token_hex(4)}"
282
+
283
+ response = await asyncio.to_thread(
284
+ self.ecs.register_task_definition,
285
+ family=family,
286
+ networkMode="awsvpc",
287
+ requiresCompatibilities=["FARGATE"],
288
+ cpu=self.cpu,
289
+ memory=self.memory,
290
+ taskRoleArn=self.task_role_arn,
291
+ executionRoleArn=self.execution_role_arn,
292
+ containerDefinitions=[
293
+ {
294
+ "name": "sandbox",
295
+ "image": self.image,
296
+ "essential": True,
297
+ # Keep container running - sleep infinity
298
+ "command": ["sh", "-c", "sleep infinity"],
299
+ "linuxParameters": {
300
+ "initProcessEnabled": True, # Required for ECS Exec
301
+ },
302
+ }
303
+ ],
304
+ )
305
+ self.task_definition_arn = response["taskDefinition"]["taskDefinitionArn"]
306
+
307
+ async def _run_task(self):
308
+ """Run a Fargate task."""
309
+ response = await asyncio.to_thread(
310
+ self.ecs.run_task,
311
+ cluster=self.cluster,
312
+ taskDefinition=self.task_definition_arn,
313
+ launchType="FARGATE",
314
+ enableExecuteCommand=True, # Enable ECS Exec
315
+ networkConfiguration={
316
+ "awsvpcConfiguration": {
317
+ "subnets": self.subnets,
318
+ "securityGroups": self.security_groups,
319
+ "assignPublicIp": "ENABLED"
320
+ if self.assign_public_ip
321
+ else "DISABLED",
322
+ }
323
+ },
324
+ )
325
+
326
+ tasks = response.get("tasks", [])
327
+ if not tasks:
328
+ failures = response.get("failures", [])
329
+ raise RuntimeError(f"Failed to run task: {failures}")
330
+
331
+ self.task_arn = tasks[0]["taskArn"]
332
+
333
+ async def _wait_for_task(self, timeout: int = 120):
334
+ """Wait for task to reach RUNNING state."""
335
+ start = time.time()
336
+ while time.time() - start < timeout:
337
+ response = await asyncio.to_thread(
338
+ self.ecs.describe_tasks,
339
+ cluster=self.cluster,
340
+ tasks=[self.task_arn],
341
+ )
342
+ tasks = response.get("tasks", [])
343
+ if tasks:
344
+ status = tasks[0].get("lastStatus")
345
+ if status == "RUNNING":
346
+ # Also check that execute command agent is running
347
+ containers = tasks[0].get("containers", [])
348
+ for container in containers:
349
+ managed_agents = container.get("managedAgents", [])
350
+ for agent in managed_agents:
351
+ if agent.get("name") == "ExecuteCommandAgent":
352
+ if agent.get("lastStatus") == "RUNNING":
353
+ return
354
+ elif status in ("STOPPED", "DEACTIVATING"):
355
+ reason = tasks[0].get("stoppedReason", "Unknown")
356
+ raise RuntimeError(f"Task stopped: {reason}")
357
+
358
+ await asyncio.sleep(2)
359
+
360
+ raise TimeoutError(f"Task did not reach RUNNING state within {timeout}s")
361
+
362
+ async def _exec(
363
+ self,
364
+ command: str,
365
+ timeout: int = 60,
366
+ ) -> str:
367
+ """
368
+ Execute a command in the sandbox.
369
+
370
+ Args:
371
+ command: Shell command to execute
372
+ timeout: Timeout in seconds
373
+
374
+ Returns:
375
+ Command output (stdout + stderr)
376
+ """
377
+ await self._ensure_initialized()
378
+
379
+ # Call ECS execute_command
380
+ response = await asyncio.to_thread(
381
+ self.ecs.execute_command,
382
+ cluster=self.cluster,
383
+ task=self.task_arn,
384
+ container="sandbox",
385
+ interactive=True,
386
+ command=f"/bin/sh -c {shlex.quote(command)}",
387
+ )
388
+
389
+ session = response.get("session", {})
390
+ stream_url = session.get("streamUrl")
391
+ token = session.get("tokenValue")
392
+
393
+ if not stream_url or not token:
394
+ return f"Error: Failed to get session: {response}"
395
+
396
+ # Connect to websocket and read output
397
+ try:
398
+ output = await self._read_ssm_session(stream_url, token, timeout)
399
+ except Exception as e:
400
+ return f"Error executing command: {e}"
401
+
402
+ # Truncate if needed
403
+ if len(output) > 5000:
404
+ output = "...[truncated]...\n" + output[-5000:]
405
+
406
+ return output if output else "(no output)"
407
+
408
+ async def _read_ssm_session(self, stream_url: str, token: str, timeout: int) -> str:
409
+ """
410
+ Connect to SSM session websocket and read command output.
411
+
412
+ The SSM agent uses a binary protocol:
413
+ - Header: 4-byte big-endian length + 32-byte null-padded message type
414
+ - Payload varies by message type
415
+
416
+ Note: SSM retransmits messages until ACKed. Since we're just reading
417
+ (not fully implementing the protocol), we deduplicate by tracking
418
+ seen message hashes.
419
+ """
420
+ import aiohttp
421
+
422
+ output_chunks = []
423
+ seen_messages: set[bytes] = set() # Dedupe retransmissions
424
+
425
+ async with aiohttp.ClientSession() as session:
426
+ async with session.ws_connect(stream_url, receive_timeout=timeout) as ws:
427
+ # Send init message with token
428
+ init_message = {
429
+ "MessageSchemaVersion": "1.0",
430
+ "RequestId": str(uuid.uuid4()),
431
+ "TokenValue": token,
432
+ }
433
+ await ws.send_str(json.dumps(init_message))
434
+
435
+ # Read messages until channel closes or timeout
436
+ try:
437
+ async for msg in ws:
438
+ if msg.type == aiohttp.WSMsgType.BINARY:
439
+ # Skip duplicate messages (SSM retransmits until ACKed)
440
+ msg_hash = msg.data[:116] # Header is enough to identify
441
+ if msg_hash in seen_messages:
442
+ continue
443
+ seen_messages.add(msg_hash)
444
+
445
+ parsed = self._parse_ssm_message(msg.data)
446
+ if parsed:
447
+ msg_type, payload = parsed
448
+ if "output_stream_data" in msg_type:
449
+ output_chunks.append(payload)
450
+ elif "channel_closed" in msg_type:
451
+ break
452
+ elif msg.type == aiohttp.WSMsgType.ERROR:
453
+ break
454
+ elif msg.type == aiohttp.WSMsgType.CLOSED:
455
+ break
456
+ except asyncio.TimeoutError:
457
+ pass
458
+
459
+ return "".join(output_chunks)
460
+
461
+ def _parse_ssm_message(self, data: bytes) -> tuple[str, str] | None:
462
+ """
463
+ Parse an SSM agent message.
464
+
465
+ Format:
466
+ - Bytes 0-3: Header length (big-endian uint32)
467
+ - Bytes 4-35: Message type (32 bytes, null-padded ASCII)
468
+ - After header: Payload length (4 bytes) + payload
469
+ """
470
+ if len(data) < 36:
471
+ return None
472
+
473
+ try:
474
+ header_len = struct.unpack(">I", data[0:4])[0]
475
+ msg_type = data[4:36].decode("ascii").rstrip("\x00")
476
+
477
+ # Payload starts after header
478
+ if len(data) > header_len:
479
+ payload_data = data[header_len:]
480
+ if len(payload_data) >= 4:
481
+ payload_len = struct.unpack(">I", payload_data[0:4])[0]
482
+ if len(payload_data) >= 4 + payload_len:
483
+ payload = payload_data[4 : 4 + payload_len].decode(
484
+ "utf-8", errors="replace"
485
+ )
486
+ return msg_type, payload
487
+
488
+ return msg_type, ""
489
+ except Exception:
490
+ return None
491
+
492
+ async def _destroy(self):
493
+ """Stop the task and clean up."""
494
+ if self._destroyed:
495
+ return
496
+
497
+ if self.task_arn:
498
+ try:
499
+ await asyncio.to_thread(
500
+ self.ecs.stop_task,
501
+ cluster=self.cluster,
502
+ task=self.task_arn,
503
+ reason="Sandbox destroyed",
504
+ )
505
+ except Exception:
506
+ pass # Best effort
507
+
508
+ # Optionally deregister task definition
509
+ if self.task_definition_arn:
510
+ try:
511
+ await asyncio.to_thread(
512
+ self.ecs.deregister_task_definition,
513
+ taskDefinition=self.task_definition_arn,
514
+ )
515
+ except Exception:
516
+ pass
517
+
518
+ self._destroyed = True
519
+ self._initialized = False
520
+
521
+ def get_tools(self):
522
+ """Return list of tools for LLM use."""
523
+ bash_tool = Tool(
524
+ name="bash",
525
+ description=(
526
+ "Execute a bash command in the AWS Fargate sandbox environment. "
527
+ "The command runs in an isolated container. "
528
+ "Output is truncated to the last 5000 characters if longer. "
529
+ "Note: This sandbox does not support background processes - "
530
+ "commands must complete within the timeout."
531
+ ),
532
+ run=self._exec,
533
+ parameters={
534
+ "command": {
535
+ "type": "string",
536
+ "description": "The shell command to execute (e.g., 'ls -la', 'python script.py')",
537
+ },
538
+ "timeout": {
539
+ "type": "integer",
540
+ "description": "Timeout in seconds for the command execution (default: 60)",
541
+ },
542
+ },
543
+ required=["command"],
544
+ )
545
+
546
+ return [bash_tool]