plato-sdk-v2 2.3.0__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/agents/__init__.py +25 -13
- plato/agents/artifacts.py +108 -0
- plato/agents/config.py +16 -13
- plato/agents/otel.py +261 -0
- plato/agents/runner.py +226 -122
- plato/chronos/models/__init__.py +9 -1
- plato/v1/cli/chronos.py +788 -0
- plato/v1/cli/main.py +2 -2
- plato/v1/cli/pm.py +3 -3
- plato/v1/cli/sandbox.py +246 -52
- plato/v1/cli/ssh.py +28 -9
- plato/v1/cli/templates/world-runner.Dockerfile +27 -0
- plato/v1/cli/utils.py +32 -12
- plato/v1/cli/verify.py +243 -827
- plato/worlds/README.md +2 -1
- plato/worlds/__init__.py +3 -1
- plato/worlds/base.py +462 -67
- plato/worlds/config.py +42 -3
- plato/worlds/runner.py +1 -339
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/METADATA +4 -1
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/RECORD +23 -27
- plato/agents/logging.py +0 -401
- plato/chronos/api/callback/__init__.py +0 -11
- plato/chronos/api/callback/push_agent_logs.py +0 -61
- plato/chronos/api/callback/update_agent_status.py +0 -57
- plato/chronos/api/callback/upload_artifacts.py +0 -59
- plato/chronos/api/callback/upload_logs_zip.py +0 -57
- plato/chronos/api/callback/upload_trajectory.py +0 -57
- plato/v1/cli/sim.py +0 -11
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/WHEEL +0 -0
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/entry_points.txt +0 -0
plato/agents/runner.py
CHANGED
|
@@ -1,16 +1,23 @@
|
|
|
1
|
-
"""Agent runner - run agents in Docker containers.
|
|
1
|
+
"""Agent runner - run agents in Docker containers.
|
|
2
|
+
|
|
3
|
+
Agents emit their own OTel spans for trajectory events. This runner:
|
|
4
|
+
1. Runs agents in Docker containers
|
|
5
|
+
2. Streams stdout/stderr for logging
|
|
6
|
+
3. Passes OTel environment variables for trace context propagation
|
|
7
|
+
4. Uploads artifacts to S3 when complete
|
|
8
|
+
"""
|
|
2
9
|
|
|
3
10
|
from __future__ import annotations
|
|
4
11
|
|
|
5
12
|
import asyncio
|
|
13
|
+
import base64
|
|
6
14
|
import json
|
|
7
15
|
import logging
|
|
8
16
|
import os
|
|
9
17
|
import platform
|
|
10
|
-
import
|
|
11
|
-
from pathlib import Path
|
|
18
|
+
import uuid
|
|
12
19
|
|
|
13
|
-
from
|
|
20
|
+
from opentelemetry import trace
|
|
14
21
|
|
|
15
22
|
logger = logging.getLogger(__name__)
|
|
16
23
|
|
|
@@ -20,10 +27,10 @@ async def run_agent(
|
|
|
20
27
|
config: dict,
|
|
21
28
|
secrets: dict[str, str],
|
|
22
29
|
instruction: str,
|
|
23
|
-
workspace: str,
|
|
30
|
+
workspace: str | None = None,
|
|
24
31
|
logs_dir: str | None = None,
|
|
25
32
|
pull: bool = True,
|
|
26
|
-
) ->
|
|
33
|
+
) -> str:
|
|
27
34
|
"""Run an agent in a Docker container.
|
|
28
35
|
|
|
29
36
|
Args:
|
|
@@ -31,131 +38,228 @@ async def run_agent(
|
|
|
31
38
|
config: Agent configuration dict
|
|
32
39
|
secrets: Secret values (API keys, etc.)
|
|
33
40
|
instruction: Task instruction for the agent
|
|
34
|
-
workspace:
|
|
35
|
-
logs_dir:
|
|
41
|
+
workspace: Docker volume name for workspace (created if None)
|
|
42
|
+
logs_dir: Ignored (kept for backwards compatibility)
|
|
36
43
|
pull: Whether to pull the image first
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
The container name that was created (for cleanup purposes)
|
|
47
|
+
|
|
48
|
+
Note: Agents handle their own OTel tracing. This runner only passes
|
|
49
|
+
the trace context (TRACEPARENT) so agent spans link to the parent step.
|
|
50
|
+
|
|
51
|
+
Note: This uses Docker volumes (not bind mounts) for DIND compatibility.
|
|
52
|
+
The workspace parameter should be a Docker volume name.
|
|
37
53
|
"""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
54
|
+
# Get session info from environment variables
|
|
55
|
+
session_id = os.environ.get("SESSION_ID")
|
|
56
|
+
otel_url = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
|
|
57
|
+
upload_url = os.environ.get("UPLOAD_URL")
|
|
58
|
+
|
|
59
|
+
# Pull image if requested
|
|
60
|
+
if pull:
|
|
61
|
+
pull_proc = await asyncio.create_subprocess_exec(
|
|
62
|
+
"docker",
|
|
63
|
+
"pull",
|
|
64
|
+
image,
|
|
65
|
+
stdout=asyncio.subprocess.PIPE,
|
|
66
|
+
stderr=asyncio.subprocess.STDOUT,
|
|
67
|
+
)
|
|
68
|
+
await pull_proc.wait()
|
|
69
|
+
|
|
70
|
+
# Encode config as base64 to pass via environment variable
|
|
71
|
+
# This avoids file mount issues in Docker-in-Docker scenarios
|
|
72
|
+
config_json = json.dumps(config)
|
|
73
|
+
config_b64 = base64.b64encode(config_json.encode()).decode()
|
|
74
|
+
|
|
75
|
+
# Generate a unique container name for inspection
|
|
76
|
+
container_name = f"agent-{uuid.uuid4().hex[:8]}"
|
|
77
|
+
|
|
78
|
+
# Use WORKSPACE_VOLUME env var if set (for DIND compatibility)
|
|
79
|
+
# Otherwise create a new volume
|
|
80
|
+
workspace_volume = os.environ.get("WORKSPACE_VOLUME") or workspace or f"workspace-{uuid.uuid4().hex[:8]}"
|
|
81
|
+
if not os.environ.get("WORKSPACE_VOLUME") and not workspace:
|
|
82
|
+
await asyncio.create_subprocess_exec(
|
|
83
|
+
"docker",
|
|
84
|
+
"volume",
|
|
85
|
+
"create",
|
|
86
|
+
workspace_volume,
|
|
87
|
+
stdout=asyncio.subprocess.DEVNULL,
|
|
88
|
+
stderr=asyncio.subprocess.DEVNULL,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Create logs volume
|
|
92
|
+
logs_volume = f"logs-{uuid.uuid4().hex[:8]}"
|
|
93
|
+
await asyncio.create_subprocess_exec(
|
|
94
|
+
"docker",
|
|
95
|
+
"volume",
|
|
96
|
+
"create",
|
|
97
|
+
logs_volume,
|
|
98
|
+
stdout=asyncio.subprocess.DEVNULL,
|
|
99
|
+
stderr=asyncio.subprocess.DEVNULL,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
try:
|
|
103
|
+
# Build docker command
|
|
104
|
+
docker_cmd = ["docker", "run", "--rm", "--privileged", "--name", container_name]
|
|
105
|
+
|
|
106
|
+
# Determine if we need host networking
|
|
107
|
+
use_host_network = False
|
|
108
|
+
is_macos = platform.system() == "Darwin"
|
|
109
|
+
|
|
110
|
+
if not is_macos:
|
|
111
|
+
try:
|
|
112
|
+
proc = await asyncio.create_subprocess_exec(
|
|
113
|
+
"iptables",
|
|
114
|
+
"-L",
|
|
115
|
+
"-n",
|
|
116
|
+
stdout=asyncio.subprocess.DEVNULL,
|
|
117
|
+
stderr=asyncio.subprocess.DEVNULL,
|
|
118
|
+
)
|
|
119
|
+
await proc.wait()
|
|
120
|
+
has_iptables = proc.returncode == 0
|
|
121
|
+
except (FileNotFoundError, PermissionError):
|
|
122
|
+
has_iptables = False
|
|
123
|
+
|
|
124
|
+
use_host_network = not has_iptables
|
|
90
125
|
|
|
126
|
+
if use_host_network:
|
|
127
|
+
docker_cmd.extend(["--network=host", "--add-host=localhost:127.0.0.1"])
|
|
128
|
+
|
|
129
|
+
# Use Docker volumes instead of bind mounts for DIND compatibility
|
|
130
|
+
docker_cmd.extend(
|
|
131
|
+
[
|
|
132
|
+
"-v",
|
|
133
|
+
f"{workspace_volume}:/workspace",
|
|
134
|
+
"-v",
|
|
135
|
+
f"{logs_volume}:/logs",
|
|
136
|
+
"-v",
|
|
137
|
+
"/var/run/docker.sock:/var/run/docker.sock",
|
|
138
|
+
"-w",
|
|
139
|
+
"/workspace",
|
|
140
|
+
"-e",
|
|
141
|
+
f"AGENT_CONFIG_B64={config_b64}",
|
|
142
|
+
]
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# Pass session info to agent
|
|
146
|
+
if otel_url:
|
|
147
|
+
traces_endpoint = f"{otel_url.rstrip('/')}/v1/traces"
|
|
148
|
+
docker_cmd.extend(["-e", f"OTEL_EXPORTER_OTLP_ENDPOINT={otel_url}"])
|
|
149
|
+
docker_cmd.extend(["-e", f"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT={traces_endpoint}"])
|
|
150
|
+
docker_cmd.extend(["-e", "OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf"])
|
|
151
|
+
if session_id:
|
|
152
|
+
docker_cmd.extend(["-e", f"SESSION_ID={session_id}"])
|
|
153
|
+
if upload_url:
|
|
154
|
+
docker_cmd.extend(["-e", f"UPLOAD_URL={upload_url}"])
|
|
155
|
+
|
|
156
|
+
# Pass trace context to agent for parent linking
|
|
157
|
+
# Agent spans will be children of the current step span
|
|
158
|
+
current_span = trace.get_current_span()
|
|
159
|
+
span_context = current_span.get_span_context()
|
|
160
|
+
if span_context.is_valid:
|
|
161
|
+
trace_id = format(span_context.trace_id, "032x")
|
|
162
|
+
span_id = format(span_context.span_id, "016x")
|
|
163
|
+
# W3C Trace Context format for TRACEPARENT
|
|
164
|
+
traceparent = f"00-{trace_id}-{span_id}-01"
|
|
91
165
|
docker_cmd.extend(
|
|
92
166
|
[
|
|
93
|
-
"-
|
|
94
|
-
f"{
|
|
95
|
-
"-
|
|
96
|
-
f"{
|
|
97
|
-
"-
|
|
98
|
-
f"{
|
|
99
|
-
"-w",
|
|
100
|
-
"/workspace",
|
|
167
|
+
"-e",
|
|
168
|
+
f"TRACEPARENT={traceparent}",
|
|
169
|
+
"-e",
|
|
170
|
+
f"OTEL_TRACE_ID={trace_id}",
|
|
171
|
+
"-e",
|
|
172
|
+
f"OTEL_PARENT_SPAN_ID={span_id}",
|
|
101
173
|
]
|
|
102
174
|
)
|
|
103
175
|
|
|
104
|
-
|
|
105
|
-
|
|
176
|
+
for key, value in secrets.items():
|
|
177
|
+
docker_cmd.extend(["-e", f"{key.upper()}={value}"])
|
|
106
178
|
|
|
107
|
-
|
|
179
|
+
docker_cmd.append(image)
|
|
108
180
|
|
|
109
|
-
|
|
110
|
-
|
|
181
|
+
# Pass instruction via CLI arg
|
|
182
|
+
docker_cmd.extend(["--instruction", instruction])
|
|
111
183
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
184
|
+
logger.info(f"Starting container: {container_name}")
|
|
185
|
+
|
|
186
|
+
# Run container - agents emit their own OTel spans
|
|
187
|
+
# Use large limit to handle agents that output long lines (e.g., JSON with file contents)
|
|
188
|
+
process = await asyncio.create_subprocess_exec(
|
|
189
|
+
*docker_cmd,
|
|
190
|
+
stdout=asyncio.subprocess.PIPE,
|
|
191
|
+
stderr=asyncio.subprocess.STDOUT,
|
|
192
|
+
limit=100 * 1024 * 1024, # 100MB buffer limit
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
# Get and print container IP in background
|
|
196
|
+
async def print_container_ip():
|
|
197
|
+
await asyncio.sleep(3) # Wait for container to start
|
|
198
|
+
try:
|
|
199
|
+
inspect_proc = await asyncio.create_subprocess_exec(
|
|
200
|
+
"docker",
|
|
201
|
+
"inspect",
|
|
202
|
+
"-f",
|
|
203
|
+
"{{.NetworkSettings.IPAddress}}",
|
|
204
|
+
container_name,
|
|
205
|
+
stdout=asyncio.subprocess.PIPE,
|
|
206
|
+
stderr=asyncio.subprocess.PIPE,
|
|
207
|
+
)
|
|
208
|
+
stdout, _ = await inspect_proc.communicate()
|
|
209
|
+
container_ip = stdout.decode().strip()
|
|
210
|
+
if container_ip:
|
|
211
|
+
logger.info("=" * 50)
|
|
212
|
+
logger.info(f"Container: {container_name}")
|
|
213
|
+
logger.info(f"Container IP: {container_ip}")
|
|
214
|
+
logger.info(f"noVNC: http://{container_ip}:6080")
|
|
215
|
+
logger.info("=" * 50)
|
|
216
|
+
except Exception:
|
|
217
|
+
pass
|
|
218
|
+
|
|
219
|
+
asyncio.create_task(print_container_ip())
|
|
220
|
+
|
|
221
|
+
# Stream and capture output for error reporting using chunked reads to handle large lines
|
|
222
|
+
output_lines: list[str] = []
|
|
223
|
+
assert process.stdout is not None
|
|
224
|
+
buffer = ""
|
|
225
|
+
while True:
|
|
226
|
+
try:
|
|
227
|
+
chunk = await process.stdout.read(65536)
|
|
228
|
+
except Exception:
|
|
229
|
+
break
|
|
230
|
+
if not chunk:
|
|
231
|
+
break
|
|
232
|
+
buffer += chunk.decode(errors="replace")
|
|
233
|
+
|
|
234
|
+
while "\n" in buffer:
|
|
235
|
+
line, buffer = buffer.split("\n", 1)
|
|
236
|
+
output_lines.append(line)
|
|
237
|
+
# Print agent output in real-time
|
|
238
|
+
print(f"[agent] {line}")
|
|
239
|
+
|
|
240
|
+
# Handle any remaining content in buffer
|
|
241
|
+
if buffer.strip():
|
|
242
|
+
output_lines.append(buffer)
|
|
243
|
+
print(f"[agent] {buffer}")
|
|
244
|
+
|
|
245
|
+
await process.wait()
|
|
246
|
+
|
|
247
|
+
exit_code = process.returncode or 0
|
|
248
|
+
if exit_code != 0:
|
|
249
|
+
error_context = "\n".join(output_lines[-50:]) if output_lines else "No output captured"
|
|
250
|
+
raise RuntimeError(f"Agent failed with exit code {exit_code}\n\nAgent output:\n{error_context}")
|
|
251
|
+
|
|
252
|
+
finally:
|
|
253
|
+
# Clean up volumes
|
|
254
|
+
await asyncio.create_subprocess_exec(
|
|
255
|
+
"docker",
|
|
256
|
+
"volume",
|
|
257
|
+
"rm",
|
|
258
|
+
"-f",
|
|
259
|
+
logs_volume,
|
|
260
|
+
stdout=asyncio.subprocess.DEVNULL,
|
|
261
|
+
stderr=asyncio.subprocess.DEVNULL,
|
|
262
|
+
)
|
|
263
|
+
# Note: workspace_volume is not cleaned up as it may be shared
|
|
118
264
|
|
|
119
|
-
|
|
120
|
-
assert process.stdout is not None
|
|
121
|
-
while True:
|
|
122
|
-
line = await process.stdout.readline()
|
|
123
|
-
if not line:
|
|
124
|
-
break
|
|
125
|
-
logger.info(f"[agent] {line.decode().rstrip()}")
|
|
126
|
-
|
|
127
|
-
await process.wait()
|
|
128
|
-
|
|
129
|
-
if process.returncode != 0:
|
|
130
|
-
raise RuntimeError(f"Agent failed with exit code {process.returncode}")
|
|
131
|
-
|
|
132
|
-
agent_span.log("Agent completed successfully")
|
|
133
|
-
|
|
134
|
-
finally:
|
|
135
|
-
os.unlink(config_file.name)
|
|
136
|
-
|
|
137
|
-
# Load trajectory and add to span
|
|
138
|
-
trajectory_path = Path(logs_dir) / "agent" / "trajectory.json"
|
|
139
|
-
if trajectory_path.exists():
|
|
140
|
-
try:
|
|
141
|
-
with open(trajectory_path) as f:
|
|
142
|
-
trajectory = json.load(f)
|
|
143
|
-
if isinstance(trajectory, dict) and "schema_version" in trajectory:
|
|
144
|
-
# Add agent image to trajectory
|
|
145
|
-
agent_data = trajectory.get("agent", {})
|
|
146
|
-
extra = agent_data.get("extra") or {}
|
|
147
|
-
extra["image"] = image
|
|
148
|
-
agent_data["extra"] = extra
|
|
149
|
-
trajectory["agent"] = agent_data
|
|
150
|
-
|
|
151
|
-
# Log trajectory as separate event
|
|
152
|
-
await log_event(
|
|
153
|
-
span_type="trajectory",
|
|
154
|
-
log_type="atif",
|
|
155
|
-
extra=trajectory,
|
|
156
|
-
source="agent",
|
|
157
|
-
)
|
|
158
|
-
except Exception as e:
|
|
159
|
-
logger.warning(f"Failed to load trajectory: {e}")
|
|
160
|
-
|
|
161
|
-
await upload_artifacts(logs_dir)
|
|
265
|
+
return container_name
|
plato/chronos/models/__init__.py
CHANGED
|
@@ -10,10 +10,18 @@ from pydantic import AwareDatetime, BaseModel, ConfigDict, Field
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class AgentConfig(BaseModel):
|
|
13
|
+
"""Agent config - supports multiple formats.
|
|
14
|
+
|
|
15
|
+
New format: agent + version (version optional, defaults to latest)
|
|
16
|
+
Legacy format: agent_id (public_id)
|
|
17
|
+
"""
|
|
18
|
+
|
|
13
19
|
model_config = ConfigDict(
|
|
14
20
|
extra="allow",
|
|
15
21
|
)
|
|
16
|
-
|
|
22
|
+
agent: Annotated[str | None, Field(title="Agent")] = None
|
|
23
|
+
version: Annotated[str | None, Field(title="Version")] = None
|
|
24
|
+
agent_id: Annotated[str | None, Field(title="Agent Id")] = None # backwards compat
|
|
17
25
|
config: Annotated[dict[str, Any] | None, Field(title="Config")] = {}
|
|
18
26
|
|
|
19
27
|
|