plato-sdk-v2 2.2.4__py3-none-any.whl → 2.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/agents/__init__.py +4 -0
- plato/agents/logging.py +114 -0
- plato/agents/runner.py +32 -2
- plato/v1/cli/main.py +0 -2
- plato/v1/cli/sandbox.py +192 -50
- plato/v1/cli/ssh.py +16 -4
- plato/v1/cli/verify.py +243 -827
- plato/v2/async_/environment.py +31 -0
- plato/v2/async_/session.py +33 -0
- plato/v2/sync/environment.py +31 -0
- plato/v2/sync/session.py +33 -0
- plato/worlds/__init__.py +3 -1
- plato/worlds/base.py +276 -2
- plato/worlds/config.py +38 -1
- plato/worlds/runner.py +97 -45
- {plato_sdk_v2-2.2.4.dist-info → plato_sdk_v2-2.3.3.dist-info}/METADATA +3 -1
- {plato_sdk_v2-2.2.4.dist-info → plato_sdk_v2-2.3.3.dist-info}/RECORD +19 -20
- plato/v1/cli/sim.py +0 -11
- {plato_sdk_v2-2.2.4.dist-info → plato_sdk_v2-2.3.3.dist-info}/WHEEL +0 -0
- {plato_sdk_v2-2.2.4.dist-info → plato_sdk_v2-2.3.3.dist-info}/entry_points.txt +0 -0
plato/v2/async_/environment.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
AppApiV2SchemasSessionCreateSnapshotRequest,
|
|
10
11
|
ConnectRoutingInfoResult,
|
|
11
12
|
CreateCheckpointRequest,
|
|
12
13
|
CreateSnapshotResult,
|
|
@@ -136,6 +137,36 @@ class Environment:
|
|
|
136
137
|
x_api_key=self._api_key,
|
|
137
138
|
)
|
|
138
139
|
|
|
140
|
+
async def snapshot_store(
|
|
141
|
+
self,
|
|
142
|
+
override_service: str | None = None,
|
|
143
|
+
override_version: str | None = None,
|
|
144
|
+
override_dataset: str | None = None,
|
|
145
|
+
) -> CreateSnapshotResult:
|
|
146
|
+
"""Create a snapshot-store snapshot of this environment.
|
|
147
|
+
|
|
148
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
149
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
153
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
154
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
CreateSnapshotResult with artifact_id.
|
|
158
|
+
"""
|
|
159
|
+
return await jobs.snapshot_store.asyncio(
|
|
160
|
+
client=self._http,
|
|
161
|
+
job_id=self.job_id,
|
|
162
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
163
|
+
override_service=override_service,
|
|
164
|
+
override_version=override_version,
|
|
165
|
+
override_dataset=override_dataset,
|
|
166
|
+
),
|
|
167
|
+
x_api_key=self._api_key,
|
|
168
|
+
)
|
|
169
|
+
|
|
139
170
|
async def close(self) -> None:
|
|
140
171
|
"""Close this environment."""
|
|
141
172
|
await jobs.close.asyncio(
|
plato/v2/async_/session.py
CHANGED
|
@@ -34,6 +34,7 @@ from plato._generated.api.v2.sessions import reset as sessions_reset
|
|
|
34
34
|
from plato._generated.api.v2.sessions import set_date as sessions_set_date
|
|
35
35
|
from plato._generated.api.v2.sessions import setup_sandbox as sessions_setup_sandbox
|
|
36
36
|
from plato._generated.api.v2.sessions import snapshot as sessions_snapshot
|
|
37
|
+
from plato._generated.api.v2.sessions import snapshot_store as sessions_snapshot_store
|
|
37
38
|
from plato._generated.api.v2.sessions import state as sessions_state
|
|
38
39
|
from plato._generated.api.v2.sessions import wait_for_ready as sessions_wait_for_ready
|
|
39
40
|
from plato._generated.models import (
|
|
@@ -622,6 +623,38 @@ class Session:
|
|
|
622
623
|
x_api_key=self._api_key,
|
|
623
624
|
)
|
|
624
625
|
|
|
626
|
+
async def snapshot_store(
|
|
627
|
+
self,
|
|
628
|
+
override_service: str | None = None,
|
|
629
|
+
override_version: str | None = None,
|
|
630
|
+
override_dataset: str | None = None,
|
|
631
|
+
) -> AppApiV2SchemasSessionCreateSnapshotResponse:
|
|
632
|
+
"""Create a snapshot-store snapshot of all environments in the session.
|
|
633
|
+
|
|
634
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
635
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
636
|
+
|
|
637
|
+
Args:
|
|
638
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
639
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
640
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
Snapshot response with info per job_id.
|
|
644
|
+
"""
|
|
645
|
+
self._check_closed()
|
|
646
|
+
|
|
647
|
+
return await sessions_snapshot_store.asyncio(
|
|
648
|
+
client=self._http,
|
|
649
|
+
session_id=self.session_id,
|
|
650
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
651
|
+
override_service=override_service,
|
|
652
|
+
override_version=override_version,
|
|
653
|
+
override_dataset=override_dataset,
|
|
654
|
+
),
|
|
655
|
+
x_api_key=self._api_key,
|
|
656
|
+
)
|
|
657
|
+
|
|
625
658
|
async def disk_snapshot(
|
|
626
659
|
self,
|
|
627
660
|
override_service: str | None = None,
|
plato/v2/sync/environment.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
AppApiV2SchemasSessionCreateSnapshotRequest,
|
|
10
11
|
CreateCheckpointRequest,
|
|
11
12
|
CreateSnapshotResult,
|
|
12
13
|
ExecuteCommandRequest,
|
|
@@ -135,6 +136,36 @@ class Environment:
|
|
|
135
136
|
x_api_key=self._api_key,
|
|
136
137
|
)
|
|
137
138
|
|
|
139
|
+
def snapshot_store(
|
|
140
|
+
self,
|
|
141
|
+
override_service: str | None = None,
|
|
142
|
+
override_version: str | None = None,
|
|
143
|
+
override_dataset: str | None = None,
|
|
144
|
+
) -> CreateSnapshotResult:
|
|
145
|
+
"""Create a snapshot-store snapshot of this environment.
|
|
146
|
+
|
|
147
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
148
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
152
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
153
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
CreateSnapshotResult with artifact_id.
|
|
157
|
+
"""
|
|
158
|
+
return jobs.snapshot_store.sync(
|
|
159
|
+
client=self._http,
|
|
160
|
+
job_id=self.job_id,
|
|
161
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
162
|
+
override_service=override_service,
|
|
163
|
+
override_version=override_version,
|
|
164
|
+
override_dataset=override_dataset,
|
|
165
|
+
),
|
|
166
|
+
x_api_key=self._api_key,
|
|
167
|
+
)
|
|
168
|
+
|
|
138
169
|
def close(self) -> None:
|
|
139
170
|
"""Close this environment."""
|
|
140
171
|
jobs.close.sync(
|
plato/v2/sync/session.py
CHANGED
|
@@ -32,6 +32,7 @@ from plato._generated.api.v2.sessions import reset as sessions_reset
|
|
|
32
32
|
from plato._generated.api.v2.sessions import set_date as sessions_set_date
|
|
33
33
|
from plato._generated.api.v2.sessions import setup_sandbox as sessions_setup_sandbox
|
|
34
34
|
from plato._generated.api.v2.sessions import snapshot as sessions_snapshot
|
|
35
|
+
from plato._generated.api.v2.sessions import snapshot_store as sessions_snapshot_store
|
|
35
36
|
from plato._generated.api.v2.sessions import state as sessions_state
|
|
36
37
|
from plato._generated.api.v2.sessions import wait_for_ready as sessions_wait_for_ready
|
|
37
38
|
from plato._generated.models import (
|
|
@@ -595,6 +596,38 @@ class Session:
|
|
|
595
596
|
x_api_key=self._api_key,
|
|
596
597
|
)
|
|
597
598
|
|
|
599
|
+
def snapshot_store(
|
|
600
|
+
self,
|
|
601
|
+
override_service: str | None = None,
|
|
602
|
+
override_version: str | None = None,
|
|
603
|
+
override_dataset: str | None = None,
|
|
604
|
+
) -> AppApiV2SchemasSessionCreateSnapshotResponse:
|
|
605
|
+
"""Create a snapshot-store snapshot of all environments in the session.
|
|
606
|
+
|
|
607
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
608
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
609
|
+
|
|
610
|
+
Args:
|
|
611
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
612
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
613
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
614
|
+
|
|
615
|
+
Returns:
|
|
616
|
+
Snapshot response with info per job_id.
|
|
617
|
+
"""
|
|
618
|
+
self._check_closed()
|
|
619
|
+
|
|
620
|
+
return sessions_snapshot_store.sync(
|
|
621
|
+
client=self._http,
|
|
622
|
+
session_id=self.session_id,
|
|
623
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
624
|
+
override_service=override_service,
|
|
625
|
+
override_version=override_version,
|
|
626
|
+
override_dataset=override_dataset,
|
|
627
|
+
),
|
|
628
|
+
x_api_key=self._api_key,
|
|
629
|
+
)
|
|
630
|
+
|
|
598
631
|
def disk_snapshot(
|
|
599
632
|
self,
|
|
600
633
|
override_service: str | None = None,
|
plato/worlds/__init__.py
CHANGED
|
@@ -52,7 +52,7 @@ from plato.worlds.base import (
|
|
|
52
52
|
get_world,
|
|
53
53
|
register_world,
|
|
54
54
|
)
|
|
55
|
-
from plato.worlds.config import Agent, AgentConfig, Env, EnvConfig, RunConfig, Secret
|
|
55
|
+
from plato.worlds.config import Agent, AgentConfig, CheckpointConfig, Env, EnvConfig, RunConfig, Secret, StateConfig
|
|
56
56
|
from plato.worlds.runner import run_world
|
|
57
57
|
|
|
58
58
|
__all__ = [
|
|
@@ -66,6 +66,8 @@ __all__ = [
|
|
|
66
66
|
"get_world",
|
|
67
67
|
# Config
|
|
68
68
|
"RunConfig",
|
|
69
|
+
"CheckpointConfig",
|
|
70
|
+
"StateConfig",
|
|
69
71
|
"AgentConfig",
|
|
70
72
|
"Agent",
|
|
71
73
|
"Secret",
|
plato/worlds/base.py
CHANGED
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
import subprocess
|
|
6
7
|
from abc import ABC, abstractmethod
|
|
8
|
+
from pathlib import Path
|
|
7
9
|
from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, get_args, get_origin
|
|
8
10
|
|
|
9
11
|
from pydantic import BaseModel, Field
|
|
@@ -18,6 +20,8 @@ from plato.agents.logging import init_logging as _init_chronos_logging
|
|
|
18
20
|
from plato.agents.logging import log_event as _log_event
|
|
19
21
|
from plato.agents.logging import reset_logging as _reset_chronos_logging
|
|
20
22
|
from plato.agents.logging import span as _span
|
|
23
|
+
from plato.agents.logging import upload_artifact as _upload_artifact
|
|
24
|
+
from plato.agents.logging import upload_checkpoint as _upload_checkpoint
|
|
21
25
|
|
|
22
26
|
logger = logging.getLogger(__name__)
|
|
23
27
|
|
|
@@ -195,6 +199,248 @@ class BaseWorld(ABC, Generic[ConfigT]):
|
|
|
195
199
|
except Exception as e:
|
|
196
200
|
self.logger.warning(f"Error stopping Plato heartbeat: {e}")
|
|
197
201
|
|
|
202
|
+
async def _create_checkpoint(self) -> dict[str, str] | None:
|
|
203
|
+
"""Create a checkpoint snapshot of all environments (excluding configured envs).
|
|
204
|
+
|
|
205
|
+
Uses snapshot_store for efficient chunk-based deduplication.
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
Dict mapping environment alias to artifact_id, or None if no session connected.
|
|
209
|
+
"""
|
|
210
|
+
if not self.plato_session:
|
|
211
|
+
self.logger.warning("Cannot create checkpoint: Plato session not connected")
|
|
212
|
+
return None
|
|
213
|
+
|
|
214
|
+
exclude_envs = set(self.config.checkpoint.exclude_envs)
|
|
215
|
+
envs_to_snapshot = [env for env in self.plato_session.envs if env.alias not in exclude_envs]
|
|
216
|
+
|
|
217
|
+
if not envs_to_snapshot:
|
|
218
|
+
self.logger.info("No environments to checkpoint (all excluded)")
|
|
219
|
+
return {}
|
|
220
|
+
|
|
221
|
+
self.logger.info(
|
|
222
|
+
f"Creating checkpoint for {len(envs_to_snapshot)} environment(s): {[e.alias for e in envs_to_snapshot]}"
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
results: dict[str, str] = {}
|
|
226
|
+
for env in envs_to_snapshot:
|
|
227
|
+
try:
|
|
228
|
+
result = await env.snapshot_store()
|
|
229
|
+
artifact_id = result.artifact_id
|
|
230
|
+
results[env.alias] = artifact_id
|
|
231
|
+
|
|
232
|
+
# Check for success/error fields (available after SDK regeneration)
|
|
233
|
+
success = getattr(result, "success", True)
|
|
234
|
+
error = getattr(result, "error", None)
|
|
235
|
+
|
|
236
|
+
if not success or error:
|
|
237
|
+
self.logger.error(
|
|
238
|
+
f"Checkpoint failed for '{env.alias}': {error or 'unknown error'} (job_id={env.job_id})"
|
|
239
|
+
)
|
|
240
|
+
elif artifact_id:
|
|
241
|
+
self.logger.info(f"Checkpoint created for '{env.alias}': {artifact_id}")
|
|
242
|
+
else:
|
|
243
|
+
self.logger.warning(
|
|
244
|
+
f"Checkpoint for '{env.alias}' returned empty artifact_id (job_id={env.job_id})"
|
|
245
|
+
)
|
|
246
|
+
except Exception as e:
|
|
247
|
+
self.logger.error(f"Failed to checkpoint '{env.alias}': {e}")
|
|
248
|
+
|
|
249
|
+
return results
|
|
250
|
+
|
|
251
|
+
def _init_state_directory(self) -> None:
|
|
252
|
+
"""Initialize the state directory as a git repository.
|
|
253
|
+
|
|
254
|
+
Creates the state directory if it doesn't exist and initializes it
|
|
255
|
+
as a git repository with an initial commit.
|
|
256
|
+
"""
|
|
257
|
+
if not self.config.state.enabled:
|
|
258
|
+
return
|
|
259
|
+
|
|
260
|
+
state_path = Path(self.config.state.path)
|
|
261
|
+
|
|
262
|
+
# Create directory if it doesn't exist
|
|
263
|
+
if not state_path.exists():
|
|
264
|
+
state_path.mkdir(parents=True)
|
|
265
|
+
self.logger.info(f"Created state directory: {state_path}")
|
|
266
|
+
|
|
267
|
+
# Check if already a git repo
|
|
268
|
+
git_dir = state_path / ".git"
|
|
269
|
+
if git_dir.exists():
|
|
270
|
+
self.logger.info(f"State directory already initialized: {state_path}")
|
|
271
|
+
return
|
|
272
|
+
|
|
273
|
+
# Initialize git repo
|
|
274
|
+
try:
|
|
275
|
+
subprocess.run(
|
|
276
|
+
["git", "init"],
|
|
277
|
+
cwd=state_path,
|
|
278
|
+
capture_output=True,
|
|
279
|
+
check=True,
|
|
280
|
+
)
|
|
281
|
+
# Create initial commit (even if empty)
|
|
282
|
+
subprocess.run(
|
|
283
|
+
["git", "config", "user.email", "plato@plato.so"],
|
|
284
|
+
cwd=state_path,
|
|
285
|
+
capture_output=True,
|
|
286
|
+
check=True,
|
|
287
|
+
)
|
|
288
|
+
subprocess.run(
|
|
289
|
+
["git", "config", "user.name", "Plato"],
|
|
290
|
+
cwd=state_path,
|
|
291
|
+
capture_output=True,
|
|
292
|
+
check=True,
|
|
293
|
+
)
|
|
294
|
+
# Add all files and create initial commit
|
|
295
|
+
subprocess.run(
|
|
296
|
+
["git", "add", "-A"],
|
|
297
|
+
cwd=state_path,
|
|
298
|
+
capture_output=True,
|
|
299
|
+
check=True,
|
|
300
|
+
)
|
|
301
|
+
subprocess.run(
|
|
302
|
+
["git", "commit", "--allow-empty", "-m", "Initial state"],
|
|
303
|
+
cwd=state_path,
|
|
304
|
+
capture_output=True,
|
|
305
|
+
check=True,
|
|
306
|
+
)
|
|
307
|
+
self.logger.info(f"Initialized git repo in state directory: {state_path}")
|
|
308
|
+
except subprocess.CalledProcessError as e:
|
|
309
|
+
self.logger.warning(f"Failed to initialize state git repo: {e.stderr}")
|
|
310
|
+
|
|
311
|
+
def _commit_state(self, message: str) -> bool:
|
|
312
|
+
"""Commit current state directory changes.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
message: Commit message
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
True if commit was created (or no changes), False on error.
|
|
319
|
+
"""
|
|
320
|
+
if not self.config.state.enabled:
|
|
321
|
+
return True
|
|
322
|
+
|
|
323
|
+
state_path = Path(self.config.state.path)
|
|
324
|
+
if not state_path.exists():
|
|
325
|
+
return True
|
|
326
|
+
|
|
327
|
+
try:
|
|
328
|
+
# Add all changes
|
|
329
|
+
subprocess.run(
|
|
330
|
+
["git", "add", "-A"],
|
|
331
|
+
cwd=state_path,
|
|
332
|
+
capture_output=True,
|
|
333
|
+
check=True,
|
|
334
|
+
)
|
|
335
|
+
# Check if there are changes to commit
|
|
336
|
+
result = subprocess.run(
|
|
337
|
+
["git", "status", "--porcelain"],
|
|
338
|
+
cwd=state_path,
|
|
339
|
+
capture_output=True,
|
|
340
|
+
text=True,
|
|
341
|
+
check=True,
|
|
342
|
+
)
|
|
343
|
+
if not result.stdout.strip():
|
|
344
|
+
self.logger.debug("No state changes to commit")
|
|
345
|
+
return True
|
|
346
|
+
|
|
347
|
+
# Commit changes
|
|
348
|
+
subprocess.run(
|
|
349
|
+
["git", "commit", "-m", message],
|
|
350
|
+
cwd=state_path,
|
|
351
|
+
capture_output=True,
|
|
352
|
+
check=True,
|
|
353
|
+
)
|
|
354
|
+
self.logger.info(f"Committed state changes: {message}")
|
|
355
|
+
return True
|
|
356
|
+
except subprocess.CalledProcessError as e:
|
|
357
|
+
self.logger.warning(f"Failed to commit state: {e.stderr}")
|
|
358
|
+
return False
|
|
359
|
+
|
|
360
|
+
def _create_state_bundle(self) -> bytes | None:
|
|
361
|
+
"""Create a git bundle of the state directory.
|
|
362
|
+
|
|
363
|
+
Returns:
|
|
364
|
+
Bundle bytes if successful, None otherwise.
|
|
365
|
+
"""
|
|
366
|
+
if not self.config.state.enabled:
|
|
367
|
+
return None
|
|
368
|
+
|
|
369
|
+
state_path = Path(self.config.state.path)
|
|
370
|
+
if not state_path.exists():
|
|
371
|
+
return None
|
|
372
|
+
|
|
373
|
+
git_dir = state_path / ".git"
|
|
374
|
+
if not git_dir.exists():
|
|
375
|
+
self.logger.warning("State directory is not a git repository")
|
|
376
|
+
return None
|
|
377
|
+
|
|
378
|
+
try:
|
|
379
|
+
# Create bundle to stdout
|
|
380
|
+
result = subprocess.run(
|
|
381
|
+
["git", "bundle", "create", "-", "--all"],
|
|
382
|
+
cwd=state_path,
|
|
383
|
+
capture_output=True,
|
|
384
|
+
check=True,
|
|
385
|
+
)
|
|
386
|
+
bundle_data = result.stdout
|
|
387
|
+
self.logger.info(f"Created state bundle: {len(bundle_data)} bytes")
|
|
388
|
+
return bundle_data
|
|
389
|
+
except subprocess.CalledProcessError as e:
|
|
390
|
+
self.logger.warning(f"Failed to create state bundle: {e.stderr}")
|
|
391
|
+
return None
|
|
392
|
+
|
|
393
|
+
async def _create_and_upload_checkpoint(self) -> dict[str, Any] | None:
|
|
394
|
+
"""Create a full checkpoint including env snapshots and state bundle.
|
|
395
|
+
|
|
396
|
+
This method:
|
|
397
|
+
1. Commits any pending state changes
|
|
398
|
+
2. Creates env snapshots using snapshot_store
|
|
399
|
+
3. Creates and uploads state bundle as an artifact
|
|
400
|
+
4. Calls the checkpoint endpoint with all data
|
|
401
|
+
|
|
402
|
+
Returns:
|
|
403
|
+
Checkpoint result dict if successful, None otherwise.
|
|
404
|
+
"""
|
|
405
|
+
# Commit state changes first
|
|
406
|
+
self._commit_state(f"Checkpoint at step {self._step_count}")
|
|
407
|
+
|
|
408
|
+
# Create env snapshots
|
|
409
|
+
env_snapshots = await self._create_checkpoint()
|
|
410
|
+
if env_snapshots is None:
|
|
411
|
+
env_snapshots = {}
|
|
412
|
+
|
|
413
|
+
# Create and upload state bundle
|
|
414
|
+
state_artifact_id: str | None = None
|
|
415
|
+
if self.config.state.enabled:
|
|
416
|
+
bundle_data = self._create_state_bundle()
|
|
417
|
+
if bundle_data:
|
|
418
|
+
result = await _upload_artifact(
|
|
419
|
+
data=bundle_data,
|
|
420
|
+
artifact_type="state",
|
|
421
|
+
filename=f"state_step_{self._step_count}.bundle",
|
|
422
|
+
extra={
|
|
423
|
+
"step_number": self._step_count,
|
|
424
|
+
"state_path": self.config.state.path,
|
|
425
|
+
},
|
|
426
|
+
)
|
|
427
|
+
if result:
|
|
428
|
+
state_artifact_id = result.get("artifact_id")
|
|
429
|
+
self.logger.info(f"Uploaded state artifact: {state_artifact_id}")
|
|
430
|
+
|
|
431
|
+
# Upload checkpoint with all data
|
|
432
|
+
checkpoint_result = await _upload_checkpoint(
|
|
433
|
+
step_number=self._step_count,
|
|
434
|
+
env_snapshots=env_snapshots,
|
|
435
|
+
state_artifact_id=state_artifact_id,
|
|
436
|
+
extra={
|
|
437
|
+
"world_name": self.name,
|
|
438
|
+
"world_version": self.get_version(),
|
|
439
|
+
},
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
return checkpoint_result
|
|
443
|
+
|
|
198
444
|
def get_env(self, alias: str) -> Environment | None:
|
|
199
445
|
"""Get an environment by alias.
|
|
200
446
|
|
|
@@ -236,6 +482,9 @@ class BaseWorld(ABC, Generic[ConfigT]):
|
|
|
236
482
|
Returns:
|
|
237
483
|
Dict of environment variable name -> value
|
|
238
484
|
|
|
485
|
+
Raises:
|
|
486
|
+
ImportError: If a sim environment is configured but package is not installed.
|
|
487
|
+
|
|
239
488
|
Example:
|
|
240
489
|
env_vars = self.get_sim_env_vars()
|
|
241
490
|
# Returns: {"AWS_ENDPOINT_URL": "https://...", "GITEA_URL": "https://...", ...}
|
|
@@ -263,7 +512,13 @@ class BaseWorld(ABC, Generic[ConfigT]):
|
|
|
263
512
|
env_vars.update(sim_vars)
|
|
264
513
|
self.logger.info(f"{package_name} env vars: {list(sim_vars.keys())}")
|
|
265
514
|
except ImportError:
|
|
266
|
-
|
|
515
|
+
raise ImportError(
|
|
516
|
+
f"Environment '{env_alias}' is configured but 'plato.sims.{package_name}' "
|
|
517
|
+
f"package is not installed.\n\n"
|
|
518
|
+
f"Install sims packages:\n"
|
|
519
|
+
f' export INDEX_URL="https://__token__:${{PLATO_API_KEY}}@plato.so/api/v2/pypi/sims/simple/"\n'
|
|
520
|
+
f" uv pip install '.[sims]' --extra-index-url $INDEX_URL"
|
|
521
|
+
) from None
|
|
267
522
|
except Exception as e:
|
|
268
523
|
self.logger.warning(f"Failed to get {package_name} env vars: {e}")
|
|
269
524
|
|
|
@@ -278,6 +533,9 @@ class BaseWorld(ABC, Generic[ConfigT]):
|
|
|
278
533
|
Returns:
|
|
279
534
|
Markdown string with instructions, or empty string if no sims configured.
|
|
280
535
|
|
|
536
|
+
Raises:
|
|
537
|
+
ImportError: If a sim environment is configured but package is not installed.
|
|
538
|
+
|
|
281
539
|
Example:
|
|
282
540
|
instructions = self.get_sim_instructions()
|
|
283
541
|
# Returns markdown with LocalStack/Gitea setup instructions
|
|
@@ -306,7 +564,13 @@ class BaseWorld(ABC, Generic[ConfigT]):
|
|
|
306
564
|
instructions_parts.append(instructions)
|
|
307
565
|
self.logger.info(f"Added {package_name} instructions to prompt")
|
|
308
566
|
except ImportError:
|
|
309
|
-
|
|
567
|
+
raise ImportError(
|
|
568
|
+
f"Environment '{env_alias}' is configured but 'plato.sims.{package_name}' "
|
|
569
|
+
f"package is not installed.\n\n"
|
|
570
|
+
f"Install sims packages:\n"
|
|
571
|
+
f' export INDEX_URL="https://__token__:${{PLATO_API_KEY}}@plato.so/api/v2/pypi/sims/simple/"\n'
|
|
572
|
+
f" uv pip install '.[sims]' --extra-index-url $INDEX_URL"
|
|
573
|
+
) from None
|
|
310
574
|
except Exception as e:
|
|
311
575
|
self.logger.warning(f"Failed to get {package_name} instructions: {e}")
|
|
312
576
|
|
|
@@ -363,6 +627,9 @@ The following services are available for your use:
|
|
|
363
627
|
|
|
364
628
|
self.logger.info(f"Starting world '{self.name}'")
|
|
365
629
|
|
|
630
|
+
# Initialize state directory (creates git repo if needed)
|
|
631
|
+
self._init_state_directory()
|
|
632
|
+
|
|
366
633
|
# Initialize the logging singleton for agents to use
|
|
367
634
|
if config.callback_url and config.session_id:
|
|
368
635
|
_init_chronos_logging(
|
|
@@ -415,6 +682,13 @@ The following services are available for your use:
|
|
|
415
682
|
|
|
416
683
|
self.logger.info(f"Step {self._step_count}: done={result.done}")
|
|
417
684
|
|
|
685
|
+
# Create checkpoint if enabled and interval matches
|
|
686
|
+
# Note: The checkpoint event is created by the callback endpoint,
|
|
687
|
+
# so we don't need a span wrapper here (would create duplicates)
|
|
688
|
+
if self.config.checkpoint.enabled and self._step_count % self.config.checkpoint.interval == 0:
|
|
689
|
+
self.logger.info(f"Creating checkpoint after step {self._step_count}")
|
|
690
|
+
await self._create_and_upload_checkpoint()
|
|
691
|
+
|
|
418
692
|
if result.done:
|
|
419
693
|
break
|
|
420
694
|
|
plato/worlds/config.py
CHANGED
|
@@ -72,6 +72,36 @@ class Env:
|
|
|
72
72
|
self.required = required
|
|
73
73
|
|
|
74
74
|
|
|
75
|
+
class StateConfig(BaseModel):
|
|
76
|
+
"""Configuration for world state persistence.
|
|
77
|
+
|
|
78
|
+
The state directory is a git-tracked directory that persists across checkpoints.
|
|
79
|
+
At each checkpoint, the state directory is git bundled and uploaded as an artifact.
|
|
80
|
+
On restore, bootstrap.sh downloads and unbundles the state before the world starts.
|
|
81
|
+
|
|
82
|
+
Attributes:
|
|
83
|
+
enabled: Whether to enable state persistence (default: True).
|
|
84
|
+
path: Path to the state directory (default: /state).
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
enabled: bool = True
|
|
88
|
+
path: str = "/state"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class CheckpointConfig(BaseModel):
|
|
92
|
+
"""Configuration for automatic checkpointing during world execution.
|
|
93
|
+
|
|
94
|
+
Attributes:
|
|
95
|
+
enabled: Whether to enable automatic checkpoints after steps.
|
|
96
|
+
interval: Create checkpoint every N steps (default: 1 = every step).
|
|
97
|
+
exclude_envs: Environment aliases to exclude from checkpoints (default: ["runtime"]).
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
enabled: bool = True
|
|
101
|
+
interval: int = 1
|
|
102
|
+
exclude_envs: list[str] = Field(default_factory=lambda: ["runtime"])
|
|
103
|
+
|
|
104
|
+
|
|
75
105
|
class RunConfig(BaseModel):
|
|
76
106
|
"""Base configuration for running a world.
|
|
77
107
|
|
|
@@ -98,6 +128,7 @@ class RunConfig(BaseModel):
|
|
|
98
128
|
session_id: Unique Chronos session identifier
|
|
99
129
|
callback_url: Callback URL for status updates
|
|
100
130
|
plato_session: Serialized Plato session for connecting to existing VM session
|
|
131
|
+
checkpoint: Configuration for automatic checkpoints after steps
|
|
101
132
|
"""
|
|
102
133
|
|
|
103
134
|
session_id: str = ""
|
|
@@ -108,6 +139,12 @@ class RunConfig(BaseModel):
|
|
|
108
139
|
# This is the output of Session.dump() - used to restore session with Session.load()
|
|
109
140
|
plato_session: SerializedSession | None = None
|
|
110
141
|
|
|
142
|
+
# Checkpoint configuration for automatic snapshots after steps
|
|
143
|
+
checkpoint: CheckpointConfig = Field(default_factory=CheckpointConfig)
|
|
144
|
+
|
|
145
|
+
# State persistence configuration
|
|
146
|
+
state: StateConfig = Field(default_factory=StateConfig)
|
|
147
|
+
|
|
111
148
|
model_config = {"extra": "allow"}
|
|
112
149
|
|
|
113
150
|
@classmethod
|
|
@@ -145,7 +182,7 @@ class RunConfig(BaseModel):
|
|
|
145
182
|
envs = []
|
|
146
183
|
|
|
147
184
|
# Skip runtime fields
|
|
148
|
-
runtime_fields = {"session_id", "callback_url", "all_secrets", "plato_session"}
|
|
185
|
+
runtime_fields = {"session_id", "callback_url", "all_secrets", "plato_session", "checkpoint", "state"}
|
|
149
186
|
|
|
150
187
|
for field_name, prop_schema in properties.items():
|
|
151
188
|
if field_name in runtime_fields:
|