plato-sdk-v2 2.0.64__py3-none-any.whl → 2.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/__init__.py +0 -9
- plato/_sims_generator/__init__.py +19 -4
- plato/_sims_generator/instruction.py +203 -0
- plato/_sims_generator/templates/instruction/helpers.py.jinja +161 -0
- plato/_sims_generator/templates/instruction/init.py.jinja +43 -0
- plato/agents/__init__.py +99 -430
- plato/agents/base.py +145 -0
- plato/agents/build.py +61 -0
- plato/agents/config.py +160 -0
- plato/agents/logging.py +515 -0
- plato/agents/runner.py +191 -0
- plato/agents/trajectory.py +266 -0
- plato/chronos/models/__init__.py +1 -1
- plato/sims/cli.py +299 -123
- plato/sims/registry.py +77 -4
- plato/v1/cli/agent.py +88 -84
- plato/v1/cli/pm.py +84 -44
- plato/v1/cli/sandbox.py +241 -61
- plato/v1/cli/ssh.py +16 -4
- plato/v1/cli/verify.py +685 -0
- plato/v1/cli/world.py +3 -0
- plato/v1/flow_executor.py +21 -17
- plato/v1/models/env.py +11 -11
- plato/v1/sdk.py +2 -2
- plato/v1/sync_env.py +11 -11
- plato/v1/sync_flow_executor.py +21 -17
- plato/v1/sync_sdk.py +4 -2
- plato/v2/__init__.py +2 -0
- plato/v2/async_/environment.py +31 -0
- plato/v2/async_/session.py +72 -4
- plato/v2/sync/environment.py +31 -0
- plato/v2/sync/session.py +72 -4
- plato/worlds/README.md +71 -56
- plato/worlds/__init__.py +56 -18
- plato/worlds/base.py +578 -93
- plato/worlds/config.py +276 -74
- plato/worlds/runner.py +475 -80
- {plato_sdk_v2-2.0.64.dist-info → plato_sdk_v2-2.3.4.dist-info}/METADATA +3 -3
- {plato_sdk_v2-2.0.64.dist-info → plato_sdk_v2-2.3.4.dist-info}/RECORD +41 -36
- {plato_sdk_v2-2.0.64.dist-info → plato_sdk_v2-2.3.4.dist-info}/entry_points.txt +1 -0
- plato/agents/callback.py +0 -246
- plato/world/__init__.py +0 -44
- plato/world/base.py +0 -267
- plato/world/config.py +0 -139
- plato/world/types.py +0 -47
- {plato_sdk_v2-2.0.64.dist-info → plato_sdk_v2-2.3.4.dist-info}/WHEEL +0 -0
plato/v1/flow_executor.py
CHANGED
|
@@ -7,15 +7,13 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import TYPE_CHECKING, cast
|
|
11
11
|
from urllib.parse import urljoin
|
|
12
12
|
|
|
13
13
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
16
|
from playwright.async_api import Page
|
|
17
|
-
except ImportError:
|
|
18
|
-
Page = Any # Fallback if playwright not installed
|
|
19
17
|
|
|
20
18
|
from plato.v1.models.flow import (
|
|
21
19
|
CheckElementStep,
|
|
@@ -42,7 +40,7 @@ class FlowExecutor:
|
|
|
42
40
|
|
|
43
41
|
def __init__(
|
|
44
42
|
self,
|
|
45
|
-
page: Page,
|
|
43
|
+
page: "Page",
|
|
46
44
|
flow: Flow,
|
|
47
45
|
screenshots_dir: Path | None = None,
|
|
48
46
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
@@ -108,29 +106,29 @@ class FlowExecutor:
|
|
|
108
106
|
async def _execute_step(self, step: FlowStep) -> bool:
|
|
109
107
|
"""Execute a single step in a flow using type attribute."""
|
|
110
108
|
if step.type == "wait_for_selector":
|
|
111
|
-
return await self._wait_for_selector(step)
|
|
109
|
+
return await self._wait_for_selector(cast(WaitForSelectorStep, step))
|
|
112
110
|
elif step.type == "click":
|
|
113
|
-
return await self._click(step)
|
|
111
|
+
return await self._click(cast(ClickStep, step))
|
|
114
112
|
elif step.type == "fill":
|
|
115
|
-
return await self._fill(step)
|
|
113
|
+
return await self._fill(cast(FillStep, step))
|
|
116
114
|
elif step.type == "wait":
|
|
117
|
-
return await self._wait(step)
|
|
115
|
+
return await self._wait(cast(WaitStep, step))
|
|
118
116
|
elif step.type == "navigate":
|
|
119
|
-
return await self._navigate(step)
|
|
117
|
+
return await self._navigate(cast(NavigateStep, step))
|
|
120
118
|
elif step.type == "wait_for_url":
|
|
121
|
-
return await self._wait_for_url(step)
|
|
119
|
+
return await self._wait_for_url(cast(WaitForUrlStep, step))
|
|
122
120
|
elif step.type == "check_element":
|
|
123
|
-
return await self._check_element(step)
|
|
121
|
+
return await self._check_element(cast(CheckElementStep, step))
|
|
124
122
|
elif step.type == "verify":
|
|
125
|
-
return await self._verify(step)
|
|
123
|
+
return await self._verify(cast(VerifyStep, step))
|
|
126
124
|
elif step.type == "screenshot":
|
|
127
|
-
return await self._screenshot(step)
|
|
125
|
+
return await self._screenshot(cast(ScreenshotStep, step))
|
|
128
126
|
elif step.type == "verify_text":
|
|
129
|
-
return await self._verify_text(step)
|
|
127
|
+
return await self._verify_text(cast(VerifyTextStep, step))
|
|
130
128
|
elif step.type == "verify_url":
|
|
131
|
-
return await self._verify_url(step)
|
|
129
|
+
return await self._verify_url(cast(VerifyUrlStep, step))
|
|
132
130
|
elif step.type == "verify_no_errors":
|
|
133
|
-
return await self._verify_no_errors(step)
|
|
131
|
+
return await self._verify_no_errors(cast(VerifyNoErrorsStep, step))
|
|
134
132
|
else:
|
|
135
133
|
self.logger.error(f"❌ Unknown step type: {step.type}")
|
|
136
134
|
return False
|
|
@@ -248,6 +246,7 @@ class FlowExecutor:
|
|
|
248
246
|
|
|
249
247
|
async def _verify_element_exists(self, step: VerifyStep) -> bool:
|
|
250
248
|
"""Verify that an element exists in the DOM."""
|
|
249
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
251
250
|
try:
|
|
252
251
|
element = await self.page.query_selector(step.selector)
|
|
253
252
|
if element:
|
|
@@ -262,6 +261,7 @@ class FlowExecutor:
|
|
|
262
261
|
|
|
263
262
|
async def _verify_element_visible(self, step: VerifyStep) -> bool:
|
|
264
263
|
"""Verify that an element is visible on the page."""
|
|
264
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
265
265
|
try:
|
|
266
266
|
element = await self.page.query_selector(step.selector)
|
|
267
267
|
if element:
|
|
@@ -281,6 +281,8 @@ class FlowExecutor:
|
|
|
281
281
|
|
|
282
282
|
async def _verify_element_text(self, step: VerifyStep) -> bool:
|
|
283
283
|
"""Verify that an element contains specific text."""
|
|
284
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
285
|
+
assert step.text is not None # Guaranteed by VerifyStep model validator
|
|
284
286
|
try:
|
|
285
287
|
element = await self.page.query_selector(step.selector)
|
|
286
288
|
if element:
|
|
@@ -318,6 +320,7 @@ class FlowExecutor:
|
|
|
318
320
|
|
|
319
321
|
async def _verify_element_count(self, step: VerifyStep) -> bool:
|
|
320
322
|
"""Verify the count of elements matching a selector."""
|
|
323
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
321
324
|
try:
|
|
322
325
|
elements = await self.page.query_selector_all(step.selector)
|
|
323
326
|
actual_count = len(elements)
|
|
@@ -336,6 +339,7 @@ class FlowExecutor:
|
|
|
336
339
|
|
|
337
340
|
async def _verify_page_title(self, step: VerifyStep) -> bool:
|
|
338
341
|
"""Verify the page title."""
|
|
342
|
+
assert step.title is not None # Guaranteed by VerifyStep model validator
|
|
339
343
|
try:
|
|
340
344
|
actual_title = await self.page.title()
|
|
341
345
|
|
plato/v1/models/env.py
CHANGED
|
@@ -13,11 +13,6 @@ from urllib.parse import urlparse
|
|
|
13
13
|
|
|
14
14
|
import yaml
|
|
15
15
|
|
|
16
|
-
try:
|
|
17
|
-
from playwright.async_api import Page
|
|
18
|
-
except ImportError:
|
|
19
|
-
Page = Any # Fallback if playwright not installed
|
|
20
|
-
|
|
21
16
|
from plato.v1.exceptions import PlatoClientError
|
|
22
17
|
from plato.v1.flow_executor import FlowExecutor
|
|
23
18
|
from plato.v1.models.flow import Flow
|
|
@@ -27,6 +22,8 @@ logger = logging.getLogger(__name__)
|
|
|
27
22
|
|
|
28
23
|
# Using TYPE_CHECKING for proper type annotation without circular imports
|
|
29
24
|
if TYPE_CHECKING:
|
|
25
|
+
from playwright.async_api import Page
|
|
26
|
+
|
|
30
27
|
from plato.sdk import Plato
|
|
31
28
|
|
|
32
29
|
|
|
@@ -48,8 +45,8 @@ class PlatoEnvironment:
|
|
|
48
45
|
|
|
49
46
|
_current_task: PlatoTask | None = None
|
|
50
47
|
_client: "Plato" = None
|
|
51
|
-
id: str = None
|
|
52
|
-
env_id: str = None
|
|
48
|
+
id: str = None # type: ignore
|
|
49
|
+
env_id: str = None # type: ignore
|
|
53
50
|
alias: str | None = None
|
|
54
51
|
_run_session_id: str | None = None
|
|
55
52
|
_heartbeat_task: asyncio.Task | None = None
|
|
@@ -66,7 +63,7 @@ class PlatoEnvironment:
|
|
|
66
63
|
):
|
|
67
64
|
self._client = client
|
|
68
65
|
self.id = id
|
|
69
|
-
self.env_id = env_id
|
|
66
|
+
self.env_id = env_id # type: ignore[assignment]
|
|
70
67
|
self.alias = alias
|
|
71
68
|
self._run_session_id = None
|
|
72
69
|
self._heartbeat_task = None
|
|
@@ -77,7 +74,7 @@ class PlatoEnvironment:
|
|
|
77
74
|
|
|
78
75
|
async def login(
|
|
79
76
|
self,
|
|
80
|
-
page: Page,
|
|
77
|
+
page: "Page",
|
|
81
78
|
throw_on_login_error: bool = False,
|
|
82
79
|
screenshots_dir: Path | None = None,
|
|
83
80
|
dataset: str = "base",
|
|
@@ -97,7 +94,7 @@ class PlatoEnvironment:
|
|
|
97
94
|
f"{self._client.base_url}/env/{self.id}/flows",
|
|
98
95
|
headers=headers,
|
|
99
96
|
) as resp:
|
|
100
|
-
await self._client._handle_response_error(resp)
|
|
97
|
+
await self._client._handle_response_error(resp)
|
|
101
98
|
body_text = await resp.text()
|
|
102
99
|
# Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
|
|
103
100
|
try:
|
|
@@ -394,7 +391,10 @@ class PlatoEnvironment:
|
|
|
394
391
|
if not self._run_session_id:
|
|
395
392
|
raise PlatoClientError("No active run session. Call reset() first.")
|
|
396
393
|
|
|
397
|
-
if not self._current_task
|
|
394
|
+
if not self._current_task:
|
|
395
|
+
logger.warning("No current task set")
|
|
396
|
+
raise PlatoClientError("No evaluation config found for task")
|
|
397
|
+
if not self._current_task.eval_config:
|
|
398
398
|
logger.warning(f"No evaluation config found for task: {self._current_task.name}")
|
|
399
399
|
raise PlatoClientError("No evaluation config found for task")
|
|
400
400
|
|
plato/v1/sdk.py
CHANGED
|
@@ -538,7 +538,7 @@ class Plato:
|
|
|
538
538
|
default_scoring_config=t.get("defaultScoringConfig", {}),
|
|
539
539
|
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
540
540
|
if t.get("scoringTypes")
|
|
541
|
-
else
|
|
541
|
+
else [ScoringType.MUTATIONS], # Use default when not provided
|
|
542
542
|
output_schema=t.get("outputSchema"),
|
|
543
543
|
is_sample=t.get("isSample", False),
|
|
544
544
|
simulator_artifact_id=(
|
|
@@ -685,7 +685,7 @@ class Plato:
|
|
|
685
685
|
return await response.json()
|
|
686
686
|
|
|
687
687
|
async def create_simulator(
|
|
688
|
-
self, name: str, description: str = None, sim_type: str = "docker_app"
|
|
688
|
+
self, name: str, description: str | None = None, sim_type: str = "docker_app"
|
|
689
689
|
) -> dict[str, Any]:
|
|
690
690
|
"""Create a new simulator.
|
|
691
691
|
|
plato/v1/sync_env.py
CHANGED
|
@@ -10,11 +10,6 @@ from urllib.parse import urlparse
|
|
|
10
10
|
|
|
11
11
|
import yaml
|
|
12
12
|
|
|
13
|
-
try:
|
|
14
|
-
from playwright.sync_api import Page
|
|
15
|
-
except ImportError:
|
|
16
|
-
Page = Any # Fallback if playwright not installed
|
|
17
|
-
|
|
18
13
|
from plato.v1.exceptions import PlatoClientError
|
|
19
14
|
from plato.v1.models.flow import Flow
|
|
20
15
|
from plato.v1.models.task import CustomEvalConfig, EvaluationResult, PlatoTask
|
|
@@ -24,6 +19,8 @@ logger = logging.getLogger(__name__)
|
|
|
24
19
|
|
|
25
20
|
# Using TYPE_CHECKING for proper type annotation without circular imports
|
|
26
21
|
if TYPE_CHECKING:
|
|
22
|
+
from playwright.sync_api import Page
|
|
23
|
+
|
|
27
24
|
from plato.sync_sdk import SyncPlato
|
|
28
25
|
|
|
29
26
|
|
|
@@ -45,8 +42,8 @@ class SyncPlatoEnvironment:
|
|
|
45
42
|
|
|
46
43
|
_current_task: PlatoTask | None = None
|
|
47
44
|
_client: "SyncPlato" = None
|
|
48
|
-
id: str = None
|
|
49
|
-
env_id: str = None
|
|
45
|
+
id: str = None # type: ignore
|
|
46
|
+
env_id: str = None # type: ignore
|
|
50
47
|
alias: str | None = None
|
|
51
48
|
_run_session_id: str | None = None
|
|
52
49
|
_heartbeat_thread: threading.Thread | None = None
|
|
@@ -64,7 +61,7 @@ class SyncPlatoEnvironment:
|
|
|
64
61
|
):
|
|
65
62
|
self._client = client
|
|
66
63
|
self.id = id
|
|
67
|
-
self.env_id = env_id
|
|
64
|
+
self.env_id = env_id # type: ignore[assignment]
|
|
68
65
|
self.alias = alias
|
|
69
66
|
self._run_session_id = active_session
|
|
70
67
|
self._heartbeat_thread = None
|
|
@@ -75,7 +72,7 @@ class SyncPlatoEnvironment:
|
|
|
75
72
|
|
|
76
73
|
def login(
|
|
77
74
|
self,
|
|
78
|
-
page: Page,
|
|
75
|
+
page: "Page",
|
|
79
76
|
throw_on_login_error: bool = False,
|
|
80
77
|
screenshots_dir: Path | None = None,
|
|
81
78
|
dataset: str = "base",
|
|
@@ -95,7 +92,7 @@ class SyncPlatoEnvironment:
|
|
|
95
92
|
try:
|
|
96
93
|
headers = {"X-API-Key": self._client.api_key}
|
|
97
94
|
resp = self._client.http_session.get(f"{self._client.base_url}/env/{self.id}/flows", headers=headers)
|
|
98
|
-
self._client._handle_response_error(resp)
|
|
95
|
+
self._client._handle_response_error(resp)
|
|
99
96
|
body_text = resp.text
|
|
100
97
|
# Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
|
|
101
98
|
try:
|
|
@@ -379,7 +376,10 @@ class SyncPlatoEnvironment:
|
|
|
379
376
|
if not self._run_session_id:
|
|
380
377
|
raise PlatoClientError("No active run session. Call reset() first.")
|
|
381
378
|
|
|
382
|
-
if not self._current_task
|
|
379
|
+
if not self._current_task:
|
|
380
|
+
logger.warning("No current task set")
|
|
381
|
+
raise PlatoClientError("No evaluation config found for task")
|
|
382
|
+
if not self._current_task.eval_config:
|
|
383
383
|
logger.warning(f"No evaluation config found for task: {self._current_task.name}")
|
|
384
384
|
raise PlatoClientError("No evaluation config found for task")
|
|
385
385
|
|
plato/v1/sync_flow_executor.py
CHANGED
|
@@ -7,15 +7,13 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import TYPE_CHECKING, cast
|
|
11
11
|
from urllib.parse import urljoin
|
|
12
12
|
|
|
13
13
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
16
|
from playwright.sync_api import Page
|
|
17
|
-
except ImportError:
|
|
18
|
-
Page = Any # Fallback if playwright not installed
|
|
19
17
|
|
|
20
18
|
from plato.v1.models.flow import (
|
|
21
19
|
CheckElementStep,
|
|
@@ -40,7 +38,7 @@ class SyncFlowExecutor:
|
|
|
40
38
|
|
|
41
39
|
def __init__(
|
|
42
40
|
self,
|
|
43
|
-
page: Page,
|
|
41
|
+
page: "Page",
|
|
44
42
|
flow: Flow,
|
|
45
43
|
screenshots_dir: Path | None = None,
|
|
46
44
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
@@ -104,29 +102,29 @@ class SyncFlowExecutor:
|
|
|
104
102
|
def _execute_step(self, step: FlowStep) -> bool:
|
|
105
103
|
"""Execute a single step in a flow using action attribute."""
|
|
106
104
|
if step.type == "wait_for_selector":
|
|
107
|
-
return self._wait_for_selector(step)
|
|
105
|
+
return self._wait_for_selector(cast(WaitForSelectorStep, step))
|
|
108
106
|
elif step.type == "click":
|
|
109
|
-
return self._click(step)
|
|
107
|
+
return self._click(cast(ClickStep, step))
|
|
110
108
|
elif step.type == "fill":
|
|
111
|
-
return self._fill(step)
|
|
109
|
+
return self._fill(cast(FillStep, step))
|
|
112
110
|
elif step.type == "wait":
|
|
113
|
-
return self._wait(step)
|
|
111
|
+
return self._wait(cast(WaitStep, step))
|
|
114
112
|
elif step.type == "navigate":
|
|
115
|
-
return self._navigate(step)
|
|
113
|
+
return self._navigate(cast(NavigateStep, step))
|
|
116
114
|
elif step.type == "wait_for_url":
|
|
117
|
-
return self._wait_for_url(step)
|
|
115
|
+
return self._wait_for_url(cast(WaitForUrlStep, step))
|
|
118
116
|
elif step.type == "check_element":
|
|
119
|
-
return self._check_element(step)
|
|
117
|
+
return self._check_element(cast(CheckElementStep, step))
|
|
120
118
|
elif step.type == "verify":
|
|
121
|
-
return self._verify(step)
|
|
119
|
+
return self._verify(cast(VerifyStep, step))
|
|
122
120
|
elif step.type == "screenshot":
|
|
123
|
-
return self._screenshot(step)
|
|
121
|
+
return self._screenshot(cast(ScreenshotStep, step))
|
|
124
122
|
elif step.type == "verify_text":
|
|
125
|
-
return self._verify_text(step)
|
|
123
|
+
return self._verify_text(cast(VerifyTextStep, step))
|
|
126
124
|
elif step.type == "verify_url":
|
|
127
|
-
return self._verify_url(step)
|
|
125
|
+
return self._verify_url(cast(VerifyUrlStep, step))
|
|
128
126
|
elif step.type == "verify_no_errors":
|
|
129
|
-
return self._verify_no_errors(step)
|
|
127
|
+
return self._verify_no_errors(cast(VerifyNoErrorsStep, step))
|
|
130
128
|
else:
|
|
131
129
|
self.logger.error(f"❌ Unknown step action: {step.type}")
|
|
132
130
|
return False
|
|
@@ -242,6 +240,7 @@ class SyncFlowExecutor:
|
|
|
242
240
|
|
|
243
241
|
def _verify_element_exists(self, step: VerifyStep) -> bool:
|
|
244
242
|
"""Verify that an element exists in the DOM."""
|
|
243
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
245
244
|
try:
|
|
246
245
|
element = self.page.query_selector(step.selector)
|
|
247
246
|
if element:
|
|
@@ -256,6 +255,7 @@ class SyncFlowExecutor:
|
|
|
256
255
|
|
|
257
256
|
def _verify_element_visible(self, step: VerifyStep) -> bool:
|
|
258
257
|
"""Verify that an element is visible on the page."""
|
|
258
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
259
259
|
try:
|
|
260
260
|
element = self.page.query_selector(step.selector)
|
|
261
261
|
if element:
|
|
@@ -275,6 +275,8 @@ class SyncFlowExecutor:
|
|
|
275
275
|
|
|
276
276
|
def _verify_element_text(self, step: VerifyStep) -> bool:
|
|
277
277
|
"""Verify that an element contains specific text."""
|
|
278
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
279
|
+
assert step.text is not None # Guaranteed by VerifyStep model validator
|
|
278
280
|
try:
|
|
279
281
|
element = self.page.query_selector(step.selector)
|
|
280
282
|
if element:
|
|
@@ -312,6 +314,7 @@ class SyncFlowExecutor:
|
|
|
312
314
|
|
|
313
315
|
def _verify_element_count(self, step: VerifyStep) -> bool:
|
|
314
316
|
"""Verify the count of elements matching a selector."""
|
|
317
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
315
318
|
try:
|
|
316
319
|
elements = self.page.query_selector_all(step.selector)
|
|
317
320
|
actual_count = len(elements)
|
|
@@ -330,6 +333,7 @@ class SyncFlowExecutor:
|
|
|
330
333
|
|
|
331
334
|
def _verify_page_title(self, step: VerifyStep) -> bool:
|
|
332
335
|
"""Verify the page title."""
|
|
336
|
+
assert step.title is not None # Guaranteed by VerifyStep model validator
|
|
333
337
|
try:
|
|
334
338
|
actual_title = self.page.title()
|
|
335
339
|
|
plato/v1/sync_sdk.py
CHANGED
|
@@ -89,7 +89,7 @@ class SyncPlato:
|
|
|
89
89
|
Raises:
|
|
90
90
|
PlatoClientError: With the actual error message from the response
|
|
91
91
|
"""
|
|
92
|
-
if response.status_code >= 400:
|
|
92
|
+
if response.status_code >= 400: # type: ignore[operator]
|
|
93
93
|
try:
|
|
94
94
|
# Try to get the error message from the response body
|
|
95
95
|
error_data = response.json()
|
|
@@ -525,7 +525,9 @@ class SyncPlato:
|
|
|
525
525
|
average_steps=t.get("averageStepsTaken"),
|
|
526
526
|
num_validator_human_scores=t.get("defaultScoringConfig", {}).get("num_sessions_used", 0),
|
|
527
527
|
default_scoring_config=t.get("defaultScoringConfig", {}),
|
|
528
|
-
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
528
|
+
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
529
|
+
if t.get("scoringTypes")
|
|
530
|
+
else [ScoringType.MUTATIONS],
|
|
529
531
|
output_schema=t.get("outputSchema"),
|
|
530
532
|
is_sample=t.get("isSample", False),
|
|
531
533
|
simulator_artifact_id=(
|
plato/v2/__init__.py
CHANGED
|
@@ -15,6 +15,7 @@ from plato.v2.async_.client import AsyncPlato
|
|
|
15
15
|
from plato.v2.async_.environment import Environment as AsyncEnvironment
|
|
16
16
|
from plato.v2.async_.flow_executor import FlowExecutionError as AsyncFlowExecutionError
|
|
17
17
|
from plato.v2.async_.flow_executor import FlowExecutor as AsyncFlowExecutor
|
|
18
|
+
from plato.v2.async_.session import SerializedSession
|
|
18
19
|
from plato.v2.async_.session import Session as AsyncSession
|
|
19
20
|
from plato.v2.sync.client import Plato
|
|
20
21
|
from plato.v2.sync.environment import Environment
|
|
@@ -45,6 +46,7 @@ __all__ = [
|
|
|
45
46
|
"AsyncEnvironment",
|
|
46
47
|
"AsyncFlowExecutor",
|
|
47
48
|
"AsyncFlowExecutionError",
|
|
49
|
+
"SerializedSession",
|
|
48
50
|
# Models
|
|
49
51
|
"Flow",
|
|
50
52
|
# Helpers
|
plato/v2/async_/environment.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
AppApiV2SchemasSessionCreateSnapshotRequest,
|
|
10
11
|
ConnectRoutingInfoResult,
|
|
11
12
|
CreateCheckpointRequest,
|
|
12
13
|
CreateSnapshotResult,
|
|
@@ -136,6 +137,36 @@ class Environment:
|
|
|
136
137
|
x_api_key=self._api_key,
|
|
137
138
|
)
|
|
138
139
|
|
|
140
|
+
async def snapshot_store(
|
|
141
|
+
self,
|
|
142
|
+
override_service: str | None = None,
|
|
143
|
+
override_version: str | None = None,
|
|
144
|
+
override_dataset: str | None = None,
|
|
145
|
+
) -> CreateSnapshotResult:
|
|
146
|
+
"""Create a snapshot-store snapshot of this environment.
|
|
147
|
+
|
|
148
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
149
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
153
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
154
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
CreateSnapshotResult with artifact_id.
|
|
158
|
+
"""
|
|
159
|
+
return await jobs.snapshot_store.asyncio(
|
|
160
|
+
client=self._http,
|
|
161
|
+
job_id=self.job_id,
|
|
162
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
163
|
+
override_service=override_service,
|
|
164
|
+
override_version=override_version,
|
|
165
|
+
override_dataset=override_dataset,
|
|
166
|
+
),
|
|
167
|
+
x_api_key=self._api_key,
|
|
168
|
+
)
|
|
169
|
+
|
|
139
170
|
async def close(self) -> None:
|
|
140
171
|
"""Close this environment."""
|
|
141
172
|
await jobs.close.asyncio(
|
plato/v2/async_/session.py
CHANGED
|
@@ -32,7 +32,9 @@ from plato._generated.api.v2.sessions import heartbeat as sessions_heartbeat
|
|
|
32
32
|
from plato._generated.api.v2.sessions import make as sessions_make
|
|
33
33
|
from plato._generated.api.v2.sessions import reset as sessions_reset
|
|
34
34
|
from plato._generated.api.v2.sessions import set_date as sessions_set_date
|
|
35
|
+
from plato._generated.api.v2.sessions import setup_sandbox as sessions_setup_sandbox
|
|
35
36
|
from plato._generated.api.v2.sessions import snapshot as sessions_snapshot
|
|
37
|
+
from plato._generated.api.v2.sessions import snapshot_store as sessions_snapshot_store
|
|
36
38
|
from plato._generated.api.v2.sessions import state as sessions_state
|
|
37
39
|
from plato._generated.api.v2.sessions import wait_for_ready as sessions_wait_for_ready
|
|
38
40
|
from plato._generated.models import (
|
|
@@ -40,6 +42,8 @@ from plato._generated.models import (
|
|
|
40
42
|
AppApiV2SchemasSessionCreateSnapshotResponse,
|
|
41
43
|
AppApiV2SchemasSessionEvaluateResponse,
|
|
42
44
|
AppApiV2SchemasSessionHeartbeatResponse,
|
|
45
|
+
AppApiV2SchemasSessionSetupSandboxRequest,
|
|
46
|
+
AppApiV2SchemasSessionSetupSandboxResponse,
|
|
43
47
|
CreateDiskSnapshotRequest,
|
|
44
48
|
CreateDiskSnapshotResponse,
|
|
45
49
|
CreateSessionFromEnvs,
|
|
@@ -557,6 +561,38 @@ class Session:
|
|
|
557
561
|
x_api_key=self._api_key,
|
|
558
562
|
)
|
|
559
563
|
|
|
564
|
+
async def setup_sandbox(
|
|
565
|
+
self,
|
|
566
|
+
timeout: int = 120,
|
|
567
|
+
) -> AppApiV2SchemasSessionSetupSandboxResponse:
|
|
568
|
+
"""Setup sandbox environment with Docker overlay on all environments.
|
|
569
|
+
|
|
570
|
+
This configures the VMs for Docker usage with overlay2 storage driver,
|
|
571
|
+
which is significantly faster than the default vfs driver. Should be called
|
|
572
|
+
after session creation and before pulling Docker images.
|
|
573
|
+
|
|
574
|
+
The setup includes:
|
|
575
|
+
- Mounting /dev/vdb to /mnt/docker for Docker storage
|
|
576
|
+
- Configuring Docker with overlay2 storage driver
|
|
577
|
+
- Setting up ECR and Docker Hub authentication
|
|
578
|
+
- Creating a docker-user service for non-root Docker access
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
timeout: Setup timeout in seconds (default: 120).
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
SetupSandboxResponse with results per job_id.
|
|
585
|
+
"""
|
|
586
|
+
self._check_closed()
|
|
587
|
+
|
|
588
|
+
request = AppApiV2SchemasSessionSetupSandboxRequest(timeout=timeout)
|
|
589
|
+
return await sessions_setup_sandbox.asyncio(
|
|
590
|
+
client=self._http,
|
|
591
|
+
session_id=self.session_id,
|
|
592
|
+
body=request,
|
|
593
|
+
x_api_key=self._api_key,
|
|
594
|
+
)
|
|
595
|
+
|
|
560
596
|
async def evaluate(self, **kwargs) -> AppApiV2SchemasSessionEvaluateResponse:
|
|
561
597
|
"""Evaluate the session against task criteria.
|
|
562
598
|
|
|
@@ -587,6 +623,38 @@ class Session:
|
|
|
587
623
|
x_api_key=self._api_key,
|
|
588
624
|
)
|
|
589
625
|
|
|
626
|
+
async def snapshot_store(
|
|
627
|
+
self,
|
|
628
|
+
override_service: str | None = None,
|
|
629
|
+
override_version: str | None = None,
|
|
630
|
+
override_dataset: str | None = None,
|
|
631
|
+
) -> AppApiV2SchemasSessionCreateSnapshotResponse:
|
|
632
|
+
"""Create a snapshot-store snapshot of all environments in the session.
|
|
633
|
+
|
|
634
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
635
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
636
|
+
|
|
637
|
+
Args:
|
|
638
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
639
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
640
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
Snapshot response with info per job_id.
|
|
644
|
+
"""
|
|
645
|
+
self._check_closed()
|
|
646
|
+
|
|
647
|
+
return await sessions_snapshot_store.asyncio(
|
|
648
|
+
client=self._http,
|
|
649
|
+
session_id=self.session_id,
|
|
650
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
651
|
+
override_service=override_service,
|
|
652
|
+
override_version=override_version,
|
|
653
|
+
override_dataset=override_dataset,
|
|
654
|
+
),
|
|
655
|
+
x_api_key=self._api_key,
|
|
656
|
+
)
|
|
657
|
+
|
|
590
658
|
async def disk_snapshot(
|
|
591
659
|
self,
|
|
592
660
|
override_service: str | None = None,
|
|
@@ -764,10 +832,10 @@ class Session:
|
|
|
764
832
|
"""
|
|
765
833
|
self._check_closed()
|
|
766
834
|
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
raise ImportError("The login() method requires playwright. Install it with: pip install playwright")
|
|
835
|
+
import importlib.util
|
|
836
|
+
|
|
837
|
+
if importlib.util.find_spec("playwright") is None:
|
|
838
|
+
raise ImportError("The login() method requires playwright. Install it with: pip install playwright")
|
|
771
839
|
|
|
772
840
|
context = await browser.new_context()
|
|
773
841
|
pages: dict[str, Page] = {}
|
plato/v2/sync/environment.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
AppApiV2SchemasSessionCreateSnapshotRequest,
|
|
10
11
|
CreateCheckpointRequest,
|
|
11
12
|
CreateSnapshotResult,
|
|
12
13
|
ExecuteCommandRequest,
|
|
@@ -135,6 +136,36 @@ class Environment:
|
|
|
135
136
|
x_api_key=self._api_key,
|
|
136
137
|
)
|
|
137
138
|
|
|
139
|
+
def snapshot_store(
|
|
140
|
+
self,
|
|
141
|
+
override_service: str | None = None,
|
|
142
|
+
override_version: str | None = None,
|
|
143
|
+
override_dataset: str | None = None,
|
|
144
|
+
) -> CreateSnapshotResult:
|
|
145
|
+
"""Create a snapshot-store snapshot of this environment.
|
|
146
|
+
|
|
147
|
+
Uses the snapshot-store pipeline for chunk-based deduplication and
|
|
148
|
+
efficient storage. This is the preferred method for new base snapshots.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
override_service: Override simulator/service name in artifact metadata.
|
|
152
|
+
override_version: Override version/git_hash in artifact metadata.
|
|
153
|
+
override_dataset: Override dataset name in artifact metadata.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
CreateSnapshotResult with artifact_id.
|
|
157
|
+
"""
|
|
158
|
+
return jobs.snapshot_store.sync(
|
|
159
|
+
client=self._http,
|
|
160
|
+
job_id=self.job_id,
|
|
161
|
+
body=AppApiV2SchemasSessionCreateSnapshotRequest(
|
|
162
|
+
override_service=override_service,
|
|
163
|
+
override_version=override_version,
|
|
164
|
+
override_dataset=override_dataset,
|
|
165
|
+
),
|
|
166
|
+
x_api_key=self._api_key,
|
|
167
|
+
)
|
|
168
|
+
|
|
138
169
|
def close(self) -> None:
|
|
139
170
|
"""Close this environment."""
|
|
140
171
|
jobs.close.sync(
|