plato-sdk-v2 2.0.50__py3-none-any.whl → 2.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/__init__.py +7 -6
- plato/_generated/__init__.py +1 -1
- plato/_generated/api/v1/env/evaluate_session.py +3 -3
- plato/_generated/api/v1/env/log_state_mutation.py +4 -4
- plato/_generated/api/v1/sandbox/checkpoint_vm.py +3 -3
- plato/_generated/api/v1/sandbox/save_vm_snapshot.py +3 -3
- plato/_generated/api/v1/sandbox/setup_sandbox.py +8 -8
- plato/_generated/api/v1/session/__init__.py +2 -0
- plato/_generated/api/v1/session/get_sessions_for_archival.py +100 -0
- plato/_generated/api/v1/testcases/__init__.py +6 -2
- plato/_generated/api/v1/testcases/get_mutation_groups_for_testcase.py +98 -0
- plato/_generated/api/v1/testcases/{get_next_output_testcase_for_scoring.py → get_next_testcase_for_scoring.py} +23 -10
- plato/_generated/api/v1/testcases/get_testcase_metadata_for_scoring.py +74 -0
- plato/_generated/api/v2/__init__.py +2 -1
- plato/_generated/api/v2/jobs/__init__.py +4 -0
- plato/_generated/api/v2/jobs/checkpoint.py +3 -3
- plato/_generated/api/v2/jobs/disk_snapshot.py +3 -3
- plato/_generated/api/v2/jobs/log_for_job.py +4 -39
- plato/_generated/api/v2/jobs/make.py +4 -4
- plato/_generated/api/v2/jobs/setup_sandbox.py +97 -0
- plato/_generated/api/v2/jobs/snapshot.py +3 -3
- plato/_generated/api/v2/jobs/snapshot_store.py +91 -0
- plato/_generated/api/v2/sessions/__init__.py +4 -0
- plato/_generated/api/v2/sessions/checkpoint.py +3 -3
- plato/_generated/api/v2/sessions/disk_snapshot.py +3 -3
- plato/_generated/api/v2/sessions/evaluate.py +3 -3
- plato/_generated/api/v2/sessions/log_job_mutation.py +4 -39
- plato/_generated/api/v2/sessions/make.py +4 -4
- plato/_generated/api/v2/sessions/setup_sandbox.py +98 -0
- plato/_generated/api/v2/sessions/snapshot.py +3 -3
- plato/_generated/api/v2/sessions/snapshot_store.py +94 -0
- plato/_generated/api/v2/user/__init__.py +7 -0
- plato/_generated/api/v2/user/get_current_user.py +76 -0
- plato/_generated/models/__init__.py +174 -23
- plato/_sims_generator/__init__.py +19 -4
- plato/_sims_generator/instruction.py +203 -0
- plato/_sims_generator/templates/instruction/helpers.py.jinja +161 -0
- plato/_sims_generator/templates/instruction/init.py.jinja +43 -0
- plato/agents/__init__.py +107 -517
- plato/agents/base.py +145 -0
- plato/agents/build.py +61 -0
- plato/agents/config.py +160 -0
- plato/agents/logging.py +401 -0
- plato/agents/runner.py +161 -0
- plato/agents/trajectory.py +266 -0
- plato/chronos/__init__.py +37 -0
- plato/chronos/api/__init__.py +3 -0
- plato/chronos/api/agents/__init__.py +13 -0
- plato/chronos/api/agents/create_agent.py +63 -0
- plato/chronos/api/agents/delete_agent.py +61 -0
- plato/chronos/api/agents/get_agent.py +62 -0
- plato/chronos/api/agents/get_agent_schema.py +72 -0
- plato/chronos/api/agents/get_agent_versions.py +62 -0
- plato/chronos/api/agents/list_agents.py +57 -0
- plato/chronos/api/agents/lookup_agent.py +74 -0
- plato/chronos/api/auth/__init__.py +9 -0
- plato/chronos/api/auth/debug_auth_api_auth_debug_get.py +43 -0
- plato/chronos/api/auth/get_auth_status_api_auth_status_get.py +61 -0
- plato/chronos/api/auth/get_current_user_route_api_auth_me_get.py +60 -0
- plato/chronos/api/callback/__init__.py +11 -0
- plato/chronos/api/callback/push_agent_logs.py +61 -0
- plato/chronos/api/callback/update_agent_status.py +57 -0
- plato/chronos/api/callback/upload_artifacts.py +59 -0
- plato/chronos/api/callback/upload_logs_zip.py +57 -0
- plato/chronos/api/callback/upload_trajectory.py +57 -0
- plato/chronos/api/default/__init__.py +7 -0
- plato/chronos/api/default/health.py +43 -0
- plato/chronos/api/jobs/__init__.py +7 -0
- plato/chronos/api/jobs/launch_job.py +63 -0
- plato/chronos/api/registry/__init__.py +19 -0
- plato/chronos/api/registry/get_agent_schema_api_registry_agents__agent_name__schema_get.py +62 -0
- plato/chronos/api/registry/get_agent_versions_api_registry_agents__agent_name__versions_get.py +52 -0
- plato/chronos/api/registry/get_world_schema_api_registry_worlds__package_name__schema_get.py +68 -0
- plato/chronos/api/registry/get_world_versions_api_registry_worlds__package_name__versions_get.py +52 -0
- plato/chronos/api/registry/list_registry_agents_api_registry_agents_get.py +44 -0
- plato/chronos/api/registry/list_registry_worlds_api_registry_worlds_get.py +44 -0
- plato/chronos/api/runtimes/__init__.py +11 -0
- plato/chronos/api/runtimes/create_runtime.py +63 -0
- plato/chronos/api/runtimes/delete_runtime.py +61 -0
- plato/chronos/api/runtimes/get_runtime.py +62 -0
- plato/chronos/api/runtimes/list_runtimes.py +57 -0
- plato/chronos/api/runtimes/test_runtime.py +67 -0
- plato/chronos/api/secrets/__init__.py +11 -0
- plato/chronos/api/secrets/create_secret.py +63 -0
- plato/chronos/api/secrets/delete_secret.py +61 -0
- plato/chronos/api/secrets/get_secret.py +62 -0
- plato/chronos/api/secrets/list_secrets.py +57 -0
- plato/chronos/api/secrets/update_secret.py +68 -0
- plato/chronos/api/sessions/__init__.py +10 -0
- plato/chronos/api/sessions/get_session.py +62 -0
- plato/chronos/api/sessions/get_session_logs.py +72 -0
- plato/chronos/api/sessions/get_session_logs_download.py +62 -0
- plato/chronos/api/sessions/list_sessions.py +57 -0
- plato/chronos/api/status/__init__.py +8 -0
- plato/chronos/api/status/get_status_api_status_get.py +44 -0
- plato/chronos/api/status/get_version_info_api_version_get.py +44 -0
- plato/chronos/api/templates/__init__.py +11 -0
- plato/chronos/api/templates/create_template.py +63 -0
- plato/chronos/api/templates/delete_template.py +61 -0
- plato/chronos/api/templates/get_template.py +62 -0
- plato/chronos/api/templates/list_templates.py +57 -0
- plato/chronos/api/templates/update_template.py +68 -0
- plato/chronos/api/trajectories/__init__.py +8 -0
- plato/chronos/api/trajectories/get_trajectory.py +62 -0
- plato/chronos/api/trajectories/list_trajectories.py +62 -0
- plato/chronos/api/worlds/__init__.py +10 -0
- plato/chronos/api/worlds/create_world.py +63 -0
- plato/chronos/api/worlds/delete_world.py +61 -0
- plato/chronos/api/worlds/get_world.py +62 -0
- plato/chronos/api/worlds/list_worlds.py +57 -0
- plato/chronos/client.py +171 -0
- plato/chronos/errors.py +141 -0
- plato/chronos/models/__init__.py +647 -0
- plato/chronos/py.typed +0 -0
- plato/sims/cli.py +299 -123
- plato/sims/registry.py +77 -4
- plato/v1/cli/agent.py +88 -84
- plato/v1/cli/main.py +2 -0
- plato/v1/cli/pm.py +441 -119
- plato/v1/cli/sandbox.py +747 -191
- plato/v1/cli/sim.py +11 -0
- plato/v1/cli/verify.py +1269 -0
- plato/v1/cli/world.py +3 -0
- plato/v1/flow_executor.py +21 -17
- plato/v1/models/env.py +11 -11
- plato/v1/sdk.py +2 -2
- plato/v1/sync_env.py +11 -11
- plato/v1/sync_flow_executor.py +21 -17
- plato/v1/sync_sdk.py +4 -2
- plato/v2/__init__.py +2 -0
- plato/v2/async_/environment.py +20 -1
- plato/v2/async_/session.py +54 -3
- plato/v2/sync/environment.py +2 -1
- plato/v2/sync/session.py +52 -2
- plato/worlds/README.md +218 -0
- plato/worlds/__init__.py +54 -18
- plato/worlds/base.py +304 -93
- plato/worlds/config.py +239 -73
- plato/worlds/runner.py +391 -80
- {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/METADATA +1 -3
- {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/RECORD +143 -68
- {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/entry_points.txt +1 -0
- plato/_generated/api/v2/interfaces/__init__.py +0 -27
- plato/_generated/api/v2/interfaces/v2_interface_browser_create.py +0 -68
- plato/_generated/api/v2/interfaces/v2_interface_cdp_url.py +0 -65
- plato/_generated/api/v2/interfaces/v2_interface_click.py +0 -64
- plato/_generated/api/v2/interfaces/v2_interface_close.py +0 -59
- plato/_generated/api/v2/interfaces/v2_interface_computer_create.py +0 -68
- plato/_generated/api/v2/interfaces/v2_interface_cursor.py +0 -64
- plato/_generated/api/v2/interfaces/v2_interface_key.py +0 -68
- plato/_generated/api/v2/interfaces/v2_interface_screenshot.py +0 -65
- plato/_generated/api/v2/interfaces/v2_interface_scroll.py +0 -70
- plato/_generated/api/v2/interfaces/v2_interface_type.py +0 -64
- plato/world/__init__.py +0 -44
- plato/world/base.py +0 -267
- plato/world/config.py +0 -139
- plato/world/types.py +0 -47
- {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/WHEEL +0 -0
plato/v1/cli/world.py
CHANGED
|
@@ -235,6 +235,8 @@ def world_publish(
|
|
|
235
235
|
upload_url = f"{api_url}/v2/pypi/worlds/"
|
|
236
236
|
console.print(f"\n[cyan]Uploading to {upload_url}...[/cyan]")
|
|
237
237
|
|
|
238
|
+
# api_key is guaranteed to be set (checked earlier when not dry_run)
|
|
239
|
+
assert api_key is not None, "api_key must be set when not in dry_run mode"
|
|
238
240
|
try:
|
|
239
241
|
result = subprocess.run(
|
|
240
242
|
[
|
|
@@ -250,6 +252,7 @@ def world_publish(
|
|
|
250
252
|
],
|
|
251
253
|
capture_output=True,
|
|
252
254
|
text=True,
|
|
255
|
+
check=False,
|
|
253
256
|
)
|
|
254
257
|
|
|
255
258
|
if result.returncode == 0:
|
plato/v1/flow_executor.py
CHANGED
|
@@ -7,15 +7,13 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
10
|
+
from typing import TYPE_CHECKING, cast
|
|
11
11
|
from urllib.parse import urljoin
|
|
12
12
|
|
|
13
13
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
14
14
|
|
|
15
|
-
try:
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
16
|
from playwright.async_api import Page
|
|
17
|
-
except ImportError:
|
|
18
|
-
Page = Any # Fallback if playwright not installed
|
|
19
17
|
|
|
20
18
|
from plato.v1.models.flow import (
|
|
21
19
|
CheckElementStep,
|
|
@@ -42,7 +40,7 @@ class FlowExecutor:
|
|
|
42
40
|
|
|
43
41
|
def __init__(
|
|
44
42
|
self,
|
|
45
|
-
page: Page,
|
|
43
|
+
page: "Page",
|
|
46
44
|
flow: Flow,
|
|
47
45
|
screenshots_dir: Path | None = None,
|
|
48
46
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
@@ -108,29 +106,29 @@ class FlowExecutor:
|
|
|
108
106
|
async def _execute_step(self, step: FlowStep) -> bool:
|
|
109
107
|
"""Execute a single step in a flow using type attribute."""
|
|
110
108
|
if step.type == "wait_for_selector":
|
|
111
|
-
return await self._wait_for_selector(step)
|
|
109
|
+
return await self._wait_for_selector(cast(WaitForSelectorStep, step))
|
|
112
110
|
elif step.type == "click":
|
|
113
|
-
return await self._click(step)
|
|
111
|
+
return await self._click(cast(ClickStep, step))
|
|
114
112
|
elif step.type == "fill":
|
|
115
|
-
return await self._fill(step)
|
|
113
|
+
return await self._fill(cast(FillStep, step))
|
|
116
114
|
elif step.type == "wait":
|
|
117
|
-
return await self._wait(step)
|
|
115
|
+
return await self._wait(cast(WaitStep, step))
|
|
118
116
|
elif step.type == "navigate":
|
|
119
|
-
return await self._navigate(step)
|
|
117
|
+
return await self._navigate(cast(NavigateStep, step))
|
|
120
118
|
elif step.type == "wait_for_url":
|
|
121
|
-
return await self._wait_for_url(step)
|
|
119
|
+
return await self._wait_for_url(cast(WaitForUrlStep, step))
|
|
122
120
|
elif step.type == "check_element":
|
|
123
|
-
return await self._check_element(step)
|
|
121
|
+
return await self._check_element(cast(CheckElementStep, step))
|
|
124
122
|
elif step.type == "verify":
|
|
125
|
-
return await self._verify(step)
|
|
123
|
+
return await self._verify(cast(VerifyStep, step))
|
|
126
124
|
elif step.type == "screenshot":
|
|
127
|
-
return await self._screenshot(step)
|
|
125
|
+
return await self._screenshot(cast(ScreenshotStep, step))
|
|
128
126
|
elif step.type == "verify_text":
|
|
129
|
-
return await self._verify_text(step)
|
|
127
|
+
return await self._verify_text(cast(VerifyTextStep, step))
|
|
130
128
|
elif step.type == "verify_url":
|
|
131
|
-
return await self._verify_url(step)
|
|
129
|
+
return await self._verify_url(cast(VerifyUrlStep, step))
|
|
132
130
|
elif step.type == "verify_no_errors":
|
|
133
|
-
return await self._verify_no_errors(step)
|
|
131
|
+
return await self._verify_no_errors(cast(VerifyNoErrorsStep, step))
|
|
134
132
|
else:
|
|
135
133
|
self.logger.error(f"❌ Unknown step type: {step.type}")
|
|
136
134
|
return False
|
|
@@ -248,6 +246,7 @@ class FlowExecutor:
|
|
|
248
246
|
|
|
249
247
|
async def _verify_element_exists(self, step: VerifyStep) -> bool:
|
|
250
248
|
"""Verify that an element exists in the DOM."""
|
|
249
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
251
250
|
try:
|
|
252
251
|
element = await self.page.query_selector(step.selector)
|
|
253
252
|
if element:
|
|
@@ -262,6 +261,7 @@ class FlowExecutor:
|
|
|
262
261
|
|
|
263
262
|
async def _verify_element_visible(self, step: VerifyStep) -> bool:
|
|
264
263
|
"""Verify that an element is visible on the page."""
|
|
264
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
265
265
|
try:
|
|
266
266
|
element = await self.page.query_selector(step.selector)
|
|
267
267
|
if element:
|
|
@@ -281,6 +281,8 @@ class FlowExecutor:
|
|
|
281
281
|
|
|
282
282
|
async def _verify_element_text(self, step: VerifyStep) -> bool:
|
|
283
283
|
"""Verify that an element contains specific text."""
|
|
284
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
285
|
+
assert step.text is not None # Guaranteed by VerifyStep model validator
|
|
284
286
|
try:
|
|
285
287
|
element = await self.page.query_selector(step.selector)
|
|
286
288
|
if element:
|
|
@@ -318,6 +320,7 @@ class FlowExecutor:
|
|
|
318
320
|
|
|
319
321
|
async def _verify_element_count(self, step: VerifyStep) -> bool:
|
|
320
322
|
"""Verify the count of elements matching a selector."""
|
|
323
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
321
324
|
try:
|
|
322
325
|
elements = await self.page.query_selector_all(step.selector)
|
|
323
326
|
actual_count = len(elements)
|
|
@@ -336,6 +339,7 @@ class FlowExecutor:
|
|
|
336
339
|
|
|
337
340
|
async def _verify_page_title(self, step: VerifyStep) -> bool:
|
|
338
341
|
"""Verify the page title."""
|
|
342
|
+
assert step.title is not None # Guaranteed by VerifyStep model validator
|
|
339
343
|
try:
|
|
340
344
|
actual_title = await self.page.title()
|
|
341
345
|
|
plato/v1/models/env.py
CHANGED
|
@@ -13,11 +13,6 @@ from urllib.parse import urlparse
|
|
|
13
13
|
|
|
14
14
|
import yaml
|
|
15
15
|
|
|
16
|
-
try:
|
|
17
|
-
from playwright.async_api import Page
|
|
18
|
-
except ImportError:
|
|
19
|
-
Page = Any # Fallback if playwright not installed
|
|
20
|
-
|
|
21
16
|
from plato.v1.exceptions import PlatoClientError
|
|
22
17
|
from plato.v1.flow_executor import FlowExecutor
|
|
23
18
|
from plato.v1.models.flow import Flow
|
|
@@ -27,6 +22,8 @@ logger = logging.getLogger(__name__)
|
|
|
27
22
|
|
|
28
23
|
# Using TYPE_CHECKING for proper type annotation without circular imports
|
|
29
24
|
if TYPE_CHECKING:
|
|
25
|
+
from playwright.async_api import Page
|
|
26
|
+
|
|
30
27
|
from plato.sdk import Plato
|
|
31
28
|
|
|
32
29
|
|
|
@@ -48,8 +45,8 @@ class PlatoEnvironment:
|
|
|
48
45
|
|
|
49
46
|
_current_task: PlatoTask | None = None
|
|
50
47
|
_client: "Plato" = None
|
|
51
|
-
id: str = None
|
|
52
|
-
env_id: str = None
|
|
48
|
+
id: str = None # type: ignore
|
|
49
|
+
env_id: str = None # type: ignore
|
|
53
50
|
alias: str | None = None
|
|
54
51
|
_run_session_id: str | None = None
|
|
55
52
|
_heartbeat_task: asyncio.Task | None = None
|
|
@@ -66,7 +63,7 @@ class PlatoEnvironment:
|
|
|
66
63
|
):
|
|
67
64
|
self._client = client
|
|
68
65
|
self.id = id
|
|
69
|
-
self.env_id = env_id
|
|
66
|
+
self.env_id = env_id # type: ignore[assignment]
|
|
70
67
|
self.alias = alias
|
|
71
68
|
self._run_session_id = None
|
|
72
69
|
self._heartbeat_task = None
|
|
@@ -77,7 +74,7 @@ class PlatoEnvironment:
|
|
|
77
74
|
|
|
78
75
|
async def login(
|
|
79
76
|
self,
|
|
80
|
-
page: Page,
|
|
77
|
+
page: "Page",
|
|
81
78
|
throw_on_login_error: bool = False,
|
|
82
79
|
screenshots_dir: Path | None = None,
|
|
83
80
|
dataset: str = "base",
|
|
@@ -97,7 +94,7 @@ class PlatoEnvironment:
|
|
|
97
94
|
f"{self._client.base_url}/env/{self.id}/flows",
|
|
98
95
|
headers=headers,
|
|
99
96
|
) as resp:
|
|
100
|
-
await self._client._handle_response_error(resp)
|
|
97
|
+
await self._client._handle_response_error(resp)
|
|
101
98
|
body_text = await resp.text()
|
|
102
99
|
# Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
|
|
103
100
|
try:
|
|
@@ -394,7 +391,10 @@ class PlatoEnvironment:
|
|
|
394
391
|
if not self._run_session_id:
|
|
395
392
|
raise PlatoClientError("No active run session. Call reset() first.")
|
|
396
393
|
|
|
397
|
-
if not self._current_task
|
|
394
|
+
if not self._current_task:
|
|
395
|
+
logger.warning("No current task set")
|
|
396
|
+
raise PlatoClientError("No evaluation config found for task")
|
|
397
|
+
if not self._current_task.eval_config:
|
|
398
398
|
logger.warning(f"No evaluation config found for task: {self._current_task.name}")
|
|
399
399
|
raise PlatoClientError("No evaluation config found for task")
|
|
400
400
|
|
plato/v1/sdk.py
CHANGED
|
@@ -538,7 +538,7 @@ class Plato:
|
|
|
538
538
|
default_scoring_config=t.get("defaultScoringConfig", {}),
|
|
539
539
|
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
540
540
|
if t.get("scoringTypes")
|
|
541
|
-
else
|
|
541
|
+
else [ScoringType.MUTATIONS], # Use default when not provided
|
|
542
542
|
output_schema=t.get("outputSchema"),
|
|
543
543
|
is_sample=t.get("isSample", False),
|
|
544
544
|
simulator_artifact_id=(
|
|
@@ -685,7 +685,7 @@ class Plato:
|
|
|
685
685
|
return await response.json()
|
|
686
686
|
|
|
687
687
|
async def create_simulator(
|
|
688
|
-
self, name: str, description: str = None, sim_type: str = "docker_app"
|
|
688
|
+
self, name: str, description: str | None = None, sim_type: str = "docker_app"
|
|
689
689
|
) -> dict[str, Any]:
|
|
690
690
|
"""Create a new simulator.
|
|
691
691
|
|
plato/v1/sync_env.py
CHANGED
|
@@ -10,11 +10,6 @@ from urllib.parse import urlparse
|
|
|
10
10
|
|
|
11
11
|
import yaml
|
|
12
12
|
|
|
13
|
-
try:
|
|
14
|
-
from playwright.sync_api import Page
|
|
15
|
-
except ImportError:
|
|
16
|
-
Page = Any # Fallback if playwright not installed
|
|
17
|
-
|
|
18
13
|
from plato.v1.exceptions import PlatoClientError
|
|
19
14
|
from plato.v1.models.flow import Flow
|
|
20
15
|
from plato.v1.models.task import CustomEvalConfig, EvaluationResult, PlatoTask
|
|
@@ -24,6 +19,8 @@ logger = logging.getLogger(__name__)
|
|
|
24
19
|
|
|
25
20
|
# Using TYPE_CHECKING for proper type annotation without circular imports
|
|
26
21
|
if TYPE_CHECKING:
|
|
22
|
+
from playwright.sync_api import Page
|
|
23
|
+
|
|
27
24
|
from plato.sync_sdk import SyncPlato
|
|
28
25
|
|
|
29
26
|
|
|
@@ -45,8 +42,8 @@ class SyncPlatoEnvironment:
|
|
|
45
42
|
|
|
46
43
|
_current_task: PlatoTask | None = None
|
|
47
44
|
_client: "SyncPlato" = None
|
|
48
|
-
id: str = None
|
|
49
|
-
env_id: str = None
|
|
45
|
+
id: str = None # type: ignore
|
|
46
|
+
env_id: str = None # type: ignore
|
|
50
47
|
alias: str | None = None
|
|
51
48
|
_run_session_id: str | None = None
|
|
52
49
|
_heartbeat_thread: threading.Thread | None = None
|
|
@@ -64,7 +61,7 @@ class SyncPlatoEnvironment:
|
|
|
64
61
|
):
|
|
65
62
|
self._client = client
|
|
66
63
|
self.id = id
|
|
67
|
-
self.env_id = env_id
|
|
64
|
+
self.env_id = env_id # type: ignore[assignment]
|
|
68
65
|
self.alias = alias
|
|
69
66
|
self._run_session_id = active_session
|
|
70
67
|
self._heartbeat_thread = None
|
|
@@ -75,7 +72,7 @@ class SyncPlatoEnvironment:
|
|
|
75
72
|
|
|
76
73
|
def login(
|
|
77
74
|
self,
|
|
78
|
-
page: Page,
|
|
75
|
+
page: "Page",
|
|
79
76
|
throw_on_login_error: bool = False,
|
|
80
77
|
screenshots_dir: Path | None = None,
|
|
81
78
|
dataset: str = "base",
|
|
@@ -95,7 +92,7 @@ class SyncPlatoEnvironment:
|
|
|
95
92
|
try:
|
|
96
93
|
headers = {"X-API-Key": self._client.api_key}
|
|
97
94
|
resp = self._client.http_session.get(f"{self._client.base_url}/env/{self.id}/flows", headers=headers)
|
|
98
|
-
self._client._handle_response_error(resp)
|
|
95
|
+
self._client._handle_response_error(resp)
|
|
99
96
|
body_text = resp.text
|
|
100
97
|
# Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
|
|
101
98
|
try:
|
|
@@ -379,7 +376,10 @@ class SyncPlatoEnvironment:
|
|
|
379
376
|
if not self._run_session_id:
|
|
380
377
|
raise PlatoClientError("No active run session. Call reset() first.")
|
|
381
378
|
|
|
382
|
-
if not self._current_task
|
|
379
|
+
if not self._current_task:
|
|
380
|
+
logger.warning("No current task set")
|
|
381
|
+
raise PlatoClientError("No evaluation config found for task")
|
|
382
|
+
if not self._current_task.eval_config:
|
|
383
383
|
logger.warning(f"No evaluation config found for task: {self._current_task.name}")
|
|
384
384
|
raise PlatoClientError("No evaluation config found for task")
|
|
385
385
|
|
plato/v1/sync_flow_executor.py
CHANGED
|
@@ -7,15 +7,13 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
10
|
+
from typing import TYPE_CHECKING, cast
|
|
11
11
|
from urllib.parse import urljoin
|
|
12
12
|
|
|
13
13
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
14
14
|
|
|
15
|
-
try:
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
16
|
from playwright.sync_api import Page
|
|
17
|
-
except ImportError:
|
|
18
|
-
Page = Any # Fallback if playwright not installed
|
|
19
17
|
|
|
20
18
|
from plato.v1.models.flow import (
|
|
21
19
|
CheckElementStep,
|
|
@@ -40,7 +38,7 @@ class SyncFlowExecutor:
|
|
|
40
38
|
|
|
41
39
|
def __init__(
|
|
42
40
|
self,
|
|
43
|
-
page: Page,
|
|
41
|
+
page: "Page",
|
|
44
42
|
flow: Flow,
|
|
45
43
|
screenshots_dir: Path | None = None,
|
|
46
44
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
@@ -104,29 +102,29 @@ class SyncFlowExecutor:
|
|
|
104
102
|
def _execute_step(self, step: FlowStep) -> bool:
|
|
105
103
|
"""Execute a single step in a flow using action attribute."""
|
|
106
104
|
if step.type == "wait_for_selector":
|
|
107
|
-
return self._wait_for_selector(step)
|
|
105
|
+
return self._wait_for_selector(cast(WaitForSelectorStep, step))
|
|
108
106
|
elif step.type == "click":
|
|
109
|
-
return self._click(step)
|
|
107
|
+
return self._click(cast(ClickStep, step))
|
|
110
108
|
elif step.type == "fill":
|
|
111
|
-
return self._fill(step)
|
|
109
|
+
return self._fill(cast(FillStep, step))
|
|
112
110
|
elif step.type == "wait":
|
|
113
|
-
return self._wait(step)
|
|
111
|
+
return self._wait(cast(WaitStep, step))
|
|
114
112
|
elif step.type == "navigate":
|
|
115
|
-
return self._navigate(step)
|
|
113
|
+
return self._navigate(cast(NavigateStep, step))
|
|
116
114
|
elif step.type == "wait_for_url":
|
|
117
|
-
return self._wait_for_url(step)
|
|
115
|
+
return self._wait_for_url(cast(WaitForUrlStep, step))
|
|
118
116
|
elif step.type == "check_element":
|
|
119
|
-
return self._check_element(step)
|
|
117
|
+
return self._check_element(cast(CheckElementStep, step))
|
|
120
118
|
elif step.type == "verify":
|
|
121
|
-
return self._verify(step)
|
|
119
|
+
return self._verify(cast(VerifyStep, step))
|
|
122
120
|
elif step.type == "screenshot":
|
|
123
|
-
return self._screenshot(step)
|
|
121
|
+
return self._screenshot(cast(ScreenshotStep, step))
|
|
124
122
|
elif step.type == "verify_text":
|
|
125
|
-
return self._verify_text(step)
|
|
123
|
+
return self._verify_text(cast(VerifyTextStep, step))
|
|
126
124
|
elif step.type == "verify_url":
|
|
127
|
-
return self._verify_url(step)
|
|
125
|
+
return self._verify_url(cast(VerifyUrlStep, step))
|
|
128
126
|
elif step.type == "verify_no_errors":
|
|
129
|
-
return self._verify_no_errors(step)
|
|
127
|
+
return self._verify_no_errors(cast(VerifyNoErrorsStep, step))
|
|
130
128
|
else:
|
|
131
129
|
self.logger.error(f"❌ Unknown step action: {step.type}")
|
|
132
130
|
return False
|
|
@@ -242,6 +240,7 @@ class SyncFlowExecutor:
|
|
|
242
240
|
|
|
243
241
|
def _verify_element_exists(self, step: VerifyStep) -> bool:
|
|
244
242
|
"""Verify that an element exists in the DOM."""
|
|
243
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
245
244
|
try:
|
|
246
245
|
element = self.page.query_selector(step.selector)
|
|
247
246
|
if element:
|
|
@@ -256,6 +255,7 @@ class SyncFlowExecutor:
|
|
|
256
255
|
|
|
257
256
|
def _verify_element_visible(self, step: VerifyStep) -> bool:
|
|
258
257
|
"""Verify that an element is visible on the page."""
|
|
258
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
259
259
|
try:
|
|
260
260
|
element = self.page.query_selector(step.selector)
|
|
261
261
|
if element:
|
|
@@ -275,6 +275,8 @@ class SyncFlowExecutor:
|
|
|
275
275
|
|
|
276
276
|
def _verify_element_text(self, step: VerifyStep) -> bool:
|
|
277
277
|
"""Verify that an element contains specific text."""
|
|
278
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
279
|
+
assert step.text is not None # Guaranteed by VerifyStep model validator
|
|
278
280
|
try:
|
|
279
281
|
element = self.page.query_selector(step.selector)
|
|
280
282
|
if element:
|
|
@@ -312,6 +314,7 @@ class SyncFlowExecutor:
|
|
|
312
314
|
|
|
313
315
|
def _verify_element_count(self, step: VerifyStep) -> bool:
|
|
314
316
|
"""Verify the count of elements matching a selector."""
|
|
317
|
+
assert step.selector is not None # Guaranteed by VerifyStep model validator
|
|
315
318
|
try:
|
|
316
319
|
elements = self.page.query_selector_all(step.selector)
|
|
317
320
|
actual_count = len(elements)
|
|
@@ -330,6 +333,7 @@ class SyncFlowExecutor:
|
|
|
330
333
|
|
|
331
334
|
def _verify_page_title(self, step: VerifyStep) -> bool:
|
|
332
335
|
"""Verify the page title."""
|
|
336
|
+
assert step.title is not None # Guaranteed by VerifyStep model validator
|
|
333
337
|
try:
|
|
334
338
|
actual_title = self.page.title()
|
|
335
339
|
|
plato/v1/sync_sdk.py
CHANGED
|
@@ -89,7 +89,7 @@ class SyncPlato:
|
|
|
89
89
|
Raises:
|
|
90
90
|
PlatoClientError: With the actual error message from the response
|
|
91
91
|
"""
|
|
92
|
-
if response.status_code >= 400:
|
|
92
|
+
if response.status_code >= 400: # type: ignore[operator]
|
|
93
93
|
try:
|
|
94
94
|
# Try to get the error message from the response body
|
|
95
95
|
error_data = response.json()
|
|
@@ -525,7 +525,9 @@ class SyncPlato:
|
|
|
525
525
|
average_steps=t.get("averageStepsTaken"),
|
|
526
526
|
num_validator_human_scores=t.get("defaultScoringConfig", {}).get("num_sessions_used", 0),
|
|
527
527
|
default_scoring_config=t.get("defaultScoringConfig", {}),
|
|
528
|
-
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
528
|
+
scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
|
|
529
|
+
if t.get("scoringTypes")
|
|
530
|
+
else [ScoringType.MUTATIONS],
|
|
529
531
|
output_schema=t.get("outputSchema"),
|
|
530
532
|
is_sample=t.get("isSample", False),
|
|
531
533
|
simulator_artifact_id=(
|
plato/v2/__init__.py
CHANGED
|
@@ -15,6 +15,7 @@ from plato.v2.async_.client import AsyncPlato
|
|
|
15
15
|
from plato.v2.async_.environment import Environment as AsyncEnvironment
|
|
16
16
|
from plato.v2.async_.flow_executor import FlowExecutionError as AsyncFlowExecutionError
|
|
17
17
|
from plato.v2.async_.flow_executor import FlowExecutor as AsyncFlowExecutor
|
|
18
|
+
from plato.v2.async_.session import SerializedSession
|
|
18
19
|
from plato.v2.async_.session import Session as AsyncSession
|
|
19
20
|
from plato.v2.sync.client import Plato
|
|
20
21
|
from plato.v2.sync.environment import Environment
|
|
@@ -45,6 +46,7 @@ __all__ = [
|
|
|
45
46
|
"AsyncEnvironment",
|
|
46
47
|
"AsyncFlowExecutor",
|
|
47
48
|
"AsyncFlowExecutionError",
|
|
49
|
+
"SerializedSession",
|
|
48
50
|
# Models
|
|
49
51
|
"Flow",
|
|
50
52
|
# Helpers
|
plato/v2/async_/environment.py
CHANGED
|
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
ConnectRoutingInfoResult,
|
|
11
|
+
CreateCheckpointRequest,
|
|
10
12
|
CreateSnapshotResult,
|
|
11
13
|
ExecuteCommandRequest,
|
|
12
14
|
ExecuteCommandResult,
|
|
@@ -130,7 +132,7 @@ class Environment:
|
|
|
130
132
|
return await jobs.snapshot.asyncio(
|
|
131
133
|
client=self._http,
|
|
132
134
|
job_id=self.job_id,
|
|
133
|
-
body=
|
|
135
|
+
body=CreateCheckpointRequest(),
|
|
134
136
|
x_api_key=self._api_key,
|
|
135
137
|
)
|
|
136
138
|
|
|
@@ -142,5 +144,22 @@ class Environment:
|
|
|
142
144
|
x_api_key=self._api_key,
|
|
143
145
|
)
|
|
144
146
|
|
|
147
|
+
async def get_connection_info(self) -> ConnectRoutingInfoResult:
|
|
148
|
+
"""Get connection/routing info for this environment.
|
|
149
|
+
|
|
150
|
+
Returns connection details including:
|
|
151
|
+
- vm_gateway_ip: Gateway IP for VM to reach the host
|
|
152
|
+
- vm_private_ip: Private IP of the VM
|
|
153
|
+
- worker_private_ip: Private IP of the worker
|
|
154
|
+
- ready: Whether the job is ready
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
ConnectRoutingInfoResult with routing information.
|
|
158
|
+
"""
|
|
159
|
+
return await jobs.connect_routing_info.asyncio(
|
|
160
|
+
client=self._http,
|
|
161
|
+
job_id=self.job_id,
|
|
162
|
+
)
|
|
163
|
+
|
|
145
164
|
def __repr__(self) -> str:
|
|
146
165
|
return f"Environment(alias={self.alias!r}, job_id={self.job_id!r})"
|
plato/v2/async_/session.py
CHANGED
|
@@ -12,12 +12,15 @@ import uuid
|
|
|
12
12
|
from dataclasses import dataclass
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
from pathlib import Path
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
15
16
|
|
|
16
17
|
import httpx
|
|
17
18
|
import tenacity
|
|
18
|
-
from playwright.async_api import Browser, BrowserContext, Page
|
|
19
19
|
from pydantic import BaseModel
|
|
20
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from playwright.async_api import Browser, BrowserContext, Page
|
|
23
|
+
|
|
21
24
|
from plato._generated.api.v2.jobs import get_flows as jobs_get_flows
|
|
22
25
|
from plato._generated.api.v2.jobs import public_url as jobs_public_url
|
|
23
26
|
from plato._generated.api.v2.sessions import close as sessions_close
|
|
@@ -29,6 +32,7 @@ from plato._generated.api.v2.sessions import heartbeat as sessions_heartbeat
|
|
|
29
32
|
from plato._generated.api.v2.sessions import make as sessions_make
|
|
30
33
|
from plato._generated.api.v2.sessions import reset as sessions_reset
|
|
31
34
|
from plato._generated.api.v2.sessions import set_date as sessions_set_date
|
|
35
|
+
from plato._generated.api.v2.sessions import setup_sandbox as sessions_setup_sandbox
|
|
32
36
|
from plato._generated.api.v2.sessions import snapshot as sessions_snapshot
|
|
33
37
|
from plato._generated.api.v2.sessions import state as sessions_state
|
|
34
38
|
from plato._generated.api.v2.sessions import wait_for_ready as sessions_wait_for_ready
|
|
@@ -37,6 +41,8 @@ from plato._generated.models import (
|
|
|
37
41
|
AppApiV2SchemasSessionCreateSnapshotResponse,
|
|
38
42
|
AppApiV2SchemasSessionEvaluateResponse,
|
|
39
43
|
AppApiV2SchemasSessionHeartbeatResponse,
|
|
44
|
+
AppApiV2SchemasSessionSetupSandboxRequest,
|
|
45
|
+
AppApiV2SchemasSessionSetupSandboxResponse,
|
|
40
46
|
CreateDiskSnapshotRequest,
|
|
41
47
|
CreateDiskSnapshotResponse,
|
|
42
48
|
CreateSessionFromEnvs,
|
|
@@ -67,7 +73,10 @@ logger = logging.getLogger(__name__)
|
|
|
67
73
|
|
|
68
74
|
@dataclass
|
|
69
75
|
class LoginResult:
|
|
70
|
-
"""Result of login operation containing browser context and pages.
|
|
76
|
+
"""Result of login operation containing browser context and pages.
|
|
77
|
+
|
|
78
|
+
Requires playwright to be installed.
|
|
79
|
+
"""
|
|
71
80
|
|
|
72
81
|
context: BrowserContext
|
|
73
82
|
pages: dict[str, Page]
|
|
@@ -291,9 +300,10 @@ class Session:
|
|
|
291
300
|
)
|
|
292
301
|
|
|
293
302
|
# Use generated API function
|
|
303
|
+
# Note: API supports both CreateSessionFromEnvs and CreateSessionFromTask via discriminator
|
|
294
304
|
response = await sessions_make.asyncio(
|
|
295
305
|
client=http_client,
|
|
296
|
-
body=request_body,
|
|
306
|
+
body=request_body, # type: ignore[arg-type]
|
|
297
307
|
x_api_key=api_key,
|
|
298
308
|
)
|
|
299
309
|
|
|
@@ -550,6 +560,38 @@ class Session:
|
|
|
550
560
|
x_api_key=self._api_key,
|
|
551
561
|
)
|
|
552
562
|
|
|
563
|
+
async def setup_sandbox(
|
|
564
|
+
self,
|
|
565
|
+
timeout: int = 120,
|
|
566
|
+
) -> AppApiV2SchemasSessionSetupSandboxResponse:
|
|
567
|
+
"""Setup sandbox environment with Docker overlay on all environments.
|
|
568
|
+
|
|
569
|
+
This configures the VMs for Docker usage with overlay2 storage driver,
|
|
570
|
+
which is significantly faster than the default vfs driver. Should be called
|
|
571
|
+
after session creation and before pulling Docker images.
|
|
572
|
+
|
|
573
|
+
The setup includes:
|
|
574
|
+
- Mounting /dev/vdb to /mnt/docker for Docker storage
|
|
575
|
+
- Configuring Docker with overlay2 storage driver
|
|
576
|
+
- Setting up ECR and Docker Hub authentication
|
|
577
|
+
- Creating a docker-user service for non-root Docker access
|
|
578
|
+
|
|
579
|
+
Args:
|
|
580
|
+
timeout: Setup timeout in seconds (default: 120).
|
|
581
|
+
|
|
582
|
+
Returns:
|
|
583
|
+
SetupSandboxResponse with results per job_id.
|
|
584
|
+
"""
|
|
585
|
+
self._check_closed()
|
|
586
|
+
|
|
587
|
+
request = AppApiV2SchemasSessionSetupSandboxRequest(timeout=timeout)
|
|
588
|
+
return await sessions_setup_sandbox.asyncio(
|
|
589
|
+
client=self._http,
|
|
590
|
+
session_id=self.session_id,
|
|
591
|
+
body=request,
|
|
592
|
+
x_api_key=self._api_key,
|
|
593
|
+
)
|
|
594
|
+
|
|
553
595
|
async def evaluate(self, **kwargs) -> AppApiV2SchemasSessionEvaluateResponse:
|
|
554
596
|
"""Evaluate the session against task criteria.
|
|
555
597
|
|
|
@@ -738,6 +780,9 @@ class Session:
|
|
|
738
780
|
Navigates each page to the environment's public URL and executes
|
|
739
781
|
the login flow.
|
|
740
782
|
|
|
783
|
+
Requires playwright to be installed:
|
|
784
|
+
pip install playwright
|
|
785
|
+
|
|
741
786
|
Args:
|
|
742
787
|
browser: Playwright Browser instance.
|
|
743
788
|
dataset: Dataset name for login flow (default: "base" uses "login" flow).
|
|
@@ -750,9 +795,15 @@ class Session:
|
|
|
750
795
|
|
|
751
796
|
Raises:
|
|
752
797
|
RuntimeError: If login fails.
|
|
798
|
+
ImportError: If playwright is not installed.
|
|
753
799
|
"""
|
|
754
800
|
self._check_closed()
|
|
755
801
|
|
|
802
|
+
import importlib.util
|
|
803
|
+
|
|
804
|
+
if importlib.util.find_spec("playwright") is None:
|
|
805
|
+
raise ImportError("The login() method requires playwright. Install it with: pip install playwright")
|
|
806
|
+
|
|
756
807
|
context = await browser.new_context()
|
|
757
808
|
pages: dict[str, Page] = {}
|
|
758
809
|
|
plato/v2/sync/environment.py
CHANGED
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
|
|
|
7
7
|
|
|
8
8
|
from plato._generated.api.v2 import jobs
|
|
9
9
|
from plato._generated.models import (
|
|
10
|
+
CreateCheckpointRequest,
|
|
10
11
|
CreateSnapshotResult,
|
|
11
12
|
ExecuteCommandRequest,
|
|
12
13
|
ExecuteCommandResult,
|
|
@@ -130,7 +131,7 @@ class Environment:
|
|
|
130
131
|
return jobs.snapshot.sync(
|
|
131
132
|
client=self._http,
|
|
132
133
|
job_id=self.job_id,
|
|
133
|
-
body=
|
|
134
|
+
body=CreateCheckpointRequest(),
|
|
134
135
|
x_api_key=self._api_key,
|
|
135
136
|
)
|
|
136
137
|
|