plato-sdk-v2 2.0.50__py3-none-any.whl → 2.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. plato/__init__.py +7 -6
  2. plato/_generated/__init__.py +1 -1
  3. plato/_generated/api/v1/env/evaluate_session.py +3 -3
  4. plato/_generated/api/v1/env/log_state_mutation.py +4 -4
  5. plato/_generated/api/v1/sandbox/checkpoint_vm.py +3 -3
  6. plato/_generated/api/v1/sandbox/save_vm_snapshot.py +3 -3
  7. plato/_generated/api/v1/sandbox/setup_sandbox.py +8 -8
  8. plato/_generated/api/v1/session/__init__.py +2 -0
  9. plato/_generated/api/v1/session/get_sessions_for_archival.py +100 -0
  10. plato/_generated/api/v1/testcases/__init__.py +6 -2
  11. plato/_generated/api/v1/testcases/get_mutation_groups_for_testcase.py +98 -0
  12. plato/_generated/api/v1/testcases/{get_next_output_testcase_for_scoring.py → get_next_testcase_for_scoring.py} +23 -10
  13. plato/_generated/api/v1/testcases/get_testcase_metadata_for_scoring.py +74 -0
  14. plato/_generated/api/v2/__init__.py +2 -1
  15. plato/_generated/api/v2/jobs/__init__.py +4 -0
  16. plato/_generated/api/v2/jobs/checkpoint.py +3 -3
  17. plato/_generated/api/v2/jobs/disk_snapshot.py +3 -3
  18. plato/_generated/api/v2/jobs/log_for_job.py +4 -39
  19. plato/_generated/api/v2/jobs/make.py +4 -4
  20. plato/_generated/api/v2/jobs/setup_sandbox.py +97 -0
  21. plato/_generated/api/v2/jobs/snapshot.py +3 -3
  22. plato/_generated/api/v2/jobs/snapshot_store.py +91 -0
  23. plato/_generated/api/v2/sessions/__init__.py +4 -0
  24. plato/_generated/api/v2/sessions/checkpoint.py +3 -3
  25. plato/_generated/api/v2/sessions/disk_snapshot.py +3 -3
  26. plato/_generated/api/v2/sessions/evaluate.py +3 -3
  27. plato/_generated/api/v2/sessions/log_job_mutation.py +4 -39
  28. plato/_generated/api/v2/sessions/make.py +4 -4
  29. plato/_generated/api/v2/sessions/setup_sandbox.py +98 -0
  30. plato/_generated/api/v2/sessions/snapshot.py +3 -3
  31. plato/_generated/api/v2/sessions/snapshot_store.py +94 -0
  32. plato/_generated/api/v2/user/__init__.py +7 -0
  33. plato/_generated/api/v2/user/get_current_user.py +76 -0
  34. plato/_generated/models/__init__.py +174 -23
  35. plato/_sims_generator/__init__.py +19 -4
  36. plato/_sims_generator/instruction.py +203 -0
  37. plato/_sims_generator/templates/instruction/helpers.py.jinja +161 -0
  38. plato/_sims_generator/templates/instruction/init.py.jinja +43 -0
  39. plato/agents/__init__.py +107 -517
  40. plato/agents/base.py +145 -0
  41. plato/agents/build.py +61 -0
  42. plato/agents/config.py +160 -0
  43. plato/agents/logging.py +401 -0
  44. plato/agents/runner.py +161 -0
  45. plato/agents/trajectory.py +266 -0
  46. plato/chronos/__init__.py +37 -0
  47. plato/chronos/api/__init__.py +3 -0
  48. plato/chronos/api/agents/__init__.py +13 -0
  49. plato/chronos/api/agents/create_agent.py +63 -0
  50. plato/chronos/api/agents/delete_agent.py +61 -0
  51. plato/chronos/api/agents/get_agent.py +62 -0
  52. plato/chronos/api/agents/get_agent_schema.py +72 -0
  53. plato/chronos/api/agents/get_agent_versions.py +62 -0
  54. plato/chronos/api/agents/list_agents.py +57 -0
  55. plato/chronos/api/agents/lookup_agent.py +74 -0
  56. plato/chronos/api/auth/__init__.py +9 -0
  57. plato/chronos/api/auth/debug_auth_api_auth_debug_get.py +43 -0
  58. plato/chronos/api/auth/get_auth_status_api_auth_status_get.py +61 -0
  59. plato/chronos/api/auth/get_current_user_route_api_auth_me_get.py +60 -0
  60. plato/chronos/api/callback/__init__.py +11 -0
  61. plato/chronos/api/callback/push_agent_logs.py +61 -0
  62. plato/chronos/api/callback/update_agent_status.py +57 -0
  63. plato/chronos/api/callback/upload_artifacts.py +59 -0
  64. plato/chronos/api/callback/upload_logs_zip.py +57 -0
  65. plato/chronos/api/callback/upload_trajectory.py +57 -0
  66. plato/chronos/api/default/__init__.py +7 -0
  67. plato/chronos/api/default/health.py +43 -0
  68. plato/chronos/api/jobs/__init__.py +7 -0
  69. plato/chronos/api/jobs/launch_job.py +63 -0
  70. plato/chronos/api/registry/__init__.py +19 -0
  71. plato/chronos/api/registry/get_agent_schema_api_registry_agents__agent_name__schema_get.py +62 -0
  72. plato/chronos/api/registry/get_agent_versions_api_registry_agents__agent_name__versions_get.py +52 -0
  73. plato/chronos/api/registry/get_world_schema_api_registry_worlds__package_name__schema_get.py +68 -0
  74. plato/chronos/api/registry/get_world_versions_api_registry_worlds__package_name__versions_get.py +52 -0
  75. plato/chronos/api/registry/list_registry_agents_api_registry_agents_get.py +44 -0
  76. plato/chronos/api/registry/list_registry_worlds_api_registry_worlds_get.py +44 -0
  77. plato/chronos/api/runtimes/__init__.py +11 -0
  78. plato/chronos/api/runtimes/create_runtime.py +63 -0
  79. plato/chronos/api/runtimes/delete_runtime.py +61 -0
  80. plato/chronos/api/runtimes/get_runtime.py +62 -0
  81. plato/chronos/api/runtimes/list_runtimes.py +57 -0
  82. plato/chronos/api/runtimes/test_runtime.py +67 -0
  83. plato/chronos/api/secrets/__init__.py +11 -0
  84. plato/chronos/api/secrets/create_secret.py +63 -0
  85. plato/chronos/api/secrets/delete_secret.py +61 -0
  86. plato/chronos/api/secrets/get_secret.py +62 -0
  87. plato/chronos/api/secrets/list_secrets.py +57 -0
  88. plato/chronos/api/secrets/update_secret.py +68 -0
  89. plato/chronos/api/sessions/__init__.py +10 -0
  90. plato/chronos/api/sessions/get_session.py +62 -0
  91. plato/chronos/api/sessions/get_session_logs.py +72 -0
  92. plato/chronos/api/sessions/get_session_logs_download.py +62 -0
  93. plato/chronos/api/sessions/list_sessions.py +57 -0
  94. plato/chronos/api/status/__init__.py +8 -0
  95. plato/chronos/api/status/get_status_api_status_get.py +44 -0
  96. plato/chronos/api/status/get_version_info_api_version_get.py +44 -0
  97. plato/chronos/api/templates/__init__.py +11 -0
  98. plato/chronos/api/templates/create_template.py +63 -0
  99. plato/chronos/api/templates/delete_template.py +61 -0
  100. plato/chronos/api/templates/get_template.py +62 -0
  101. plato/chronos/api/templates/list_templates.py +57 -0
  102. plato/chronos/api/templates/update_template.py +68 -0
  103. plato/chronos/api/trajectories/__init__.py +8 -0
  104. plato/chronos/api/trajectories/get_trajectory.py +62 -0
  105. plato/chronos/api/trajectories/list_trajectories.py +62 -0
  106. plato/chronos/api/worlds/__init__.py +10 -0
  107. plato/chronos/api/worlds/create_world.py +63 -0
  108. plato/chronos/api/worlds/delete_world.py +61 -0
  109. plato/chronos/api/worlds/get_world.py +62 -0
  110. plato/chronos/api/worlds/list_worlds.py +57 -0
  111. plato/chronos/client.py +171 -0
  112. plato/chronos/errors.py +141 -0
  113. plato/chronos/models/__init__.py +647 -0
  114. plato/chronos/py.typed +0 -0
  115. plato/sims/cli.py +299 -123
  116. plato/sims/registry.py +77 -4
  117. plato/v1/cli/agent.py +88 -84
  118. plato/v1/cli/main.py +2 -0
  119. plato/v1/cli/pm.py +441 -119
  120. plato/v1/cli/sandbox.py +747 -191
  121. plato/v1/cli/sim.py +11 -0
  122. plato/v1/cli/verify.py +1269 -0
  123. plato/v1/cli/world.py +3 -0
  124. plato/v1/flow_executor.py +21 -17
  125. plato/v1/models/env.py +11 -11
  126. plato/v1/sdk.py +2 -2
  127. plato/v1/sync_env.py +11 -11
  128. plato/v1/sync_flow_executor.py +21 -17
  129. plato/v1/sync_sdk.py +4 -2
  130. plato/v2/__init__.py +2 -0
  131. plato/v2/async_/environment.py +20 -1
  132. plato/v2/async_/session.py +54 -3
  133. plato/v2/sync/environment.py +2 -1
  134. plato/v2/sync/session.py +52 -2
  135. plato/worlds/README.md +218 -0
  136. plato/worlds/__init__.py +54 -18
  137. plato/worlds/base.py +304 -93
  138. plato/worlds/config.py +239 -73
  139. plato/worlds/runner.py +391 -80
  140. {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/METADATA +1 -3
  141. {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/RECORD +143 -68
  142. {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/entry_points.txt +1 -0
  143. plato/_generated/api/v2/interfaces/__init__.py +0 -27
  144. plato/_generated/api/v2/interfaces/v2_interface_browser_create.py +0 -68
  145. plato/_generated/api/v2/interfaces/v2_interface_cdp_url.py +0 -65
  146. plato/_generated/api/v2/interfaces/v2_interface_click.py +0 -64
  147. plato/_generated/api/v2/interfaces/v2_interface_close.py +0 -59
  148. plato/_generated/api/v2/interfaces/v2_interface_computer_create.py +0 -68
  149. plato/_generated/api/v2/interfaces/v2_interface_cursor.py +0 -64
  150. plato/_generated/api/v2/interfaces/v2_interface_key.py +0 -68
  151. plato/_generated/api/v2/interfaces/v2_interface_screenshot.py +0 -65
  152. plato/_generated/api/v2/interfaces/v2_interface_scroll.py +0 -70
  153. plato/_generated/api/v2/interfaces/v2_interface_type.py +0 -64
  154. plato/world/__init__.py +0 -44
  155. plato/world/base.py +0 -267
  156. plato/world/config.py +0 -139
  157. plato/world/types.py +0 -47
  158. {plato_sdk_v2-2.0.50.dist-info → plato_sdk_v2-2.2.4.dist-info}/WHEEL +0 -0
plato/v1/cli/world.py CHANGED
@@ -235,6 +235,8 @@ def world_publish(
235
235
  upload_url = f"{api_url}/v2/pypi/worlds/"
236
236
  console.print(f"\n[cyan]Uploading to {upload_url}...[/cyan]")
237
237
 
238
+ # api_key is guaranteed to be set (checked earlier when not dry_run)
239
+ assert api_key is not None, "api_key must be set when not in dry_run mode"
238
240
  try:
239
241
  result = subprocess.run(
240
242
  [
@@ -250,6 +252,7 @@ def world_publish(
250
252
  ],
251
253
  capture_output=True,
252
254
  text=True,
255
+ check=False,
253
256
  )
254
257
 
255
258
  if result.returncode == 0:
plato/v1/flow_executor.py CHANGED
@@ -7,15 +7,13 @@ import logging
7
7
  import os
8
8
  import sys
9
9
  from pathlib import Path
10
- from typing import Any
10
+ from typing import TYPE_CHECKING, cast
11
11
  from urllib.parse import urljoin
12
12
 
13
13
  sys.path.insert(0, os.path.dirname(__file__))
14
14
 
15
- try:
15
+ if TYPE_CHECKING:
16
16
  from playwright.async_api import Page
17
- except ImportError:
18
- Page = Any # Fallback if playwright not installed
19
17
 
20
18
  from plato.v1.models.flow import (
21
19
  CheckElementStep,
@@ -42,7 +40,7 @@ class FlowExecutor:
42
40
 
43
41
  def __init__(
44
42
  self,
45
- page: Page,
43
+ page: "Page",
46
44
  flow: Flow,
47
45
  screenshots_dir: Path | None = None,
48
46
  logger: logging.Logger = logging.getLogger(__name__),
@@ -108,29 +106,29 @@ class FlowExecutor:
108
106
  async def _execute_step(self, step: FlowStep) -> bool:
109
107
  """Execute a single step in a flow using type attribute."""
110
108
  if step.type == "wait_for_selector":
111
- return await self._wait_for_selector(step)
109
+ return await self._wait_for_selector(cast(WaitForSelectorStep, step))
112
110
  elif step.type == "click":
113
- return await self._click(step)
111
+ return await self._click(cast(ClickStep, step))
114
112
  elif step.type == "fill":
115
- return await self._fill(step)
113
+ return await self._fill(cast(FillStep, step))
116
114
  elif step.type == "wait":
117
- return await self._wait(step)
115
+ return await self._wait(cast(WaitStep, step))
118
116
  elif step.type == "navigate":
119
- return await self._navigate(step)
117
+ return await self._navigate(cast(NavigateStep, step))
120
118
  elif step.type == "wait_for_url":
121
- return await self._wait_for_url(step)
119
+ return await self._wait_for_url(cast(WaitForUrlStep, step))
122
120
  elif step.type == "check_element":
123
- return await self._check_element(step)
121
+ return await self._check_element(cast(CheckElementStep, step))
124
122
  elif step.type == "verify":
125
- return await self._verify(step)
123
+ return await self._verify(cast(VerifyStep, step))
126
124
  elif step.type == "screenshot":
127
- return await self._screenshot(step)
125
+ return await self._screenshot(cast(ScreenshotStep, step))
128
126
  elif step.type == "verify_text":
129
- return await self._verify_text(step)
127
+ return await self._verify_text(cast(VerifyTextStep, step))
130
128
  elif step.type == "verify_url":
131
- return await self._verify_url(step)
129
+ return await self._verify_url(cast(VerifyUrlStep, step))
132
130
  elif step.type == "verify_no_errors":
133
- return await self._verify_no_errors(step)
131
+ return await self._verify_no_errors(cast(VerifyNoErrorsStep, step))
134
132
  else:
135
133
  self.logger.error(f"❌ Unknown step type: {step.type}")
136
134
  return False
@@ -248,6 +246,7 @@ class FlowExecutor:
248
246
 
249
247
  async def _verify_element_exists(self, step: VerifyStep) -> bool:
250
248
  """Verify that an element exists in the DOM."""
249
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
251
250
  try:
252
251
  element = await self.page.query_selector(step.selector)
253
252
  if element:
@@ -262,6 +261,7 @@ class FlowExecutor:
262
261
 
263
262
  async def _verify_element_visible(self, step: VerifyStep) -> bool:
264
263
  """Verify that an element is visible on the page."""
264
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
265
265
  try:
266
266
  element = await self.page.query_selector(step.selector)
267
267
  if element:
@@ -281,6 +281,8 @@ class FlowExecutor:
281
281
 
282
282
  async def _verify_element_text(self, step: VerifyStep) -> bool:
283
283
  """Verify that an element contains specific text."""
284
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
285
+ assert step.text is not None # Guaranteed by VerifyStep model validator
284
286
  try:
285
287
  element = await self.page.query_selector(step.selector)
286
288
  if element:
@@ -318,6 +320,7 @@ class FlowExecutor:
318
320
 
319
321
  async def _verify_element_count(self, step: VerifyStep) -> bool:
320
322
  """Verify the count of elements matching a selector."""
323
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
321
324
  try:
322
325
  elements = await self.page.query_selector_all(step.selector)
323
326
  actual_count = len(elements)
@@ -336,6 +339,7 @@ class FlowExecutor:
336
339
 
337
340
  async def _verify_page_title(self, step: VerifyStep) -> bool:
338
341
  """Verify the page title."""
342
+ assert step.title is not None # Guaranteed by VerifyStep model validator
339
343
  try:
340
344
  actual_title = await self.page.title()
341
345
 
plato/v1/models/env.py CHANGED
@@ -13,11 +13,6 @@ from urllib.parse import urlparse
13
13
 
14
14
  import yaml
15
15
 
16
- try:
17
- from playwright.async_api import Page
18
- except ImportError:
19
- Page = Any # Fallback if playwright not installed
20
-
21
16
  from plato.v1.exceptions import PlatoClientError
22
17
  from plato.v1.flow_executor import FlowExecutor
23
18
  from plato.v1.models.flow import Flow
@@ -27,6 +22,8 @@ logger = logging.getLogger(__name__)
27
22
 
28
23
  # Using TYPE_CHECKING for proper type annotation without circular imports
29
24
  if TYPE_CHECKING:
25
+ from playwright.async_api import Page
26
+
30
27
  from plato.sdk import Plato
31
28
 
32
29
 
@@ -48,8 +45,8 @@ class PlatoEnvironment:
48
45
 
49
46
  _current_task: PlatoTask | None = None
50
47
  _client: "Plato" = None
51
- id: str = None
52
- env_id: str = None
48
+ id: str = None # type: ignore
49
+ env_id: str = None # type: ignore
53
50
  alias: str | None = None
54
51
  _run_session_id: str | None = None
55
52
  _heartbeat_task: asyncio.Task | None = None
@@ -66,7 +63,7 @@ class PlatoEnvironment:
66
63
  ):
67
64
  self._client = client
68
65
  self.id = id
69
- self.env_id = env_id
66
+ self.env_id = env_id # type: ignore[assignment]
70
67
  self.alias = alias
71
68
  self._run_session_id = None
72
69
  self._heartbeat_task = None
@@ -77,7 +74,7 @@ class PlatoEnvironment:
77
74
 
78
75
  async def login(
79
76
  self,
80
- page: Page,
77
+ page: "Page",
81
78
  throw_on_login_error: bool = False,
82
79
  screenshots_dir: Path | None = None,
83
80
  dataset: str = "base",
@@ -97,7 +94,7 @@ class PlatoEnvironment:
97
94
  f"{self._client.base_url}/env/{self.id}/flows",
98
95
  headers=headers,
99
96
  ) as resp:
100
- await self._client._handle_response_error(resp) # type: ignore[attr-defined]
97
+ await self._client._handle_response_error(resp)
101
98
  body_text = await resp.text()
102
99
  # Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
103
100
  try:
@@ -394,7 +391,10 @@ class PlatoEnvironment:
394
391
  if not self._run_session_id:
395
392
  raise PlatoClientError("No active run session. Call reset() first.")
396
393
 
397
- if not self._current_task or not self._current_task.eval_config:
394
+ if not self._current_task:
395
+ logger.warning("No current task set")
396
+ raise PlatoClientError("No evaluation config found for task")
397
+ if not self._current_task.eval_config:
398
398
  logger.warning(f"No evaluation config found for task: {self._current_task.name}")
399
399
  raise PlatoClientError("No evaluation config found for task")
400
400
 
plato/v1/sdk.py CHANGED
@@ -538,7 +538,7 @@ class Plato:
538
538
  default_scoring_config=t.get("defaultScoringConfig", {}),
539
539
  scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
540
540
  if t.get("scoringTypes")
541
- else None,
541
+ else [ScoringType.MUTATIONS], # Use default when not provided
542
542
  output_schema=t.get("outputSchema"),
543
543
  is_sample=t.get("isSample", False),
544
544
  simulator_artifact_id=(
@@ -685,7 +685,7 @@ class Plato:
685
685
  return await response.json()
686
686
 
687
687
  async def create_simulator(
688
- self, name: str, description: str = None, sim_type: str = "docker_app"
688
+ self, name: str, description: str | None = None, sim_type: str = "docker_app"
689
689
  ) -> dict[str, Any]:
690
690
  """Create a new simulator.
691
691
 
plato/v1/sync_env.py CHANGED
@@ -10,11 +10,6 @@ from urllib.parse import urlparse
10
10
 
11
11
  import yaml
12
12
 
13
- try:
14
- from playwright.sync_api import Page
15
- except ImportError:
16
- Page = Any # Fallback if playwright not installed
17
-
18
13
  from plato.v1.exceptions import PlatoClientError
19
14
  from plato.v1.models.flow import Flow
20
15
  from plato.v1.models.task import CustomEvalConfig, EvaluationResult, PlatoTask
@@ -24,6 +19,8 @@ logger = logging.getLogger(__name__)
24
19
 
25
20
  # Using TYPE_CHECKING for proper type annotation without circular imports
26
21
  if TYPE_CHECKING:
22
+ from playwright.sync_api import Page
23
+
27
24
  from plato.sync_sdk import SyncPlato
28
25
 
29
26
 
@@ -45,8 +42,8 @@ class SyncPlatoEnvironment:
45
42
 
46
43
  _current_task: PlatoTask | None = None
47
44
  _client: "SyncPlato" = None
48
- id: str = None
49
- env_id: str = None
45
+ id: str = None # type: ignore
46
+ env_id: str = None # type: ignore
50
47
  alias: str | None = None
51
48
  _run_session_id: str | None = None
52
49
  _heartbeat_thread: threading.Thread | None = None
@@ -64,7 +61,7 @@ class SyncPlatoEnvironment:
64
61
  ):
65
62
  self._client = client
66
63
  self.id = id
67
- self.env_id = env_id
64
+ self.env_id = env_id # type: ignore[assignment]
68
65
  self.alias = alias
69
66
  self._run_session_id = active_session
70
67
  self._heartbeat_thread = None
@@ -75,7 +72,7 @@ class SyncPlatoEnvironment:
75
72
 
76
73
  def login(
77
74
  self,
78
- page: Page,
75
+ page: "Page",
79
76
  throw_on_login_error: bool = False,
80
77
  screenshots_dir: Path | None = None,
81
78
  dataset: str = "base",
@@ -95,7 +92,7 @@ class SyncPlatoEnvironment:
95
92
  try:
96
93
  headers = {"X-API-Key": self._client.api_key}
97
94
  resp = self._client.http_session.get(f"{self._client.base_url}/env/{self.id}/flows", headers=headers)
98
- self._client._handle_response_error(resp) # type: ignore[attr-defined]
95
+ self._client._handle_response_error(resp)
99
96
  body_text = resp.text
100
97
  # Endpoint may return JSON with { data: { flows: "...yaml..." } } or raw YAML
101
98
  try:
@@ -379,7 +376,10 @@ class SyncPlatoEnvironment:
379
376
  if not self._run_session_id:
380
377
  raise PlatoClientError("No active run session. Call reset() first.")
381
378
 
382
- if not self._current_task or not self._current_task.eval_config:
379
+ if not self._current_task:
380
+ logger.warning("No current task set")
381
+ raise PlatoClientError("No evaluation config found for task")
382
+ if not self._current_task.eval_config:
383
383
  logger.warning(f"No evaluation config found for task: {self._current_task.name}")
384
384
  raise PlatoClientError("No evaluation config found for task")
385
385
 
plato/v1/sync_flow_executor.py CHANGED
@@ -7,15 +7,13 @@ import logging
7
7
  import os
8
8
  import sys
9
9
  from pathlib import Path
10
- from typing import Any
10
+ from typing import TYPE_CHECKING, cast
11
11
  from urllib.parse import urljoin
12
12
 
13
13
  sys.path.insert(0, os.path.dirname(__file__))
14
14
 
15
- try:
15
+ if TYPE_CHECKING:
16
16
  from playwright.sync_api import Page
17
- except ImportError:
18
- Page = Any # Fallback if playwright not installed
19
17
 
20
18
  from plato.v1.models.flow import (
21
19
  CheckElementStep,
@@ -40,7 +38,7 @@ class SyncFlowExecutor:
40
38
 
41
39
  def __init__(
42
40
  self,
43
- page: Page,
41
+ page: "Page",
44
42
  flow: Flow,
45
43
  screenshots_dir: Path | None = None,
46
44
  logger: logging.Logger = logging.getLogger(__name__),
@@ -104,29 +102,29 @@ class SyncFlowExecutor:
104
102
  def _execute_step(self, step: FlowStep) -> bool:
105
103
  """Execute a single step in a flow using action attribute."""
106
104
  if step.type == "wait_for_selector":
107
- return self._wait_for_selector(step)
105
+ return self._wait_for_selector(cast(WaitForSelectorStep, step))
108
106
  elif step.type == "click":
109
- return self._click(step)
107
+ return self._click(cast(ClickStep, step))
110
108
  elif step.type == "fill":
111
- return self._fill(step)
109
+ return self._fill(cast(FillStep, step))
112
110
  elif step.type == "wait":
113
- return self._wait(step)
111
+ return self._wait(cast(WaitStep, step))
114
112
  elif step.type == "navigate":
115
- return self._navigate(step)
113
+ return self._navigate(cast(NavigateStep, step))
116
114
  elif step.type == "wait_for_url":
117
- return self._wait_for_url(step)
115
+ return self._wait_for_url(cast(WaitForUrlStep, step))
118
116
  elif step.type == "check_element":
119
- return self._check_element(step)
117
+ return self._check_element(cast(CheckElementStep, step))
120
118
  elif step.type == "verify":
121
- return self._verify(step)
119
+ return self._verify(cast(VerifyStep, step))
122
120
  elif step.type == "screenshot":
123
- return self._screenshot(step)
121
+ return self._screenshot(cast(ScreenshotStep, step))
124
122
  elif step.type == "verify_text":
125
- return self._verify_text(step)
123
+ return self._verify_text(cast(VerifyTextStep, step))
126
124
  elif step.type == "verify_url":
127
- return self._verify_url(step)
125
+ return self._verify_url(cast(VerifyUrlStep, step))
128
126
  elif step.type == "verify_no_errors":
129
- return self._verify_no_errors(step)
127
+ return self._verify_no_errors(cast(VerifyNoErrorsStep, step))
130
128
  else:
131
129
  self.logger.error(f"❌ Unknown step action: {step.type}")
132
130
  return False
@@ -242,6 +240,7 @@ class SyncFlowExecutor:
242
240
 
243
241
  def _verify_element_exists(self, step: VerifyStep) -> bool:
244
242
  """Verify that an element exists in the DOM."""
243
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
245
244
  try:
246
245
  element = self.page.query_selector(step.selector)
247
246
  if element:
@@ -256,6 +255,7 @@ class SyncFlowExecutor:
256
255
 
257
256
  def _verify_element_visible(self, step: VerifyStep) -> bool:
258
257
  """Verify that an element is visible on the page."""
258
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
259
259
  try:
260
260
  element = self.page.query_selector(step.selector)
261
261
  if element:
@@ -275,6 +275,8 @@ class SyncFlowExecutor:
275
275
 
276
276
  def _verify_element_text(self, step: VerifyStep) -> bool:
277
277
  """Verify that an element contains specific text."""
278
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
279
+ assert step.text is not None # Guaranteed by VerifyStep model validator
278
280
  try:
279
281
  element = self.page.query_selector(step.selector)
280
282
  if element:
@@ -312,6 +314,7 @@ class SyncFlowExecutor:
312
314
 
313
315
  def _verify_element_count(self, step: VerifyStep) -> bool:
314
316
  """Verify the count of elements matching a selector."""
317
+ assert step.selector is not None # Guaranteed by VerifyStep model validator
315
318
  try:
316
319
  elements = self.page.query_selector_all(step.selector)
317
320
  actual_count = len(elements)
@@ -330,6 +333,7 @@ class SyncFlowExecutor:
330
333
 
331
334
  def _verify_page_title(self, step: VerifyStep) -> bool:
332
335
  """Verify the page title."""
336
+ assert step.title is not None # Guaranteed by VerifyStep model validator
333
337
  try:
334
338
  actual_title = self.page.title()
335
339
 
plato/v1/sync_sdk.py CHANGED
@@ -89,7 +89,7 @@ class SyncPlato:
89
89
  Raises:
90
90
  PlatoClientError: With the actual error message from the response
91
91
  """
92
- if response.status_code >= 400:
92
+ if response.status_code >= 400: # type: ignore[operator]
93
93
  try:
94
94
  # Try to get the error message from the response body
95
95
  error_data = response.json()
@@ -525,7 +525,9 @@ class SyncPlato:
525
525
  average_steps=t.get("averageStepsTaken"),
526
526
  num_validator_human_scores=t.get("defaultScoringConfig", {}).get("num_sessions_used", 0),
527
527
  default_scoring_config=t.get("defaultScoringConfig", {}),
528
- scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])] if t.get("scoringTypes") else None,
528
+ scoring_type=[ScoringType(st) for st in t.get("scoringTypes", [])]
529
+ if t.get("scoringTypes")
530
+ else [ScoringType.MUTATIONS],
529
531
  output_schema=t.get("outputSchema"),
530
532
  is_sample=t.get("isSample", False),
531
533
  simulator_artifact_id=(
plato/v2/__init__.py CHANGED
@@ -15,6 +15,7 @@ from plato.v2.async_.client import AsyncPlato
15
15
  from plato.v2.async_.environment import Environment as AsyncEnvironment
16
16
  from plato.v2.async_.flow_executor import FlowExecutionError as AsyncFlowExecutionError
17
17
  from plato.v2.async_.flow_executor import FlowExecutor as AsyncFlowExecutor
18
+ from plato.v2.async_.session import SerializedSession
18
19
  from plato.v2.async_.session import Session as AsyncSession
19
20
  from plato.v2.sync.client import Plato
20
21
  from plato.v2.sync.environment import Environment
@@ -45,6 +46,7 @@ __all__ = [
45
46
  "AsyncEnvironment",
46
47
  "AsyncFlowExecutor",
47
48
  "AsyncFlowExecutionError",
49
+ "SerializedSession",
48
50
  # Models
49
51
  "Flow",
50
52
  # Helpers
plato/v2/async_/environment.py CHANGED
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
7
7
 
8
8
  from plato._generated.api.v2 import jobs
9
9
  from plato._generated.models import (
10
+ ConnectRoutingInfoResult,
11
+ CreateCheckpointRequest,
10
12
  CreateSnapshotResult,
11
13
  ExecuteCommandRequest,
12
14
  ExecuteCommandResult,
@@ -130,7 +132,7 @@ class Environment:
130
132
  return await jobs.snapshot.asyncio(
131
133
  client=self._http,
132
134
  job_id=self.job_id,
133
- body=None,
135
+ body=CreateCheckpointRequest(),
134
136
  x_api_key=self._api_key,
135
137
  )
136
138
 
@@ -142,5 +144,22 @@ class Environment:
142
144
  x_api_key=self._api_key,
143
145
  )
144
146
 
147
+ async def get_connection_info(self) -> ConnectRoutingInfoResult:
148
+ """Get connection/routing info for this environment.
149
+
150
+ Returns connection details including:
151
+ - vm_gateway_ip: Gateway IP for VM to reach the host
152
+ - vm_private_ip: Private IP of the VM
153
+ - worker_private_ip: Private IP of the worker
154
+ - ready: Whether the job is ready
155
+
156
+ Returns:
157
+ ConnectRoutingInfoResult with routing information.
158
+ """
159
+ return await jobs.connect_routing_info.asyncio(
160
+ client=self._http,
161
+ job_id=self.job_id,
162
+ )
163
+
145
164
  def __repr__(self) -> str:
146
165
  return f"Environment(alias={self.alias!r}, job_id={self.job_id!r})"
plato/v2/async_/session.py CHANGED
@@ -12,12 +12,15 @@ import uuid
12
12
  from dataclasses import dataclass
13
13
  from datetime import datetime
14
14
  from pathlib import Path
15
+ from typing import TYPE_CHECKING
15
16
 
16
17
  import httpx
17
18
  import tenacity
18
- from playwright.async_api import Browser, BrowserContext, Page
19
19
  from pydantic import BaseModel
20
20
 
21
+ if TYPE_CHECKING:
22
+ from playwright.async_api import Browser, BrowserContext, Page
23
+
21
24
  from plato._generated.api.v2.jobs import get_flows as jobs_get_flows
22
25
  from plato._generated.api.v2.jobs import public_url as jobs_public_url
23
26
  from plato._generated.api.v2.sessions import close as sessions_close
@@ -29,6 +32,7 @@ from plato._generated.api.v2.sessions import heartbeat as sessions_heartbeat
29
32
  from plato._generated.api.v2.sessions import make as sessions_make
30
33
  from plato._generated.api.v2.sessions import reset as sessions_reset
31
34
  from plato._generated.api.v2.sessions import set_date as sessions_set_date
35
+ from plato._generated.api.v2.sessions import setup_sandbox as sessions_setup_sandbox
32
36
  from plato._generated.api.v2.sessions import snapshot as sessions_snapshot
33
37
  from plato._generated.api.v2.sessions import state as sessions_state
34
38
  from plato._generated.api.v2.sessions import wait_for_ready as sessions_wait_for_ready
@@ -37,6 +41,8 @@ from plato._generated.models import (
37
41
  AppApiV2SchemasSessionCreateSnapshotResponse,
38
42
  AppApiV2SchemasSessionEvaluateResponse,
39
43
  AppApiV2SchemasSessionHeartbeatResponse,
44
+ AppApiV2SchemasSessionSetupSandboxRequest,
45
+ AppApiV2SchemasSessionSetupSandboxResponse,
40
46
  CreateDiskSnapshotRequest,
41
47
  CreateDiskSnapshotResponse,
42
48
  CreateSessionFromEnvs,
@@ -67,7 +73,10 @@ logger = logging.getLogger(__name__)
67
73
 
68
74
  @dataclass
69
75
  class LoginResult:
70
- """Result of login operation containing browser context and pages."""
76
+ """Result of login operation containing browser context and pages.
77
+
78
+ Requires playwright to be installed.
79
+ """
71
80
 
72
81
  context: BrowserContext
73
82
  pages: dict[str, Page]
@@ -291,9 +300,10 @@ class Session:
291
300
  )
292
301
 
293
302
  # Use generated API function
303
+ # Note: API supports both CreateSessionFromEnvs and CreateSessionFromTask via discriminator
294
304
  response = await sessions_make.asyncio(
295
305
  client=http_client,
296
- body=request_body,
306
+ body=request_body, # type: ignore[arg-type]
297
307
  x_api_key=api_key,
298
308
  )
299
309
 
@@ -550,6 +560,38 @@ class Session:
550
560
  x_api_key=self._api_key,
551
561
  )
552
562
 
563
+ async def setup_sandbox(
564
+ self,
565
+ timeout: int = 120,
566
+ ) -> AppApiV2SchemasSessionSetupSandboxResponse:
567
+ """Setup sandbox environment with Docker overlay on all environments.
568
+
569
+ This configures the VMs for Docker usage with overlay2 storage driver,
570
+ which is significantly faster than the default vfs driver. Should be called
571
+ after session creation and before pulling Docker images.
572
+
573
+ The setup includes:
574
+ - Mounting /dev/vdb to /mnt/docker for Docker storage
575
+ - Configuring Docker with overlay2 storage driver
576
+ - Setting up ECR and Docker Hub authentication
577
+ - Creating a docker-user service for non-root Docker access
578
+
579
+ Args:
580
+ timeout: Setup timeout in seconds (default: 120).
581
+
582
+ Returns:
583
+ SetupSandboxResponse with results per job_id.
584
+ """
585
+ self._check_closed()
586
+
587
+ request = AppApiV2SchemasSessionSetupSandboxRequest(timeout=timeout)
588
+ return await sessions_setup_sandbox.asyncio(
589
+ client=self._http,
590
+ session_id=self.session_id,
591
+ body=request,
592
+ x_api_key=self._api_key,
593
+ )
594
+
553
595
  async def evaluate(self, **kwargs) -> AppApiV2SchemasSessionEvaluateResponse:
554
596
  """Evaluate the session against task criteria.
555
597
 
@@ -738,6 +780,9 @@ class Session:
738
780
  Navigates each page to the environment's public URL and executes
739
781
  the login flow.
740
782
 
783
+ Requires playwright to be installed:
784
+ pip install playwright
785
+
741
786
  Args:
742
787
  browser: Playwright Browser instance.
743
788
  dataset: Dataset name for login flow (default: "base" uses "login" flow).
@@ -750,9 +795,15 @@ class Session:
750
795
 
751
796
  Raises:
752
797
  RuntimeError: If login fails.
798
+ ImportError: If playwright is not installed.
753
799
  """
754
800
  self._check_closed()
755
801
 
802
+ import importlib.util
803
+
804
+ if importlib.util.find_spec("playwright") is None:
805
+ raise ImportError("The login() method requires playwright. Install it with: pip install playwright")
806
+
756
807
  context = await browser.new_context()
757
808
  pages: dict[str, Page] = {}
758
809
 
plato/v2/sync/environment.py CHANGED
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
7
7
 
8
8
  from plato._generated.api.v2 import jobs
9
9
  from plato._generated.models import (
10
+ CreateCheckpointRequest,
10
11
  CreateSnapshotResult,
11
12
  ExecuteCommandRequest,
12
13
  ExecuteCommandResult,
@@ -130,7 +131,7 @@ class Environment:
130
131
  return jobs.snapshot.sync(
131
132
  client=self._http,
132
133
  job_id=self.job_id,
133
- body=None,
134
+ body=CreateCheckpointRequest(),
134
135
  x_api_key=self._api_key,
135
136
  )
136
137