fleet-python 0.2.124__tar.gz → 0.2.126__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {fleet_python-0.2.124/fleet_python.egg-info → fleet_python-0.2.126}/PKG-INFO +1 -1
  2. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/__init__.py +1 -7
  3. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/__init__.py +1 -1
  4. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/base.py +1 -1
  5. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/client.py +5 -68
  6. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/models.py +2 -0
  7. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/base.py +1 -1
  8. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/client.py +5 -62
  9. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/judge.py +0 -54
  10. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/models.py +2 -0
  11. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/__init__.py +0 -4
  12. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/db.py +0 -429
  13. {fleet_python-0.2.124 → fleet_python-0.2.126/fleet_python.egg-info}/PKG-INFO +1 -1
  14. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/SOURCES.txt +0 -6
  15. {fleet_python-0.2.124 → fleet_python-0.2.126}/pyproject.toml +1 -1
  16. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_expect_only.py +15 -15
  17. fleet_python-0.2.124/examples/task_bundle_editing/download_task.py +0 -276
  18. fleet_python-0.2.124/examples/task_bundle_editing/launch_job.py +0 -160
  19. fleet_python-0.2.124/examples/task_bundle_editing/upload_task.py +0 -406
  20. fleet_python-0.2.124/examples/task_bundle_editing/validate_task.py +0 -336
  21. fleet_python-0.2.124/fleet/verifiers/local_executor.py +0 -371
  22. fleet_python-0.2.124/tests/test_judge_criteria_markers.py +0 -192
  23. {fleet_python-0.2.124 → fleet_python-0.2.126}/LICENSE +0 -0
  24. {fleet_python-0.2.124 → fleet_python-0.2.126}/README.md +0 -0
  25. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/diff_example.py +0 -0
  26. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/dsl_example.py +0 -0
  27. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example.py +0 -0
  28. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/exampleResume.py +0 -0
  29. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_account.py +0 -0
  30. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_action_log.py +0 -0
  31. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_client.py +0 -0
  32. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_mcp_anthropic.py +0 -0
  33. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_mcp_openai.py +0 -0
  34. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_sync.py +0 -0
  35. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_task.py +0 -0
  36. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_tasks.py +0 -0
  37. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_verifier.py +0 -0
  38. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/export_tasks.py +0 -0
  39. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/export_tasks_filtered.py +0 -0
  40. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/fetch_tasks.py +0 -0
  41. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/gemini_example.py +0 -0
  42. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/import_tasks.py +0 -0
  43. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/iterate_verifiers.py +0 -0
  44. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/json_tasks_example.py +0 -0
  45. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/nova_act_example.py +0 -0
  46. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/openai_example.py +0 -0
  47. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/openai_simple_example.py +0 -0
  48. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/query_builder_example.py +0 -0
  49. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/quickstart.py +0 -0
  50. {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/test_cdp_logging.py +0 -0
  51. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/env/__init__.py +0 -0
  52. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/env/client.py +0 -0
  53. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/exceptions.py +0 -0
  54. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/global_client.py +0 -0
  55. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/__init__.py +0 -0
  56. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/base.py +0 -0
  57. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/client.py +0 -0
  58. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/judge.py +0 -0
  59. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/__init__.py +0 -0
  60. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/api.py +0 -0
  61. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/base.py +0 -0
  62. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/browser.py +0 -0
  63. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/filesystem.py +0 -0
  64. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/mcp.py +0 -0
  65. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/sqlite.py +0 -0
  66. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/tasks.py +0 -0
  67. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/__init__.py +0 -0
  68. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/bundler.py +0 -0
  69. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/verifier.py +0 -0
  70. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/__init__.py +0 -0
  71. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/Dockerfile +0 -0
  72. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/__init__.py +0 -0
  73. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/agent.py +0 -0
  74. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp/main.py +0 -0
  75. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
  76. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
  77. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
  78. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/requirements.txt +0 -0
  79. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/start.sh +0 -0
  80. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/orchestrator.py +0 -0
  81. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/types.py +0 -0
  82. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/utils.py +0 -0
  83. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/cli.py +0 -0
  84. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/config.py +0 -0
  85. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/env/__init__.py +0 -0
  86. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/env/client.py +0 -0
  87. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/eval/__init__.py +0 -0
  88. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/eval/uploader.py +0 -0
  89. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/exceptions.py +0 -0
  90. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/global_client.py +0 -0
  91. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/__init__.py +0 -0
  92. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/base.py +0 -0
  93. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/client.py +0 -0
  94. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/models.py +0 -0
  95. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/__init__.py +0 -0
  96. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/proxy.py +0 -0
  97. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/whitelist.py +0 -0
  98. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/__init__.py +0 -0
  99. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/api.py +0 -0
  100. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/base.py +0 -0
  101. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/browser.py +0 -0
  102. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/filesystem.py +0 -0
  103. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/mcp.py +0 -0
  104. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/sqlite.py +0 -0
  105. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/tasks.py +0 -0
  106. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/types.py +0 -0
  107. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/__init__.py +0 -0
  108. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/http_logging.py +0 -0
  109. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/logging.py +0 -0
  110. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/playwright.py +0 -0
  111. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/bundler.py +0 -0
  112. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/code.py +0 -0
  113. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/decorator.py +0 -0
  114. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/parse.py +0 -0
  115. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/sql_differ.py +0 -0
  116. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/verifier.py +0 -0
  117. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/dependency_links.txt +0 -0
  118. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/entry_points.txt +0 -0
  119. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/requires.txt +0 -0
  120. {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/top_level.txt +0 -0
  121. {fleet_python-0.2.124 → fleet_python-0.2.126}/scripts/fix_sync_imports.py +0 -0
  122. {fleet_python-0.2.124 → fleet_python-0.2.126}/scripts/unasync.py +0 -0
  123. {fleet_python-0.2.124 → fleet_python-0.2.126}/setup.cfg +0 -0
  124. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/__init__.py +0 -0
  125. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_app_method.py +0 -0
  126. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_expect_exactly.py +0 -0
  127. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_instance_dispatch.py +0 -0
  128. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_sqlite_resource_dual_mode.py +0 -0
  129. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_sqlite_shared_memory_behavior.py +0 -0
  130. {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_verifier_from_string.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-python
3
- Version: 0.2.124
3
+ Version: 0.2.126
4
4
  Summary: Python SDK for Fleet environments
5
5
  Author-email: Fleet AI <nic@fleet.so>
6
6
  License: Apache-2.0
@@ -38,9 +38,6 @@ from .verifiers import (
38
38
  SnapshotDiff,
39
39
  TASK_FAILED_SCORE,
40
40
  TASK_SUCCESSFUL_SCORE,
41
- execute_verifier_local,
42
- LocalEnvironment,
43
- diff_dbs,
44
41
  )
45
42
 
46
43
  # Import async verifiers (default verifier is async for modern usage)
@@ -79,7 +76,7 @@ from . import env
79
76
  from . import global_client as _global_client
80
77
  from ._async import global_client as _async_global_client
81
78
 
82
- __version__ = "0.2.124"
79
+ __version__ = "0.2.126"
83
80
 
84
81
  __all__ = [
85
82
  # Core classes
@@ -117,9 +114,6 @@ __all__ = [
117
114
  "SnapshotDiff",
118
115
  "TASK_FAILED_SCORE",
119
116
  "TASK_SUCCESSFUL_SCORE",
120
- "execute_verifier_local",
121
- "LocalEnvironment",
122
- "diff_dbs",
123
117
  # Environment module
124
118
  "env",
125
119
  # Global client helpers
@@ -44,7 +44,7 @@ from ..types import VerifierFunction
44
44
  from .. import env
45
45
  from . import global_client as _async_global_client
46
46
 
47
- __version__ = "0.2.124"
47
+ __version__ = "0.2.126"
48
48
 
49
49
  __all__ = [
50
50
  # Core classes
@@ -26,7 +26,7 @@ from .exceptions import (
26
26
  try:
27
27
  from .. import __version__
28
28
  except ImportError:
29
- __version__ = "0.2.124"
29
+ __version__ = "0.2.126"
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -601,6 +601,9 @@ class AsyncFleet:
601
601
  )
602
602
 
603
603
  instance = AsyncEnv(client=self.client, **response.json())
604
+ # Resources are loaded lazily on first `db()`/`browser()`/`resources()` access
605
+ # via `_load_resources()`, so we don't preload here. Eagerly loading would
606
+ # fail-fast with a 502 while the container is still warming up.
604
607
  return instance
605
608
 
606
609
  async def make_for_task(self, task: Task) -> AsyncEnv:
@@ -652,6 +655,7 @@ class AsyncFleet:
652
655
  else:
653
656
  response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
654
657
  instance = AsyncEnv(client=self.client, **response.json())
658
+ # Resources load lazily on first `db()`/`browser()`/`resources()` access.
655
659
  return instance
656
660
 
657
661
  def _create_url_instance(self, base_url: str) -> AsyncEnv:
@@ -832,74 +836,7 @@ class AsyncFleet:
832
836
  At least one of run_id or profile_id must be provided.
833
837
  """
834
838
  return await _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
835
-
836
- @staticmethod
837
- async def execute_verifier_local(
838
- verifier_func: str,
839
- seed_db: str,
840
- current_db: str,
841
- final_answer: Optional[str] = None,
842
- ) -> Dict[str, Any]:
843
- """Execute a verifier function locally against SQLite database files.
844
-
845
- No authentication or remote server required. The verifier code is executed
846
- in an isolated namespace with the same helpers available in production
847
- (``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
848
-
849
- Args:
850
- verifier_func: Python source code containing the verifier function definition.
851
- seed_db: Path to the seed (before) SQLite database file.
852
- current_db: Path to the current (after) SQLite database file.
853
- final_answer: Optional final answer string passed to the verifier.
854
-
855
- Returns:
856
- Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
857
-
858
- Example::
859
-
860
- result = await fleet.execute_verifier_local(
861
- verifier_func=verifier_code_string,
862
- seed_db="./seed.db",
863
- current_db="./current.db",
864
- )
865
- print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
866
- """
867
- import asyncio
868
- from ..verifiers.local_executor import execute_verifier_local
869
-
870
- return await asyncio.to_thread(
871
- execute_verifier_local, verifier_func, seed_db, current_db, final_answer
872
- )
873
-
874
- @staticmethod
875
- async def diff_dbs(
876
- seed_db: str,
877
- current_db: str,
878
- ignore_tables: Optional[set] = None,
879
- ignore_table_fields: Optional[Dict[str, set]] = None,
880
- ) -> Dict[str, Any]:
881
- """Compute a structured diff between two local SQLite databases.
882
-
883
- Returns the same format as the runner's ``/diff/structured`` endpoint.
884
- No authentication or network access required.
885
-
886
- Args:
887
- seed_db: Path to the seed (before) SQLite database file.
888
- current_db: Path to the current (after) SQLite database file.
889
- ignore_tables: Optional set of table names to skip entirely.
890
- ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
891
- to strip from the output.
892
-
893
- Returns:
894
- Dict with keys ``success``, ``diff``, and ``message``.
895
- """
896
- import asyncio
897
- from ..verifiers.local_executor import diff_dbs
898
-
899
- return await asyncio.to_thread(
900
- diff_dbs, seed_db, current_db, ignore_tables, ignore_table_fields
901
- )
902
-
839
+
903
840
  async def list_runs(
904
841
  self, profile_id: Optional[str] = None, status: Optional[str] = "active"
905
842
  ) -> List[Run]:
@@ -51,6 +51,7 @@ class Instance(BaseModel):
51
51
  team_id: str = Field(..., title="Team Id")
52
52
  region: str = Field(..., title="Region")
53
53
  env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
54
+ multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
54
55
 
55
56
 
56
57
  class InstanceRequest(BaseModel):
@@ -357,6 +358,7 @@ class InstanceResponse(BaseModel):
357
358
  data_version: Optional[str] = Field(None, title="Data Version")
358
359
  urls: Optional[InstanceURLs] = Field(None, title="Urls")
359
360
  health: Optional[bool] = Field(None, title="Health")
361
+ multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
360
362
 
361
363
 
362
364
  class AccountResponse(BaseModel):
@@ -27,7 +27,7 @@ from .exceptions import (
27
27
  try:
28
28
  from . import __version__
29
29
  except ImportError:
30
- __version__ = "0.2.124"
30
+ __version__ = "0.2.126"
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -613,6 +613,9 @@ class Fleet:
613
613
  )
614
614
 
615
615
  instance = SyncEnv(client=self.client, **response.json())
616
+ # Resources load lazily on first `db()`/`browser()`/`resources()` access via
617
+ # `_load_resources()`. Skipping the eager preload avoids fail-fast 502s while
618
+ # the container is still warming up.
616
619
  return instance
617
620
 
618
621
  def make_for_task(self, task: Task) -> SyncEnv:
@@ -664,6 +667,7 @@ class Fleet:
664
667
  else:
665
668
  response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
666
669
  instance = SyncEnv(client=self.client, **response.json())
670
+ # Resources load lazily on first `db()`/`browser()`/`resources()` access.
667
671
  return instance
668
672
 
669
673
  def _create_url_instance(self, base_url: str) -> SyncEnv:
@@ -844,68 +848,7 @@ class Fleet:
844
848
  At least one of run_id or profile_id must be provided.
845
849
  """
846
850
  return _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
847
-
848
- @staticmethod
849
- def execute_verifier_local(
850
- verifier_func: str,
851
- seed_db: str,
852
- current_db: str,
853
- final_answer: Optional[str] = None,
854
- ) -> Dict[str, Any]:
855
- """Execute a verifier function locally against SQLite database files.
856
-
857
- No authentication or remote server required. The verifier code is executed
858
- in an isolated namespace with the same helpers available in production
859
- (``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
860
-
861
- Args:
862
- verifier_func: Python source code containing the verifier function definition.
863
- seed_db: Path to the seed (before) SQLite database file.
864
- current_db: Path to the current (after) SQLite database file.
865
- final_answer: Optional final answer string passed to the verifier.
866
-
867
- Returns:
868
- Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
869
-
870
- Example::
871
-
872
- result = fleet.execute_verifier_local(
873
- verifier_func=verifier_code_string,
874
- seed_db="./seed.db",
875
- current_db="./current.db",
876
- )
877
- print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
878
- """
879
- from .verifiers.local_executor import execute_verifier_local
880
-
881
- return execute_verifier_local(verifier_func, seed_db, current_db, final_answer)
882
-
883
- @staticmethod
884
- def diff_dbs(
885
- seed_db: str,
886
- current_db: str,
887
- ignore_tables: Optional[set] = None,
888
- ignore_table_fields: Optional[Dict[str, set]] = None,
889
- ) -> Dict[str, Any]:
890
- """Compute a structured diff between two local SQLite databases.
891
-
892
- Returns the same format as the runner's ``/diff/structured`` endpoint.
893
- No authentication or network access required.
894
-
895
- Args:
896
- seed_db: Path to the seed (before) SQLite database file.
897
- current_db: Path to the current (after) SQLite database file.
898
- ignore_tables: Optional set of table names to skip entirely.
899
- ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
900
- to strip from the output.
901
-
902
- Returns:
903
- Dict with keys ``success``, ``diff``, and ``message``.
904
- """
905
- from .verifiers.local_executor import diff_dbs
906
-
907
- return diff_dbs(seed_db, current_db, ignore_tables, ignore_table_fields)
908
-
851
+
909
852
  def list_runs(
910
853
  self, profile_id: Optional[str] = None, status: Optional[str] = "active"
911
854
  ) -> List[Run]:
@@ -823,54 +823,6 @@ def _parse_grade_response(data: dict) -> JudgeResult:
823
823
  return JudgeResult(score, details=data)
824
824
 
825
825
 
826
- def _print_criteria_markers(criteria: list) -> None:
827
- """Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
828
-
829
- The orchestrator (theseus PR #1967) scans verifier stdout for these
830
- markers and wraps the execution result so the client (client PR #1737)
831
- can render an expandable rubric breakdown.
832
-
833
- Converts from the orchestrator judge-response format::
834
-
835
- {"name": str, "score": int, "max_score": int, "reasoning": str}
836
-
837
- to the client-expected marker format::
838
-
839
- {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
840
-
841
- Each criterion's score is normalised to a 0.0–1.0 float using its own
842
- ``max_score``.
843
- """
844
- marker_criteria = []
845
- for c in criteria:
846
- name = c.get("name", "")
847
- cscore = c.get("score", 0)
848
- cmax = c.get("max_score", 0)
849
-
850
- # Normalise per-criterion score to 0.0–1.0
851
- if cmax and float(cmax) > 0:
852
- norm_score = float(cscore) / float(cmax)
853
- else:
854
- norm_score = float(cscore)
855
-
856
- entry: dict = {
857
- "criteria": name,
858
- "score": round(norm_score, 4),
859
- "score_out_of": 1.0,
860
- }
861
-
862
- reasoning = c.get("reasoning", "")
863
- if reasoning:
864
- entry["description"] = reasoning
865
-
866
- marker_criteria.append(entry)
867
-
868
- if marker_criteria:
869
- print(">>> CRITERIA >>>")
870
- print(json.dumps(marker_criteria))
871
- print("<<< CRITERIA <<<")
872
-
873
-
874
826
  def _print_judge_result(data: dict) -> None:
875
827
  """Print detailed judge grading result for verifier stdout capture."""
876
828
  model = data.get("model_used", "unknown")
@@ -896,12 +848,6 @@ def _print_judge_result(data: dict) -> None:
896
848
  if len(reasoning) > 200:
897
849
  reasoning = reasoning[:200] + "..."
898
850
  print(f"[C] {name}: {cscore}/{cmax} — {reasoning}")
899
-
900
- # Emit structured criteria via stdout markers so the orchestrator
901
- # (_extract_criteria_from_stdout) and client can render a rubric
902
- # breakdown. Schema per element:
903
- # {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
904
- _print_criteria_markers(criteria)
905
851
  else:
906
852
  print(f"[C] Score: {normalized:.2f}")
907
853
 
@@ -52,6 +52,7 @@ class Instance(BaseModel):
52
52
  region: str = Field(..., title="Region")
53
53
  env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
54
54
  run_id: Optional[str] = Field(None, title="Run Id")
55
+ multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
55
56
 
56
57
 
57
58
  class InstanceRequest(BaseModel):
@@ -369,6 +370,7 @@ class InstanceResponse(BaseModel):
369
370
  profile_id: Optional[str] = Field(None, title="Profile Id")
370
371
  heartbeat_interval: Optional[int] = Field(None, title="Heartbeat Interval")
371
372
  heartbeat_region: Optional[str] = Field(None, title="Heartbeat Region")
373
+ multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
372
374
 
373
375
 
374
376
  class Run(BaseModel):
@@ -6,7 +6,6 @@ from .verifier import (
6
6
  verifier,
7
7
  SyncVerifierFunction,
8
8
  )
9
- from .local_executor import execute_verifier_local, LocalEnvironment, diff_dbs
10
9
 
11
10
  __all__ = [
12
11
  "DatabaseSnapshot",
@@ -16,7 +15,4 @@ __all__ = [
16
15
  "TASK_FAILED_SCORE",
17
16
  "verifier",
18
17
  "SyncVerifierFunction",
19
- "execute_verifier_local",
20
- "LocalEnvironment",
21
- "diff_dbs",
22
18
  ]