fleet-python 0.2.124__tar.gz → 0.2.126__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.124/fleet_python.egg-info → fleet_python-0.2.126}/PKG-INFO +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/__init__.py +1 -7
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/base.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/client.py +5 -68
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/models.py +2 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/base.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/client.py +5 -62
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/judge.py +0 -54
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/models.py +2 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/__init__.py +0 -4
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/db.py +0 -429
- {fleet_python-0.2.124 → fleet_python-0.2.126/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/SOURCES.txt +0 -6
- {fleet_python-0.2.124 → fleet_python-0.2.126}/pyproject.toml +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_expect_only.py +15 -15
- fleet_python-0.2.124/examples/task_bundle_editing/download_task.py +0 -276
- fleet_python-0.2.124/examples/task_bundle_editing/launch_job.py +0 -160
- fleet_python-0.2.124/examples/task_bundle_editing/upload_task.py +0 -406
- fleet_python-0.2.124/examples/task_bundle_editing/validate_task.py +0 -336
- fleet_python-0.2.124/fleet/verifiers/local_executor.py +0 -371
- fleet_python-0.2.124/tests/test_judge_criteria_markers.py +0 -192
- {fleet_python-0.2.124 → fleet_python-0.2.126}/LICENSE +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/README.md +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/diff_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_account.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_sync.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_task.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/export_tasks_filtered.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/openai_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/quickstart.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/judge.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/api.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/filesystem.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/Dockerfile +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/agent.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp/main.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/requirements.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/gemini_cua/start.sh +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/orchestrator.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/types.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/cli.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/config.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/env/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/global_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/api.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/filesystem.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/types.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/utils/playwright.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/scripts/unasync.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/setup.cfg +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_expect_exactly.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.126}/tests/test_verifier_from_string.py +0 -0
|
@@ -38,9 +38,6 @@ from .verifiers import (
|
|
|
38
38
|
SnapshotDiff,
|
|
39
39
|
TASK_FAILED_SCORE,
|
|
40
40
|
TASK_SUCCESSFUL_SCORE,
|
|
41
|
-
execute_verifier_local,
|
|
42
|
-
LocalEnvironment,
|
|
43
|
-
diff_dbs,
|
|
44
41
|
)
|
|
45
42
|
|
|
46
43
|
# Import async verifiers (default verifier is async for modern usage)
|
|
@@ -79,7 +76,7 @@ from . import env
|
|
|
79
76
|
from . import global_client as _global_client
|
|
80
77
|
from ._async import global_client as _async_global_client
|
|
81
78
|
|
|
82
|
-
__version__ = "0.2.124"
|
|
79
|
+
__version__ = "0.2.126"
|
|
83
80
|
|
|
84
81
|
__all__ = [
|
|
85
82
|
# Core classes
|
|
@@ -117,9 +114,6 @@ __all__ = [
|
|
|
117
114
|
"SnapshotDiff",
|
|
118
115
|
"TASK_FAILED_SCORE",
|
|
119
116
|
"TASK_SUCCESSFUL_SCORE",
|
|
120
|
-
"execute_verifier_local",
|
|
121
|
-
"LocalEnvironment",
|
|
122
|
-
"diff_dbs",
|
|
123
117
|
# Environment module
|
|
124
118
|
"env",
|
|
125
119
|
# Global client helpers
|
|
@@ -601,6 +601,9 @@ class AsyncFleet:
|
|
|
601
601
|
)
|
|
602
602
|
|
|
603
603
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
604
|
+
# Resources are loaded lazily on first `db()`/`browser()`/`resources()` access
|
|
605
|
+
# via `_load_resources()`, so we don't preload here. Eagerly loading would
|
|
606
|
+
# fail-fast with a 502 while the container is still warming up.
|
|
604
607
|
return instance
|
|
605
608
|
|
|
606
609
|
async def make_for_task(self, task: Task) -> AsyncEnv:
|
|
@@ -652,6 +655,7 @@ class AsyncFleet:
|
|
|
652
655
|
else:
|
|
653
656
|
response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
654
657
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
658
|
+
# Resources load lazily on first `db()`/`browser()`/`resources()` access.
|
|
655
659
|
return instance
|
|
656
660
|
|
|
657
661
|
def _create_url_instance(self, base_url: str) -> AsyncEnv:
|
|
@@ -832,74 +836,7 @@ class AsyncFleet:
|
|
|
832
836
|
At least one of run_id or profile_id must be provided.
|
|
833
837
|
"""
|
|
834
838
|
return await _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
835
|
-
|
|
836
|
-
@staticmethod
|
|
837
|
-
async def execute_verifier_local(
|
|
838
|
-
verifier_func: str,
|
|
839
|
-
seed_db: str,
|
|
840
|
-
current_db: str,
|
|
841
|
-
final_answer: Optional[str] = None,
|
|
842
|
-
) -> Dict[str, Any]:
|
|
843
|
-
"""Execute a verifier function locally against SQLite database files.
|
|
844
|
-
|
|
845
|
-
No authentication or remote server required. The verifier code is executed
|
|
846
|
-
in an isolated namespace with the same helpers available in production
|
|
847
|
-
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
848
|
-
|
|
849
|
-
Args:
|
|
850
|
-
verifier_func: Python source code containing the verifier function definition.
|
|
851
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
852
|
-
current_db: Path to the current (after) SQLite database file.
|
|
853
|
-
final_answer: Optional final answer string passed to the verifier.
|
|
854
|
-
|
|
855
|
-
Returns:
|
|
856
|
-
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
857
|
-
|
|
858
|
-
Example::
|
|
859
|
-
|
|
860
|
-
result = await fleet.execute_verifier_local(
|
|
861
|
-
verifier_func=verifier_code_string,
|
|
862
|
-
seed_db="./seed.db",
|
|
863
|
-
current_db="./current.db",
|
|
864
|
-
)
|
|
865
|
-
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
866
|
-
"""
|
|
867
|
-
import asyncio
|
|
868
|
-
from ..verifiers.local_executor import execute_verifier_local
|
|
869
|
-
|
|
870
|
-
return await asyncio.to_thread(
|
|
871
|
-
execute_verifier_local, verifier_func, seed_db, current_db, final_answer
|
|
872
|
-
)
|
|
873
|
-
|
|
874
|
-
@staticmethod
|
|
875
|
-
async def diff_dbs(
|
|
876
|
-
seed_db: str,
|
|
877
|
-
current_db: str,
|
|
878
|
-
ignore_tables: Optional[set] = None,
|
|
879
|
-
ignore_table_fields: Optional[Dict[str, set]] = None,
|
|
880
|
-
) -> Dict[str, Any]:
|
|
881
|
-
"""Compute a structured diff between two local SQLite databases.
|
|
882
|
-
|
|
883
|
-
Returns the same format as the runner's ``/diff/structured`` endpoint.
|
|
884
|
-
No authentication or network access required.
|
|
885
|
-
|
|
886
|
-
Args:
|
|
887
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
888
|
-
current_db: Path to the current (after) SQLite database file.
|
|
889
|
-
ignore_tables: Optional set of table names to skip entirely.
|
|
890
|
-
ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
|
|
891
|
-
to strip from the output.
|
|
892
|
-
|
|
893
|
-
Returns:
|
|
894
|
-
Dict with keys ``success``, ``diff``, and ``message``.
|
|
895
|
-
"""
|
|
896
|
-
import asyncio
|
|
897
|
-
from ..verifiers.local_executor import diff_dbs
|
|
898
|
-
|
|
899
|
-
return await asyncio.to_thread(
|
|
900
|
-
diff_dbs, seed_db, current_db, ignore_tables, ignore_table_fields
|
|
901
|
-
)
|
|
902
|
-
|
|
839
|
+
|
|
903
840
|
async def list_runs(
|
|
904
841
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
905
842
|
) -> List[Run]:
|
|
@@ -51,6 +51,7 @@ class Instance(BaseModel):
|
|
|
51
51
|
team_id: str = Field(..., title="Team Id")
|
|
52
52
|
region: str = Field(..., title="Region")
|
|
53
53
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
54
|
+
multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
class InstanceRequest(BaseModel):
|
|
@@ -357,6 +358,7 @@ class InstanceResponse(BaseModel):
|
|
|
357
358
|
data_version: Optional[str] = Field(None, title="Data Version")
|
|
358
359
|
urls: Optional[InstanceURLs] = Field(None, title="Urls")
|
|
359
360
|
health: Optional[bool] = Field(None, title="Health")
|
|
361
|
+
multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
|
|
360
362
|
|
|
361
363
|
|
|
362
364
|
class AccountResponse(BaseModel):
|
|
@@ -613,6 +613,9 @@ class Fleet:
|
|
|
613
613
|
)
|
|
614
614
|
|
|
615
615
|
instance = SyncEnv(client=self.client, **response.json())
|
|
616
|
+
# Resources load lazily on first `db()`/`browser()`/`resources()` access via
|
|
617
|
+
# `_load_resources()`. Skipping the eager preload avoids fail-fast 502s while
|
|
618
|
+
# the container is still warming up.
|
|
616
619
|
return instance
|
|
617
620
|
|
|
618
621
|
def make_for_task(self, task: Task) -> SyncEnv:
|
|
@@ -664,6 +667,7 @@ class Fleet:
|
|
|
664
667
|
else:
|
|
665
668
|
response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
666
669
|
instance = SyncEnv(client=self.client, **response.json())
|
|
670
|
+
# Resources load lazily on first `db()`/`browser()`/`resources()` access.
|
|
667
671
|
return instance
|
|
668
672
|
|
|
669
673
|
def _create_url_instance(self, base_url: str) -> SyncEnv:
|
|
@@ -844,68 +848,7 @@ class Fleet:
|
|
|
844
848
|
At least one of run_id or profile_id must be provided.
|
|
845
849
|
"""
|
|
846
850
|
return _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
847
|
-
|
|
848
|
-
@staticmethod
|
|
849
|
-
def execute_verifier_local(
|
|
850
|
-
verifier_func: str,
|
|
851
|
-
seed_db: str,
|
|
852
|
-
current_db: str,
|
|
853
|
-
final_answer: Optional[str] = None,
|
|
854
|
-
) -> Dict[str, Any]:
|
|
855
|
-
"""Execute a verifier function locally against SQLite database files.
|
|
856
|
-
|
|
857
|
-
No authentication or remote server required. The verifier code is executed
|
|
858
|
-
in an isolated namespace with the same helpers available in production
|
|
859
|
-
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
860
|
-
|
|
861
|
-
Args:
|
|
862
|
-
verifier_func: Python source code containing the verifier function definition.
|
|
863
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
864
|
-
current_db: Path to the current (after) SQLite database file.
|
|
865
|
-
final_answer: Optional final answer string passed to the verifier.
|
|
866
|
-
|
|
867
|
-
Returns:
|
|
868
|
-
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
869
|
-
|
|
870
|
-
Example::
|
|
871
|
-
|
|
872
|
-
result = fleet.execute_verifier_local(
|
|
873
|
-
verifier_func=verifier_code_string,
|
|
874
|
-
seed_db="./seed.db",
|
|
875
|
-
current_db="./current.db",
|
|
876
|
-
)
|
|
877
|
-
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
878
|
-
"""
|
|
879
|
-
from .verifiers.local_executor import execute_verifier_local
|
|
880
|
-
|
|
881
|
-
return execute_verifier_local(verifier_func, seed_db, current_db, final_answer)
|
|
882
|
-
|
|
883
|
-
@staticmethod
|
|
884
|
-
def diff_dbs(
|
|
885
|
-
seed_db: str,
|
|
886
|
-
current_db: str,
|
|
887
|
-
ignore_tables: Optional[set] = None,
|
|
888
|
-
ignore_table_fields: Optional[Dict[str, set]] = None,
|
|
889
|
-
) -> Dict[str, Any]:
|
|
890
|
-
"""Compute a structured diff between two local SQLite databases.
|
|
891
|
-
|
|
892
|
-
Returns the same format as the runner's ``/diff/structured`` endpoint.
|
|
893
|
-
No authentication or network access required.
|
|
894
|
-
|
|
895
|
-
Args:
|
|
896
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
897
|
-
current_db: Path to the current (after) SQLite database file.
|
|
898
|
-
ignore_tables: Optional set of table names to skip entirely.
|
|
899
|
-
ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
|
|
900
|
-
to strip from the output.
|
|
901
|
-
|
|
902
|
-
Returns:
|
|
903
|
-
Dict with keys ``success``, ``diff``, and ``message``.
|
|
904
|
-
"""
|
|
905
|
-
from .verifiers.local_executor import diff_dbs
|
|
906
|
-
|
|
907
|
-
return diff_dbs(seed_db, current_db, ignore_tables, ignore_table_fields)
|
|
908
|
-
|
|
851
|
+
|
|
909
852
|
def list_runs(
|
|
910
853
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
911
854
|
) -> List[Run]:
|
|
@@ -823,54 +823,6 @@ def _parse_grade_response(data: dict) -> JudgeResult:
|
|
|
823
823
|
return JudgeResult(score, details=data)
|
|
824
824
|
|
|
825
825
|
|
|
826
|
-
def _print_criteria_markers(criteria: list) -> None:
|
|
827
|
-
"""Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
|
|
828
|
-
|
|
829
|
-
The orchestrator (theseus PR #1967) scans verifier stdout for these
|
|
830
|
-
markers and wraps the execution result so the client (client PR #1737)
|
|
831
|
-
can render an expandable rubric breakdown.
|
|
832
|
-
|
|
833
|
-
Converts from the orchestrator judge-response format::
|
|
834
|
-
|
|
835
|
-
{"name": str, "score": int, "max_score": int, "reasoning": str}
|
|
836
|
-
|
|
837
|
-
to the client-expected marker format::
|
|
838
|
-
|
|
839
|
-
{"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
840
|
-
|
|
841
|
-
Each criterion's score is normalised to a 0.0–1.0 float using its own
|
|
842
|
-
``max_score``.
|
|
843
|
-
"""
|
|
844
|
-
marker_criteria = []
|
|
845
|
-
for c in criteria:
|
|
846
|
-
name = c.get("name", "")
|
|
847
|
-
cscore = c.get("score", 0)
|
|
848
|
-
cmax = c.get("max_score", 0)
|
|
849
|
-
|
|
850
|
-
# Normalise per-criterion score to 0.0–1.0
|
|
851
|
-
if cmax and float(cmax) > 0:
|
|
852
|
-
norm_score = float(cscore) / float(cmax)
|
|
853
|
-
else:
|
|
854
|
-
norm_score = float(cscore)
|
|
855
|
-
|
|
856
|
-
entry: dict = {
|
|
857
|
-
"criteria": name,
|
|
858
|
-
"score": round(norm_score, 4),
|
|
859
|
-
"score_out_of": 1.0,
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
reasoning = c.get("reasoning", "")
|
|
863
|
-
if reasoning:
|
|
864
|
-
entry["description"] = reasoning
|
|
865
|
-
|
|
866
|
-
marker_criteria.append(entry)
|
|
867
|
-
|
|
868
|
-
if marker_criteria:
|
|
869
|
-
print(">>> CRITERIA >>>")
|
|
870
|
-
print(json.dumps(marker_criteria))
|
|
871
|
-
print("<<< CRITERIA <<<")
|
|
872
|
-
|
|
873
|
-
|
|
874
826
|
def _print_judge_result(data: dict) -> None:
|
|
875
827
|
"""Print detailed judge grading result for verifier stdout capture."""
|
|
876
828
|
model = data.get("model_used", "unknown")
|
|
@@ -896,12 +848,6 @@ def _print_judge_result(data: dict) -> None:
|
|
|
896
848
|
if len(reasoning) > 200:
|
|
897
849
|
reasoning = reasoning[:200] + "..."
|
|
898
850
|
print(f"[C] {name}: {cscore}/{cmax} — {reasoning}")
|
|
899
|
-
|
|
900
|
-
# Emit structured criteria via stdout markers so the orchestrator
|
|
901
|
-
# (_extract_criteria_from_stdout) and client can render a rubric
|
|
902
|
-
# breakdown. Schema per element:
|
|
903
|
-
# {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
904
|
-
_print_criteria_markers(criteria)
|
|
905
851
|
else:
|
|
906
852
|
print(f"[C] Score: {normalized:.2f}")
|
|
907
853
|
|
|
@@ -52,6 +52,7 @@ class Instance(BaseModel):
|
|
|
52
52
|
region: str = Field(..., title="Region")
|
|
53
53
|
env_variables: Optional[Dict[str, Any]] = Field(None, title="Env Variables")
|
|
54
54
|
run_id: Optional[str] = Field(None, title="Run Id")
|
|
55
|
+
multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
class InstanceRequest(BaseModel):
|
|
@@ -369,6 +370,7 @@ class InstanceResponse(BaseModel):
|
|
|
369
370
|
profile_id: Optional[str] = Field(None, title="Profile Id")
|
|
370
371
|
heartbeat_interval: Optional[int] = Field(None, title="Heartbeat Interval")
|
|
371
372
|
heartbeat_region: Optional[str] = Field(None, title="Heartbeat Region")
|
|
373
|
+
multi_env_list: Optional[List[str]] = Field(None, title="Multi Env List")
|
|
372
374
|
|
|
373
375
|
|
|
374
376
|
class Run(BaseModel):
|
|
@@ -6,7 +6,6 @@ from .verifier import (
|
|
|
6
6
|
verifier,
|
|
7
7
|
SyncVerifierFunction,
|
|
8
8
|
)
|
|
9
|
-
from .local_executor import execute_verifier_local, LocalEnvironment, diff_dbs
|
|
10
9
|
|
|
11
10
|
__all__ = [
|
|
12
11
|
"DatabaseSnapshot",
|
|
@@ -16,7 +15,4 @@ __all__ = [
|
|
|
16
15
|
"TASK_FAILED_SCORE",
|
|
17
16
|
"verifier",
|
|
18
17
|
"SyncVerifierFunction",
|
|
19
|
-
"execute_verifier_local",
|
|
20
|
-
"LocalEnvironment",
|
|
21
|
-
"diff_dbs",
|
|
22
18
|
]
|