fleet-python 0.2.124__tar.gz → 0.2.125__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.124/fleet_python.egg-info → fleet_python-0.2.125}/PKG-INFO +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/__init__.py +1 -7
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/base.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/client.py +3 -68
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/base.py +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/client.py +3 -62
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/judge.py +0 -54
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/__init__.py +0 -4
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/db.py +0 -429
- {fleet_python-0.2.124 → fleet_python-0.2.125/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet_python.egg-info/SOURCES.txt +0 -6
- {fleet_python-0.2.124 → fleet_python-0.2.125}/pyproject.toml +1 -1
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_expect_only.py +15 -15
- fleet_python-0.2.124/examples/task_bundle_editing/download_task.py +0 -276
- fleet_python-0.2.124/examples/task_bundle_editing/launch_job.py +0 -160
- fleet_python-0.2.124/examples/task_bundle_editing/upload_task.py +0 -406
- fleet_python-0.2.124/examples/task_bundle_editing/validate_task.py +0 -336
- fleet_python-0.2.124/fleet/verifiers/local_executor.py +0 -371
- fleet_python-0.2.124/tests/test_judge_criteria_markers.py +0 -192
- {fleet_python-0.2.124 → fleet_python-0.2.125}/LICENSE +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/README.md +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/diff_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_account.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_sync.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_task.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/export_tasks_filtered.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/openai_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/quickstart.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/judge.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/models.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/api.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/filesystem.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/Dockerfile +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/agent.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/mcp/main.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/requirements.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/gemini_cua/start.sh +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/orchestrator.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/types.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/cli.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/config.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/env/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/global_client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/models.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/api.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/filesystem.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/tasks.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/types.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/utils/playwright.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/scripts/unasync.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/setup.cfg +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/__init__.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_expect_exactly.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.124 → fleet_python-0.2.125}/tests/test_verifier_from_string.py +0 -0
|
@@ -38,9 +38,6 @@ from .verifiers import (
|
|
|
38
38
|
SnapshotDiff,
|
|
39
39
|
TASK_FAILED_SCORE,
|
|
40
40
|
TASK_SUCCESSFUL_SCORE,
|
|
41
|
-
execute_verifier_local,
|
|
42
|
-
LocalEnvironment,
|
|
43
|
-
diff_dbs,
|
|
44
41
|
)
|
|
45
42
|
|
|
46
43
|
# Import async verifiers (default verifier is async for modern usage)
|
|
@@ -79,7 +76,7 @@ from . import env
|
|
|
79
76
|
from . import global_client as _global_client
|
|
80
77
|
from ._async import global_client as _async_global_client
|
|
81
78
|
|
|
82
|
-
__version__ = "0.2.124"
|
|
79
|
+
__version__ = "0.2.125"
|
|
83
80
|
|
|
84
81
|
__all__ = [
|
|
85
82
|
# Core classes
|
|
@@ -117,9 +114,6 @@ __all__ = [
|
|
|
117
114
|
"SnapshotDiff",
|
|
118
115
|
"TASK_FAILED_SCORE",
|
|
119
116
|
"TASK_SUCCESSFUL_SCORE",
|
|
120
|
-
"execute_verifier_local",
|
|
121
|
-
"LocalEnvironment",
|
|
122
|
-
"diff_dbs",
|
|
123
117
|
# Environment module
|
|
124
118
|
"env",
|
|
125
119
|
# Global client helpers
|
|
@@ -601,6 +601,7 @@ class AsyncFleet:
|
|
|
601
601
|
)
|
|
602
602
|
|
|
603
603
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
604
|
+
await instance.instance.load()
|
|
604
605
|
return instance
|
|
605
606
|
|
|
606
607
|
async def make_for_task(self, task: Task) -> AsyncEnv:
|
|
@@ -652,6 +653,7 @@ class AsyncFleet:
|
|
|
652
653
|
else:
|
|
653
654
|
response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
654
655
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
656
|
+
await instance.instance.load()
|
|
655
657
|
return instance
|
|
656
658
|
|
|
657
659
|
def _create_url_instance(self, base_url: str) -> AsyncEnv:
|
|
@@ -832,74 +834,7 @@ class AsyncFleet:
|
|
|
832
834
|
At least one of run_id or profile_id must be provided.
|
|
833
835
|
"""
|
|
834
836
|
return await _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
835
|
-
|
|
836
|
-
@staticmethod
|
|
837
|
-
async def execute_verifier_local(
|
|
838
|
-
verifier_func: str,
|
|
839
|
-
seed_db: str,
|
|
840
|
-
current_db: str,
|
|
841
|
-
final_answer: Optional[str] = None,
|
|
842
|
-
) -> Dict[str, Any]:
|
|
843
|
-
"""Execute a verifier function locally against SQLite database files.
|
|
844
|
-
|
|
845
|
-
No authentication or remote server required. The verifier code is executed
|
|
846
|
-
in an isolated namespace with the same helpers available in production
|
|
847
|
-
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
848
|
-
|
|
849
|
-
Args:
|
|
850
|
-
verifier_func: Python source code containing the verifier function definition.
|
|
851
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
852
|
-
current_db: Path to the current (after) SQLite database file.
|
|
853
|
-
final_answer: Optional final answer string passed to the verifier.
|
|
854
|
-
|
|
855
|
-
Returns:
|
|
856
|
-
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
857
|
-
|
|
858
|
-
Example::
|
|
859
|
-
|
|
860
|
-
result = await fleet.execute_verifier_local(
|
|
861
|
-
verifier_func=verifier_code_string,
|
|
862
|
-
seed_db="./seed.db",
|
|
863
|
-
current_db="./current.db",
|
|
864
|
-
)
|
|
865
|
-
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
866
|
-
"""
|
|
867
|
-
import asyncio
|
|
868
|
-
from ..verifiers.local_executor import execute_verifier_local
|
|
869
|
-
|
|
870
|
-
return await asyncio.to_thread(
|
|
871
|
-
execute_verifier_local, verifier_func, seed_db, current_db, final_answer
|
|
872
|
-
)
|
|
873
|
-
|
|
874
|
-
@staticmethod
|
|
875
|
-
async def diff_dbs(
|
|
876
|
-
seed_db: str,
|
|
877
|
-
current_db: str,
|
|
878
|
-
ignore_tables: Optional[set] = None,
|
|
879
|
-
ignore_table_fields: Optional[Dict[str, set]] = None,
|
|
880
|
-
) -> Dict[str, Any]:
|
|
881
|
-
"""Compute a structured diff between two local SQLite databases.
|
|
882
|
-
|
|
883
|
-
Returns the same format as the runner's ``/diff/structured`` endpoint.
|
|
884
|
-
No authentication or network access required.
|
|
885
|
-
|
|
886
|
-
Args:
|
|
887
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
888
|
-
current_db: Path to the current (after) SQLite database file.
|
|
889
|
-
ignore_tables: Optional set of table names to skip entirely.
|
|
890
|
-
ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
|
|
891
|
-
to strip from the output.
|
|
892
|
-
|
|
893
|
-
Returns:
|
|
894
|
-
Dict with keys ``success``, ``diff``, and ``message``.
|
|
895
|
-
"""
|
|
896
|
-
import asyncio
|
|
897
|
-
from ..verifiers.local_executor import diff_dbs
|
|
898
|
-
|
|
899
|
-
return await asyncio.to_thread(
|
|
900
|
-
diff_dbs, seed_db, current_db, ignore_tables, ignore_table_fields
|
|
901
|
-
)
|
|
902
|
-
|
|
837
|
+
|
|
903
838
|
async def list_runs(
|
|
904
839
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
905
840
|
) -> List[Run]:
|
|
@@ -613,6 +613,7 @@ class Fleet:
|
|
|
613
613
|
)
|
|
614
614
|
|
|
615
615
|
instance = SyncEnv(client=self.client, **response.json())
|
|
616
|
+
instance.instance.load()
|
|
616
617
|
return instance
|
|
617
618
|
|
|
618
619
|
def make_for_task(self, task: Task) -> SyncEnv:
|
|
@@ -664,6 +665,7 @@ class Fleet:
|
|
|
664
665
|
else:
|
|
665
666
|
response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
666
667
|
instance = SyncEnv(client=self.client, **response.json())
|
|
668
|
+
instance.instance.load()
|
|
667
669
|
return instance
|
|
668
670
|
|
|
669
671
|
def _create_url_instance(self, base_url: str) -> SyncEnv:
|
|
@@ -844,68 +846,7 @@ class Fleet:
|
|
|
844
846
|
At least one of run_id or profile_id must be provided.
|
|
845
847
|
"""
|
|
846
848
|
return _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
847
|
-
|
|
848
|
-
@staticmethod
|
|
849
|
-
def execute_verifier_local(
|
|
850
|
-
verifier_func: str,
|
|
851
|
-
seed_db: str,
|
|
852
|
-
current_db: str,
|
|
853
|
-
final_answer: Optional[str] = None,
|
|
854
|
-
) -> Dict[str, Any]:
|
|
855
|
-
"""Execute a verifier function locally against SQLite database files.
|
|
856
|
-
|
|
857
|
-
No authentication or remote server required. The verifier code is executed
|
|
858
|
-
in an isolated namespace with the same helpers available in production
|
|
859
|
-
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
860
|
-
|
|
861
|
-
Args:
|
|
862
|
-
verifier_func: Python source code containing the verifier function definition.
|
|
863
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
864
|
-
current_db: Path to the current (after) SQLite database file.
|
|
865
|
-
final_answer: Optional final answer string passed to the verifier.
|
|
866
|
-
|
|
867
|
-
Returns:
|
|
868
|
-
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
869
|
-
|
|
870
|
-
Example::
|
|
871
|
-
|
|
872
|
-
result = fleet.execute_verifier_local(
|
|
873
|
-
verifier_func=verifier_code_string,
|
|
874
|
-
seed_db="./seed.db",
|
|
875
|
-
current_db="./current.db",
|
|
876
|
-
)
|
|
877
|
-
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
878
|
-
"""
|
|
879
|
-
from .verifiers.local_executor import execute_verifier_local
|
|
880
|
-
|
|
881
|
-
return execute_verifier_local(verifier_func, seed_db, current_db, final_answer)
|
|
882
|
-
|
|
883
|
-
@staticmethod
|
|
884
|
-
def diff_dbs(
|
|
885
|
-
seed_db: str,
|
|
886
|
-
current_db: str,
|
|
887
|
-
ignore_tables: Optional[set] = None,
|
|
888
|
-
ignore_table_fields: Optional[Dict[str, set]] = None,
|
|
889
|
-
) -> Dict[str, Any]:
|
|
890
|
-
"""Compute a structured diff between two local SQLite databases.
|
|
891
|
-
|
|
892
|
-
Returns the same format as the runner's ``/diff/structured`` endpoint.
|
|
893
|
-
No authentication or network access required.
|
|
894
|
-
|
|
895
|
-
Args:
|
|
896
|
-
seed_db: Path to the seed (before) SQLite database file.
|
|
897
|
-
current_db: Path to the current (after) SQLite database file.
|
|
898
|
-
ignore_tables: Optional set of table names to skip entirely.
|
|
899
|
-
ignore_table_fields: Optional mapping of ``{table: {field, ...}}``
|
|
900
|
-
to strip from the output.
|
|
901
|
-
|
|
902
|
-
Returns:
|
|
903
|
-
Dict with keys ``success``, ``diff``, and ``message``.
|
|
904
|
-
"""
|
|
905
|
-
from .verifiers.local_executor import diff_dbs
|
|
906
|
-
|
|
907
|
-
return diff_dbs(seed_db, current_db, ignore_tables, ignore_table_fields)
|
|
908
|
-
|
|
849
|
+
|
|
909
850
|
def list_runs(
|
|
910
851
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
911
852
|
) -> List[Run]:
|
|
@@ -823,54 +823,6 @@ def _parse_grade_response(data: dict) -> JudgeResult:
|
|
|
823
823
|
return JudgeResult(score, details=data)
|
|
824
824
|
|
|
825
825
|
|
|
826
|
-
def _print_criteria_markers(criteria: list) -> None:
|
|
827
|
-
"""Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
|
|
828
|
-
|
|
829
|
-
The orchestrator (theseus PR #1967) scans verifier stdout for these
|
|
830
|
-
markers and wraps the execution result so the client (client PR #1737)
|
|
831
|
-
can render an expandable rubric breakdown.
|
|
832
|
-
|
|
833
|
-
Converts from the orchestrator judge-response format::
|
|
834
|
-
|
|
835
|
-
{"name": str, "score": int, "max_score": int, "reasoning": str}
|
|
836
|
-
|
|
837
|
-
to the client-expected marker format::
|
|
838
|
-
|
|
839
|
-
{"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
840
|
-
|
|
841
|
-
Each criterion's score is normalised to a 0.0–1.0 float using its own
|
|
842
|
-
``max_score``.
|
|
843
|
-
"""
|
|
844
|
-
marker_criteria = []
|
|
845
|
-
for c in criteria:
|
|
846
|
-
name = c.get("name", "")
|
|
847
|
-
cscore = c.get("score", 0)
|
|
848
|
-
cmax = c.get("max_score", 0)
|
|
849
|
-
|
|
850
|
-
# Normalise per-criterion score to 0.0–1.0
|
|
851
|
-
if cmax and float(cmax) > 0:
|
|
852
|
-
norm_score = float(cscore) / float(cmax)
|
|
853
|
-
else:
|
|
854
|
-
norm_score = float(cscore)
|
|
855
|
-
|
|
856
|
-
entry: dict = {
|
|
857
|
-
"criteria": name,
|
|
858
|
-
"score": round(norm_score, 4),
|
|
859
|
-
"score_out_of": 1.0,
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
reasoning = c.get("reasoning", "")
|
|
863
|
-
if reasoning:
|
|
864
|
-
entry["description"] = reasoning
|
|
865
|
-
|
|
866
|
-
marker_criteria.append(entry)
|
|
867
|
-
|
|
868
|
-
if marker_criteria:
|
|
869
|
-
print(">>> CRITERIA >>>")
|
|
870
|
-
print(json.dumps(marker_criteria))
|
|
871
|
-
print("<<< CRITERIA <<<")
|
|
872
|
-
|
|
873
|
-
|
|
874
826
|
def _print_judge_result(data: dict) -> None:
|
|
875
827
|
"""Print detailed judge grading result for verifier stdout capture."""
|
|
876
828
|
model = data.get("model_used", "unknown")
|
|
@@ -896,12 +848,6 @@ def _print_judge_result(data: dict) -> None:
|
|
|
896
848
|
if len(reasoning) > 200:
|
|
897
849
|
reasoning = reasoning[:200] + "..."
|
|
898
850
|
print(f"[C] {name}: {cscore}/{cmax} — {reasoning}")
|
|
899
|
-
|
|
900
|
-
# Emit structured criteria via stdout markers so the orchestrator
|
|
901
|
-
# (_extract_criteria_from_stdout) and client can render a rubric
|
|
902
|
-
# breakdown. Schema per element:
|
|
903
|
-
# {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
904
|
-
_print_criteria_markers(criteria)
|
|
905
851
|
else:
|
|
906
852
|
print(f"[C] Score: {normalized:.2f}")
|
|
907
853
|
|
|
@@ -6,7 +6,6 @@ from .verifier import (
|
|
|
6
6
|
verifier,
|
|
7
7
|
SyncVerifierFunction,
|
|
8
8
|
)
|
|
9
|
-
from .local_executor import execute_verifier_local, LocalEnvironment, diff_dbs
|
|
10
9
|
|
|
11
10
|
__all__ = [
|
|
12
11
|
"DatabaseSnapshot",
|
|
@@ -16,7 +15,4 @@ __all__ = [
|
|
|
16
15
|
"TASK_FAILED_SCORE",
|
|
17
16
|
"verifier",
|
|
18
17
|
"SyncVerifierFunction",
|
|
19
|
-
"execute_verifier_local",
|
|
20
|
-
"LocalEnvironment",
|
|
21
|
-
"diff_dbs",
|
|
22
18
|
]
|