fleet-python 0.2.115__tar.gz → 0.2.117__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_python-0.2.115/fleet_python.egg-info → fleet_python-0.2.117}/PKG-INFO +1 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/__init__.py +5 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/__init__.py +1 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/base.py +1 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/client.py +39 -3
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/base.py +1 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/client.py +36 -3
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/judge.py +54 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/__init__.py +3 -0
- fleet_python-0.2.117/fleet/verifiers/local_executor.py +247 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet_python.egg-info/SOURCES.txt +2 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/pyproject.toml +1 -1
- fleet_python-0.2.117/tests/test_judge_criteria_markers.py +192 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/LICENSE +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/README.md +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/diff_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/dsl_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/exampleResume.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_account.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_action_log.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_mcp_anthropic.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_mcp_openai.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_sync.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_task.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/example_verifier.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/export_tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/export_tasks_filtered.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/fetch_tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/gemini_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/import_tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/iterate_verifiers.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/json_tasks_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/nova_act_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/openai_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/openai_simple_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/query_builder_example.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/quickstart.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/examples/test_cdp_logging.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/env/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/env/client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/exceptions.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/global_client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/instance/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/instance/base.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/instance/client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/judge.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/models.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/api.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/base.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/browser.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/filesystem.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/mcp.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/resources/sqlite.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/verifiers/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/verifiers/bundler.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/_async/verifiers/verifier.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/Dockerfile +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/agent.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/mcp/main.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/mcp_server/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/mcp_server/main.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/mcp_server/tools.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/requirements.txt +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/gemini_cua/start.sh +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/orchestrator.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/types.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/agent/utils.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/cli.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/config.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/env/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/env/client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/eval/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/eval/uploader.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/global_client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/instance/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/instance/base.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/instance/client.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/instance/models.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/models.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/proxy/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/proxy/proxy.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/proxy/whitelist.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/api.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/browser.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/filesystem.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/mcp.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/resources/sqlite.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/tasks.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/types.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/utils/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/utils/http_logging.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/utils/logging.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/utils/playwright.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/bundler.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/code.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/db.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/decorator.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/parse.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/sql_differ.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet/verifiers/verifier.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet_python.egg-info/entry_points.txt +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/scripts/fix_sync_imports.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/scripts/unasync.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/setup.cfg +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/__init__.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_app_method.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_expect_exactly.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_expect_only.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_instance_dispatch.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_sqlite_resource_dual_mode.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_sqlite_shared_memory_behavior.py +0 -0
- {fleet_python-0.2.115 → fleet_python-0.2.117}/tests/test_verifier_from_string.py +0 -0
|
@@ -38,6 +38,8 @@ from .verifiers import (
|
|
|
38
38
|
SnapshotDiff,
|
|
39
39
|
TASK_FAILED_SCORE,
|
|
40
40
|
TASK_SUCCESSFUL_SCORE,
|
|
41
|
+
execute_verifier_local,
|
|
42
|
+
LocalEnvironment,
|
|
41
43
|
)
|
|
42
44
|
|
|
43
45
|
# Import async verifiers (default verifier is async for modern usage)
|
|
@@ -76,7 +78,7 @@ from . import env
|
|
|
76
78
|
from . import global_client as _global_client
|
|
77
79
|
from ._async import global_client as _async_global_client
|
|
78
80
|
|
|
79
|
-
__version__ = "0.2.
|
|
81
|
+
__version__ = "0.2.117"
|
|
80
82
|
|
|
81
83
|
__all__ = [
|
|
82
84
|
# Core classes
|
|
@@ -114,6 +116,8 @@ __all__ = [
|
|
|
114
116
|
"SnapshotDiff",
|
|
115
117
|
"TASK_FAILED_SCORE",
|
|
116
118
|
"TASK_SUCCESSFUL_SCORE",
|
|
119
|
+
"execute_verifier_local",
|
|
120
|
+
"LocalEnvironment",
|
|
117
121
|
# Environment module
|
|
118
122
|
"env",
|
|
119
123
|
# Global client helpers
|
|
@@ -601,7 +601,6 @@ class AsyncFleet:
|
|
|
601
601
|
)
|
|
602
602
|
|
|
603
603
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
604
|
-
await instance.instance.load()
|
|
605
604
|
return instance
|
|
606
605
|
|
|
607
606
|
async def make_for_task(self, task: Task) -> AsyncEnv:
|
|
@@ -653,7 +652,6 @@ class AsyncFleet:
|
|
|
653
652
|
else:
|
|
654
653
|
response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
655
654
|
instance = AsyncEnv(client=self.client, **response.json())
|
|
656
|
-
await instance.instance.load()
|
|
657
655
|
return instance
|
|
658
656
|
|
|
659
657
|
def _create_url_instance(self, base_url: str) -> AsyncEnv:
|
|
@@ -834,7 +832,45 @@ class AsyncFleet:
|
|
|
834
832
|
At least one of run_id or profile_id must be provided.
|
|
835
833
|
"""
|
|
836
834
|
return await _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
837
|
-
|
|
835
|
+
|
|
836
|
+
@staticmethod
|
|
837
|
+
async def execute_verifier_local(
|
|
838
|
+
verifier_func: str,
|
|
839
|
+
seed_db: str,
|
|
840
|
+
current_db: str,
|
|
841
|
+
final_answer: Optional[str] = None,
|
|
842
|
+
) -> Dict[str, Any]:
|
|
843
|
+
"""Execute a verifier function locally against SQLite database files.
|
|
844
|
+
|
|
845
|
+
No authentication or remote server required. The verifier code is executed
|
|
846
|
+
in an isolated namespace with the same helpers available in production
|
|
847
|
+
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
848
|
+
|
|
849
|
+
Args:
|
|
850
|
+
verifier_func: Python source code containing the verifier function definition.
|
|
851
|
+
seed_db: Path to the seed (before) SQLite database file.
|
|
852
|
+
current_db: Path to the current (after) SQLite database file.
|
|
853
|
+
final_answer: Optional final answer string passed to the verifier.
|
|
854
|
+
|
|
855
|
+
Returns:
|
|
856
|
+
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
857
|
+
|
|
858
|
+
Example::
|
|
859
|
+
|
|
860
|
+
result = await fleet.execute_verifier_local(
|
|
861
|
+
verifier_func=verifier_code_string,
|
|
862
|
+
seed_db="./seed.db",
|
|
863
|
+
current_db="./current.db",
|
|
864
|
+
)
|
|
865
|
+
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
866
|
+
"""
|
|
867
|
+
import asyncio
|
|
868
|
+
from ..verifiers.local_executor import execute_verifier_local
|
|
869
|
+
|
|
870
|
+
return await asyncio.to_thread(
|
|
871
|
+
execute_verifier_local, verifier_func, seed_db, current_db, final_answer
|
|
872
|
+
)
|
|
873
|
+
|
|
838
874
|
async def list_runs(
|
|
839
875
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
840
876
|
) -> List[Run]:
|
|
@@ -613,7 +613,6 @@ class Fleet:
|
|
|
613
613
|
)
|
|
614
614
|
|
|
615
615
|
instance = SyncEnv(client=self.client, **response.json())
|
|
616
|
-
instance.instance.load()
|
|
617
616
|
return instance
|
|
618
617
|
|
|
619
618
|
def make_for_task(self, task: Task) -> SyncEnv:
|
|
@@ -665,7 +664,6 @@ class Fleet:
|
|
|
665
664
|
else:
|
|
666
665
|
response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
|
|
667
666
|
instance = SyncEnv(client=self.client, **response.json())
|
|
668
|
-
instance.instance.load()
|
|
669
667
|
return instance
|
|
670
668
|
|
|
671
669
|
def _create_url_instance(self, base_url: str) -> SyncEnv:
|
|
@@ -846,7 +844,42 @@ class Fleet:
|
|
|
846
844
|
At least one of run_id or profile_id must be provided.
|
|
847
845
|
"""
|
|
848
846
|
return _delete_instances_batch(self.client, run_id=run_id, profile_id=profile_id)
|
|
849
|
-
|
|
847
|
+
|
|
848
|
+
@staticmethod
|
|
849
|
+
def execute_verifier_local(
|
|
850
|
+
verifier_func: str,
|
|
851
|
+
seed_db: str,
|
|
852
|
+
current_db: str,
|
|
853
|
+
final_answer: Optional[str] = None,
|
|
854
|
+
) -> Dict[str, Any]:
|
|
855
|
+
"""Execute a verifier function locally against SQLite database files.
|
|
856
|
+
|
|
857
|
+
No authentication or remote server required. The verifier code is executed
|
|
858
|
+
in an isolated namespace with the same helpers available in production
|
|
859
|
+
(``normalized_contains``, ``IgnoreConfig``, ``DatabaseSnapshot``, etc.).
|
|
860
|
+
|
|
861
|
+
Args:
|
|
862
|
+
verifier_func: Python source code containing the verifier function definition.
|
|
863
|
+
seed_db: Path to the seed (before) SQLite database file.
|
|
864
|
+
current_db: Path to the current (after) SQLite database file.
|
|
865
|
+
final_answer: Optional final answer string passed to the verifier.
|
|
866
|
+
|
|
867
|
+
Returns:
|
|
868
|
+
Dict with keys ``success``, ``result``, ``error``, and ``stdout``.
|
|
869
|
+
|
|
870
|
+
Example::
|
|
871
|
+
|
|
872
|
+
result = fleet.execute_verifier_local(
|
|
873
|
+
verifier_func=verifier_code_string,
|
|
874
|
+
seed_db="./seed.db",
|
|
875
|
+
current_db="./current.db",
|
|
876
|
+
)
|
|
877
|
+
print(result["result"]) # 1 (TASK_SUCCESSFUL_SCORE) or 0
|
|
878
|
+
"""
|
|
879
|
+
from .verifiers.local_executor import execute_verifier_local
|
|
880
|
+
|
|
881
|
+
return execute_verifier_local(verifier_func, seed_db, current_db, final_answer)
|
|
882
|
+
|
|
850
883
|
def list_runs(
|
|
851
884
|
self, profile_id: Optional[str] = None, status: Optional[str] = "active"
|
|
852
885
|
) -> List[Run]:
|
|
@@ -823,6 +823,54 @@ def _parse_grade_response(data: dict) -> JudgeResult:
|
|
|
823
823
|
return JudgeResult(score, details=data)
|
|
824
824
|
|
|
825
825
|
|
|
826
|
+
def _print_criteria_markers(criteria: list) -> None:
|
|
827
|
+
"""Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
|
|
828
|
+
|
|
829
|
+
The orchestrator (theseus PR #1967) scans verifier stdout for these
|
|
830
|
+
markers and wraps the execution result so the client (client PR #1737)
|
|
831
|
+
can render an expandable rubric breakdown.
|
|
832
|
+
|
|
833
|
+
Converts from the orchestrator judge-response format::
|
|
834
|
+
|
|
835
|
+
{"name": str, "score": int, "max_score": int, "reasoning": str}
|
|
836
|
+
|
|
837
|
+
to the client-expected marker format::
|
|
838
|
+
|
|
839
|
+
{"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
840
|
+
|
|
841
|
+
Each criterion's score is normalised to a 0.0–1.0 float using its own
|
|
842
|
+
``max_score``.
|
|
843
|
+
"""
|
|
844
|
+
marker_criteria = []
|
|
845
|
+
for c in criteria:
|
|
846
|
+
name = c.get("name", "")
|
|
847
|
+
cscore = c.get("score", 0)
|
|
848
|
+
cmax = c.get("max_score", 0)
|
|
849
|
+
|
|
850
|
+
# Normalise per-criterion score to 0.0–1.0
|
|
851
|
+
if cmax and float(cmax) > 0:
|
|
852
|
+
norm_score = float(cscore) / float(cmax)
|
|
853
|
+
else:
|
|
854
|
+
norm_score = float(cscore)
|
|
855
|
+
|
|
856
|
+
entry: dict = {
|
|
857
|
+
"criteria": name,
|
|
858
|
+
"score": round(norm_score, 4),
|
|
859
|
+
"score_out_of": 1.0,
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
reasoning = c.get("reasoning", "")
|
|
863
|
+
if reasoning:
|
|
864
|
+
entry["description"] = reasoning
|
|
865
|
+
|
|
866
|
+
marker_criteria.append(entry)
|
|
867
|
+
|
|
868
|
+
if marker_criteria:
|
|
869
|
+
print(">>> CRITERIA >>>")
|
|
870
|
+
print(json.dumps(marker_criteria))
|
|
871
|
+
print("<<< CRITERIA <<<")
|
|
872
|
+
|
|
873
|
+
|
|
826
874
|
def _print_judge_result(data: dict) -> None:
|
|
827
875
|
"""Print detailed judge grading result for verifier stdout capture."""
|
|
828
876
|
model = data.get("model_used", "unknown")
|
|
@@ -848,6 +896,12 @@ def _print_judge_result(data: dict) -> None:
|
|
|
848
896
|
if len(reasoning) > 200:
|
|
849
897
|
reasoning = reasoning[:200] + "..."
|
|
850
898
|
print(f"[C] {name}: {cscore}/{cmax} — {reasoning}")
|
|
899
|
+
|
|
900
|
+
# Emit structured criteria via stdout markers so the orchestrator
|
|
901
|
+
# (_extract_criteria_from_stdout) and client can render a rubric
|
|
902
|
+
# breakdown. Schema per element:
|
|
903
|
+
# {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
|
|
904
|
+
_print_criteria_markers(criteria)
|
|
851
905
|
else:
|
|
852
906
|
print(f"[C] Score: {normalized:.2f}")
|
|
853
907
|
|
|
@@ -6,6 +6,7 @@ from .verifier import (
|
|
|
6
6
|
verifier,
|
|
7
7
|
SyncVerifierFunction,
|
|
8
8
|
)
|
|
9
|
+
from .local_executor import execute_verifier_local, LocalEnvironment
|
|
9
10
|
|
|
10
11
|
__all__ = [
|
|
11
12
|
"DatabaseSnapshot",
|
|
@@ -15,4 +16,6 @@ __all__ = [
|
|
|
15
16
|
"TASK_FAILED_SCORE",
|
|
16
17
|
"verifier",
|
|
17
18
|
"SyncVerifierFunction",
|
|
19
|
+
"execute_verifier_local",
|
|
20
|
+
"LocalEnvironment",
|
|
18
21
|
]
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Local verifier execution against SQLite files.
|
|
2
|
+
|
|
3
|
+
Executes verifier function code directly against local SQLite database files,
|
|
4
|
+
without requiring authentication or a remote runner API server.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import inspect
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
import string
|
|
11
|
+
import traceback
|
|
12
|
+
from io import StringIO
|
|
13
|
+
from typing import Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
from .db import DatabaseSnapshot, IgnoreConfig, SnapshotDiff
|
|
16
|
+
from .code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# Helper functions injected into verifier execution namespace
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
_TRANSLATOR = str.maketrans(string.punctuation, " " * len(string.punctuation))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _normalize_text(value: str) -> str:
|
|
27
|
+
text = value.lower().translate(_TRANSLATOR)
|
|
28
|
+
return "".join(text.split())
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _stringify_content(content: Any) -> str:
|
|
32
|
+
if isinstance(content, (dict, list)):
|
|
33
|
+
return json.dumps(content, sort_keys=True)
|
|
34
|
+
return str(content)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def normalized_contains(target: str, blob: Any) -> bool:
|
|
38
|
+
"""Check if target is contained in blob after normalising punctuation and case."""
|
|
39
|
+
normalized_target = _normalize_text(target)
|
|
40
|
+
normalized_blob = _normalize_text(_stringify_content(blob))
|
|
41
|
+
return normalized_target in normalized_blob
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def normalized_string_comparison(target: str, blob: Any) -> bool:
|
|
45
|
+
"""Check if target equals blob after normalising punctuation and case."""
|
|
46
|
+
normalized_target = _normalize_text(target)
|
|
47
|
+
normalized_blob = _normalize_text(_stringify_content(blob))
|
|
48
|
+
return normalized_target == normalized_blob
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def extract_numbers(text: str) -> list:
|
|
52
|
+
"""Extract all numbers from a string."""
|
|
53
|
+
cleaned_text = text.replace(",", "")
|
|
54
|
+
pattern = r"-?\d+\.?\d*"
|
|
55
|
+
matches = re.findall(pattern, cleaned_text)
|
|
56
|
+
return [float(num) for num in matches]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def contains_number(text: str, target_number) -> bool:
|
|
60
|
+
"""Check if text contains the target number."""
|
|
61
|
+
numbers = extract_numbers(text)
|
|
62
|
+
try:
|
|
63
|
+
if isinstance(target_number, str):
|
|
64
|
+
target_number = target_number.replace(",", "")
|
|
65
|
+
target = float(target_number)
|
|
66
|
+
except (ValueError, AttributeError):
|
|
67
|
+
return False
|
|
68
|
+
return target in numbers
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# Lightweight Environment mock for local verifier execution
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
class _LocalInstance:
|
|
76
|
+
"""Mock instance that supports load() as a no-op."""
|
|
77
|
+
|
|
78
|
+
def load(self):
|
|
79
|
+
pass
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LocalEnvironment:
|
|
83
|
+
"""Lightweight environment that wraps local SQLite files for verifier execution.
|
|
84
|
+
|
|
85
|
+
Provides the same interface verifier functions expect from ``env``:
|
|
86
|
+
``env.db("seed")``, ``env.db("current")``, and ``env.instance.load()``.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
def __init__(self, seed_db: str, current_db: str):
|
|
90
|
+
self._snapshots: Dict[str, DatabaseSnapshot] = {
|
|
91
|
+
"seed": DatabaseSnapshot(seed_db, name="seed"),
|
|
92
|
+
"current": DatabaseSnapshot(current_db, name="current"),
|
|
93
|
+
}
|
|
94
|
+
self.instance = _LocalInstance()
|
|
95
|
+
|
|
96
|
+
def db(self, name: str = "current") -> DatabaseSnapshot:
|
|
97
|
+
if name not in self._snapshots:
|
|
98
|
+
raise KeyError(
|
|
99
|
+
f"Unknown database '{name}'. Available: {list(self._snapshots.keys())}"
|
|
100
|
+
)
|
|
101
|
+
return self._snapshots[name]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
# Core execution function
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
def execute_verifier_local(
|
|
109
|
+
verifier_func: str,
|
|
110
|
+
seed_db: str,
|
|
111
|
+
current_db: str,
|
|
112
|
+
final_answer: Optional[str] = None,
|
|
113
|
+
) -> Dict[str, Any]:
|
|
114
|
+
"""Execute a verifier function string locally against SQLite database files.
|
|
115
|
+
|
|
116
|
+
No authentication or remote server required. The function is executed in an
|
|
117
|
+
isolated namespace with the same helpers available to production verifiers.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
verifier_func: Python source code containing the verifier function definition.
|
|
121
|
+
seed_db: Path to the seed (before) SQLite database file.
|
|
122
|
+
current_db: Path to the current (after) SQLite database file.
|
|
123
|
+
final_answer: Optional final answer string passed to the verifier.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Dict with keys:
|
|
127
|
+
- ``success`` (bool): Whether execution completed without errors.
|
|
128
|
+
- ``result`` (Any): The return value of the verifier function (typically a score).
|
|
129
|
+
- ``error`` (str | None): Error message and traceback if execution failed.
|
|
130
|
+
- ``stdout`` (str): Captured stdout output from the verifier function.
|
|
131
|
+
"""
|
|
132
|
+
import sys
|
|
133
|
+
|
|
134
|
+
# Capture stdout
|
|
135
|
+
captured_stdout = StringIO()
|
|
136
|
+
|
|
137
|
+
try:
|
|
138
|
+
# Build the local environment
|
|
139
|
+
env = LocalEnvironment(seed_db, current_db)
|
|
140
|
+
|
|
141
|
+
# Clean the verifier code – strip decorators and fleet imports
|
|
142
|
+
cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
|
|
143
|
+
cleaned_code = re.sub(
|
|
144
|
+
r"^from fleet\.verifiers.*import.*$\n?",
|
|
145
|
+
"",
|
|
146
|
+
cleaned_code,
|
|
147
|
+
flags=re.MULTILINE,
|
|
148
|
+
)
|
|
149
|
+
cleaned_code = re.sub(
|
|
150
|
+
r"^from fleet import verifier.*$\n?",
|
|
151
|
+
"",
|
|
152
|
+
cleaned_code,
|
|
153
|
+
flags=re.MULTILINE,
|
|
154
|
+
)
|
|
155
|
+
cleaned_code = re.sub(
|
|
156
|
+
r"^import fleet\.verifiers.*$\n?",
|
|
157
|
+
"",
|
|
158
|
+
cleaned_code,
|
|
159
|
+
flags=re.MULTILINE,
|
|
160
|
+
)
|
|
161
|
+
cleaned_code = re.sub(
|
|
162
|
+
r"^import fleet$\n?", "", cleaned_code, flags=re.MULTILINE
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# Build execution namespace with all helpers available to verifiers
|
|
166
|
+
exec_globals: Dict[str, Any] = {
|
|
167
|
+
# Score constants
|
|
168
|
+
"TASK_SUCCESSFUL_SCORE": TASK_SUCCESSFUL_SCORE,
|
|
169
|
+
"TASK_FAILED_SCORE": TASK_FAILED_SCORE,
|
|
170
|
+
# Helper functions
|
|
171
|
+
"normalized_contains": normalized_contains,
|
|
172
|
+
"normalized_string_comparison": normalized_string_comparison,
|
|
173
|
+
"extract_numbers": extract_numbers,
|
|
174
|
+
"contains_number": contains_number,
|
|
175
|
+
# Database classes
|
|
176
|
+
"DatabaseSnapshot": DatabaseSnapshot,
|
|
177
|
+
"IgnoreConfig": IgnoreConfig,
|
|
178
|
+
"SnapshotDiff": SnapshotDiff,
|
|
179
|
+
# Environment type hint (not enforced at runtime)
|
|
180
|
+
"Environment": type(env),
|
|
181
|
+
# Standard library modules commonly used in verifiers
|
|
182
|
+
"json": json,
|
|
183
|
+
"re": re,
|
|
184
|
+
"string": string,
|
|
185
|
+
# Builtins
|
|
186
|
+
"__builtins__": __builtins__,
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
# Execute the verifier code to define the function(s)
|
|
190
|
+
local_namespace: Dict[str, Any] = {}
|
|
191
|
+
exec(cleaned_code, exec_globals, local_namespace)
|
|
192
|
+
|
|
193
|
+
# Merge so helper functions defined in verifier code are accessible
|
|
194
|
+
exec_globals.update(local_namespace)
|
|
195
|
+
|
|
196
|
+
# Find the verifier function (the one defined in user code)
|
|
197
|
+
func_obj = None
|
|
198
|
+
for name, obj in local_namespace.items():
|
|
199
|
+
if inspect.isfunction(obj) and obj.__code__.co_filename == "<string>":
|
|
200
|
+
func_obj = obj
|
|
201
|
+
break
|
|
202
|
+
|
|
203
|
+
if func_obj is None:
|
|
204
|
+
return {
|
|
205
|
+
"success": False,
|
|
206
|
+
"result": None,
|
|
207
|
+
"error": "No function found in verifier code",
|
|
208
|
+
"stdout": "",
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
# Redirect stdout to capture print() output from verifiers
|
|
212
|
+
old_stdout = sys.stdout
|
|
213
|
+
sys.stdout = captured_stdout
|
|
214
|
+
|
|
215
|
+
try:
|
|
216
|
+
# Execute the verifier – verifiers take (env, final_answer=None)
|
|
217
|
+
sig = inspect.signature(func_obj)
|
|
218
|
+
params = list(sig.parameters.values())
|
|
219
|
+
|
|
220
|
+
if len(params) >= 2:
|
|
221
|
+
result = func_obj(env, final_answer)
|
|
222
|
+
elif len(params) == 1:
|
|
223
|
+
result = func_obj(env)
|
|
224
|
+
else:
|
|
225
|
+
result = func_obj()
|
|
226
|
+
finally:
|
|
227
|
+
sys.stdout = old_stdout
|
|
228
|
+
|
|
229
|
+
return {
|
|
230
|
+
"success": True,
|
|
231
|
+
"result": result,
|
|
232
|
+
"error": None,
|
|
233
|
+
"stdout": captured_stdout.getvalue(),
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
except Exception as e:
|
|
237
|
+
# Restore stdout if it was redirected
|
|
238
|
+
if sys.stdout is not sys.__stdout__ and sys.stdout is captured_stdout:
|
|
239
|
+
sys.stdout = sys.__stdout__
|
|
240
|
+
|
|
241
|
+
error_msg = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
|
|
242
|
+
return {
|
|
243
|
+
"success": False,
|
|
244
|
+
"result": None,
|
|
245
|
+
"error": error_msg,
|
|
246
|
+
"stdout": captured_stdout.getvalue(),
|
|
247
|
+
}
|
|
@@ -101,6 +101,7 @@ fleet/verifiers/bundler.py
|
|
|
101
101
|
fleet/verifiers/code.py
|
|
102
102
|
fleet/verifiers/db.py
|
|
103
103
|
fleet/verifiers/decorator.py
|
|
104
|
+
fleet/verifiers/local_executor.py
|
|
104
105
|
fleet/verifiers/parse.py
|
|
105
106
|
fleet/verifiers/sql_differ.py
|
|
106
107
|
fleet/verifiers/verifier.py
|
|
@@ -117,6 +118,7 @@ tests/test_app_method.py
|
|
|
117
118
|
tests/test_expect_exactly.py
|
|
118
119
|
tests/test_expect_only.py
|
|
119
120
|
tests/test_instance_dispatch.py
|
|
121
|
+
tests/test_judge_criteria_markers.py
|
|
120
122
|
tests/test_sqlite_resource_dual_mode.py
|
|
121
123
|
tests/test_sqlite_shared_memory_behavior.py
|
|
122
124
|
tests/test_verifier_from_string.py
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Tests for structured criteria stdout markers in fleet.judge.
|
|
2
|
+
|
|
3
|
+
Validates that _print_criteria_markers emits the correct
|
|
4
|
+
>>> CRITERIA >>> / <<< CRITERIA <<< markers that the orchestrator
|
|
5
|
+
(theseus PR #1967) and client (client PR #1737) expect.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
from io import StringIO
|
|
11
|
+
from unittest.mock import patch
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from fleet.judge import (
|
|
16
|
+
_print_criteria_markers,
|
|
17
|
+
_print_judge_result,
|
|
18
|
+
_parse_grade_response,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Helpers
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
_MARKER_RE = re.compile(
|
|
27
|
+
r">>> CRITERIA >>>\s*\n(.*?)\n<<< CRITERIA <<<",
|
|
28
|
+
re.DOTALL,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _capture_print(fn, *args, **kwargs):
|
|
33
|
+
"""Capture all print() output from a function call."""
|
|
34
|
+
buf = StringIO()
|
|
35
|
+
with patch("builtins.print", side_effect=lambda *a, **kw: buf.write(" ".join(str(x) for x in a) + "\n")):
|
|
36
|
+
fn(*args, **kwargs)
|
|
37
|
+
return buf.getvalue()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _extract_criteria_from_stdout(stdout: str):
    """Mirror the orchestrator's extraction logic (theseus PR #1967)."""
    match = _MARKER_RE.search(stdout)
    if match is None:
        return None
    payload = json.loads(match.group(1).strip())
    return payload if isinstance(payload, list) else None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# _print_criteria_markers
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
class TestPrintCriteriaMarkers:
    """Unit tests covering _print_criteria_markers."""

    def test_basic_criteria_output(self):
        """Markers are emitted and scores are normalised into [0, 1]."""
        rubric = [
            {"name": "Accuracy", "score": 8, "max_score": 10, "reasoning": "Good job"},
            {"name": "Style", "score": 5, "max_score": 5, "reasoning": "Perfect"},
        ]
        out = _capture_print(_print_criteria_markers, rubric)

        entries = _extract_criteria_from_stdout(out)
        assert entries is not None, f"Markers not found in stdout:\n{out}"
        assert len(entries) == 2

        first, second = entries
        assert first["criteria"] == "Accuracy"
        assert first["score"] == pytest.approx(0.8, abs=0.01)
        assert first["score_out_of"] == 1.0
        assert first["description"] == "Good job"

        assert second["criteria"] == "Style"
        assert second["score"] == pytest.approx(1.0, abs=0.01)
        assert second["score_out_of"] == 1.0

    def test_zero_max_score_passthrough(self):
        """A max_score of 0 means the raw score is emitted unchanged."""
        out = _capture_print(
            _print_criteria_markers,
            [{"name": "Metric", "score": 0.75, "max_score": 0}],
        )
        entries = _extract_criteria_from_stdout(out)
        assert entries is not None
        assert entries[0]["score"] == pytest.approx(0.75, abs=0.01)

    def test_empty_criteria_no_markers(self):
        """An empty criteria list must not produce any markers."""
        out = _capture_print(_print_criteria_markers, [])
        assert ">>> CRITERIA >>>" not in out

    def test_reasoning_maps_to_description(self):
        """The 'reasoning' field is surfaced as 'description' in the schema."""
        rubric = [
            {"name": "Test", "score": 3, "max_score": 5, "reasoning": "Some reasoning here"},
        ]
        entries = _extract_criteria_from_stdout(
            _capture_print(_print_criteria_markers, rubric)
        )
        assert entries[0]["description"] == "Some reasoning here"

    def test_missing_reasoning_no_description(self):
        """An empty reasoning string omits the description key entirely."""
        rubric = [
            {"name": "Test", "score": 3, "max_score": 5, "reasoning": ""},
        ]
        entries = _extract_criteria_from_stdout(
            _capture_print(_print_criteria_markers, rubric)
        )
        assert "description" not in entries[0]

    def test_output_parseable_by_orchestrator_regex(self):
        """The emitted block satisfies the exact orchestrator regex."""
        out = _capture_print(
            _print_criteria_markers,
            [{"name": "A", "score": 1, "max_score": 2, "reasoning": "half"}],
        )

        # Exact pattern from theseus PR #1967.
        match = re.search(
            r">>> CRITERIA >>>\s*\n(.*?)\n<<< CRITERIA <<<", out, re.DOTALL
        )
        assert match is not None, "Output doesn't match orchestrator regex"
        payload = json.loads(match.group(1).strip())
        assert isinstance(payload, list)
        assert payload[0]["criteria"] == "A"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# _print_judge_result integration
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
class TestPrintJudgeResult:
    """Tests that _print_judge_result forwards criteria into stdout markers."""

    def test_criteria_markers_emitted(self):
        """Criteria present in the payload produce a marker block."""
        payload = {
            "model_used": "claude-sonnet",
            "provider_used": "anthropic",
            "total_score": 15,
            "max_score": 20,
            "normalized_score": 0.75,
            "criteria": [
                {"name": "Accuracy", "score": 8, "max_score": 10, "reasoning": "Good"},
                {"name": "Style", "score": 7, "max_score": 10, "reasoning": "Decent"},
            ],
        }
        entries = _extract_criteria_from_stdout(
            _capture_print(_print_judge_result, payload)
        )
        assert entries is not None
        assert len(entries) == 2

    def test_no_criteria_no_markers(self):
        """A payload without criteria suppresses the marker block."""
        payload = {
            "model_used": "claude-sonnet",
            "provider_used": "anthropic",
            "total_score": 0,
            "max_score": 0,
            "normalized_score": 0.5,
        }
        out = _capture_print(_print_judge_result, payload)
        assert ">>> CRITERIA >>>" not in out
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ---------------------------------------------------------------------------
|
|
170
|
+
# _parse_grade_response integration
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
class TestParseGradeResponse:
    """End-to-end tests: _parse_grade_response should surface criteria markers."""

    def test_full_flow_emits_markers(self):
        """_parse_grade_response → _print_judge_result → criteria markers."""
        payload = {
            "model_used": "claude-sonnet",
            "provider_used": "anthropic",
            "total_score": 9,
            "max_score": 10,
            "normalized_score": 0.9,
            "criteria": [
                {"name": "Completeness", "score": 9, "max_score": 10, "reasoning": "Almost perfect"},
            ],
        }
        entries = _extract_criteria_from_stdout(
            _capture_print(_parse_grade_response, payload)
        )
        assert entries is not None
        assert entries[0]["criteria"] == "Completeness"
        assert entries[0]["score"] == pytest.approx(0.9, abs=0.01)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|