PyPI - fleet-python - Versions diffs - 0.2.111__tar.gz → 0.2.113__tar.gz - Mend

fleet-python 0.2.111__tar.gz → 0.2.113__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

{fleet_python-0.2.111/fleet_python.egg-info → fleet_python-0.2.113}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fleet-python
-Version: 0.2.111
+Version: 0.2.113
 Summary: Python SDK for Fleet environments
 Author-email: Fleet AI <nic@fleet.so>
 License: Apache-2.0

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/__init__.py RENAMED Viewed

@@ -68,12 +68,15 @@ from .tasks import (
 # Import shared types
 from .types import VerifierFunction
+# Import judge data classes
+from .judge import Rubric, Criterion, Image, JudgeResult
 # Create a module-level env attribute for convenient access
 from . import env
 from . import global_client as _global_client
 from ._async import global_client as _async_global_client
-__version__ = "0.2.111"
+__version__ = "0.2.113"
 __all__ = [
     # Core classes
@@ -90,6 +93,11 @@ __all__ = [
     # Task models
     "Task",
     "VerifierFunction",
+    # Judge
+    "Rubric",
+    "Criterion",
+    "Image",
+    "JudgeResult",
     # Exceptions
     "FleetError",
     "FleetAPIError",

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/__init__.py RENAMED Viewed

@@ -44,7 +44,7 @@ from ..types import VerifierFunction
 from .. import env
 from . import global_client as _async_global_client
-__version__ = "0.2.111"
+__version__ = "0.2.113"
 __all__ = [
     # Core classes

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/base.py RENAMED Viewed

@@ -26,7 +26,7 @@ from .exceptions import (
 try:
     from .. import __version__
 except ImportError:
-    __version__ = "0.2.111"
+    __version__ = "0.2.113"
 logger = logging.getLogger(__name__)

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/_async/client.py RENAMED Viewed

@@ -54,6 +54,7 @@ from .tasks import Task
 if TYPE_CHECKING:
     from .verifiers import AsyncVerifierFunction
+    from .judge import AsyncJudge
 def _json_default(x: Any) -> Any:
@@ -344,6 +345,7 @@ class AsyncEnv(EnvironmentBase):
         self._client = client
         self._apps: Dict[str, AsyncInstanceClient] = {}
         self._instance: Optional[AsyncInstanceClient] = None
+        self._judge: Optional["AsyncJudge"] = None
     @property
     def instance(self) -> AsyncInstanceClient:
@@ -419,6 +421,18 @@ class AsyncEnv(EnvironmentBase):
         mcp_url = f"{self.urls.root}mcp"
         return AsyncMCPResource(url=mcp_url, env_key=self.env_key)
+    @property
+    def judge(self) -> "AsyncJudge":
+        """LLM-as-judge grading via orchestrator API."""
+        if self._judge is None:
+            from .judge import AsyncJudge
+            self._judge = AsyncJudge(
+                client=self._load_client,
+                instance_id=self.instance_id,
+            )
+        return self._judge
     def state(self, uri: str) -> Resource:
         return self.instance.state(uri)

fleet_python-0.2.113/fleet/_async/judge.py ADDED Viewed

@@ -0,0 +1,121 @@
+"""Fleet SDK Judge - Async version.
+Provides env.judge.grade() for async verifier scripts.
+"""
+from typing import Dict, List, Optional, Union, TYPE_CHECKING
+# Import shared classes and helpers from the sync module
+from ..judge import (
+    Criterion,
+    Image,
+    JudgeResult,
+    Rubric,
+    _build_grade_request,
+    _collect_image_from_env_async,
+    _guess_media_type,
+    _parse_grade_response,
+)
+if TYPE_CHECKING:
+    from .base import AsyncWrapper
+# Re-export data classes so `from fleet._async.judge import ...` works
+__all__ = [
+    "AsyncJudge",
+    "Criterion",
+    "Image",
+    "JudgeResult",
+    "Rubric",
+]
+class AsyncJudge:
+    """LLM-as-judge grading — calls orchestrator API, not environment API.
+    Accessed as env.judge on AsyncEnv instances.
+    """
+    def __init__(self, client: "AsyncWrapper", instance_id: str):
+        self._client = client
+        self._instance_id = instance_id
+    async def grade(
+        self,
+        rubric: Union[str, Rubric],
+        submission: Optional[str] = None,
+        *,
+        ground_truth: Optional[Union[str, dict]] = None,
+        problem: Optional[str] = None,
+        context: Optional[str] = None,
+        reference_claims: Optional[str] = None,
+        conversation: Optional[List[dict]] = None,
+        images: Optional[Dict[str, Image]] = None,
+        model: Optional[str] = None,
+        provider: Optional[str] = None,
+        agentic: bool = False,
+        collect: Optional[Dict[str, List[str]]] = None,
+        task_id: Optional[str] = None,
+    ) -> JudgeResult:
+        """Grade a submission using LLM-as-judge via the orchestrator API.
+        Returns a JudgeResult (float subclass with .details, .criteria, .feedback)
+        that can be returned directly from a verifier function.
+        Args:
+            rubric: Grading rubric — either a string or a structured Rubric object.
+            submission: The agent's final answer / submission text.
+            ground_truth: Expected answer (string or dict).
+            problem: The original problem statement.
+            context: Additional context for the judge.
+            reference_claims: Reference analysis claims.
+            conversation: Conversation history as list of message dicts.
+            images: Named images for the judge (e.g., gold reference, agent output).
+            model: Override LLM model (server picks default if None).
+            provider: Override LLM provider (server picks default if None).
+            agentic: If True, the orchestrator collects artifacts from the instance.
+            collect: File patterns for orchestrator to collect (agentic mode).
+            task_id: Optional task ID for tracking.
+        """
+        # Resolve Image.from_env images asynchronously before building request
+        resolved_images = images
+        if images and not agentic:
+            resolved_images = {}
+            for label, img in images.items():
+                if img.source == "env" and img._env is not None:
+                    b64 = await _collect_image_from_env_async(img._env, img.filename)
+                    if b64 is not None:
+                        resolved_images[label] = Image.from_base64(
+                            b64,
+                            img.filename or "image.png",
+                            _guess_media_type(img.filename or "image.png"),
+                        )
+                    else:
+                        # Async collection failed — use collect source directly
+                        # (don't keep the env image or serialize() will retry sync)
+                        resolved_images[label] = Image(
+                            source="collect",
+                            filename=img.filename,
+                        )
+                else:
+                    resolved_images[label] = img
+        body = _build_grade_request(
+            self._instance_id,
+            rubric,
+            submission,
+            ground_truth=ground_truth,
+            problem=problem,
+            context=context,
+            reference_claims=reference_claims,
+            conversation=conversation,
+            images=resolved_images,
+            model=model,
+            provider=provider,
+            agentic=agentic,
+            collect=collect,
+            task_id=task_id,
+        )
+        response = await self._client.request("POST", "/v1/judge/grade", json=body)
+        return _parse_grade_response(response.json())

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/base.py RENAMED Viewed

@@ -27,7 +27,7 @@ from .exceptions import (
 try:
     from . import __version__
 except ImportError:
-    __version__ = "0.2.111"
+    __version__ = "0.2.113"
 logger = logging.getLogger(__name__)

{fleet_python-0.2.111 → fleet_python-0.2.113}/fleet/client.py RENAMED Viewed

@@ -59,6 +59,7 @@ from .tasks import Task
 if TYPE_CHECKING:
     from .verifiers import SyncVerifierFunction
+    from .judge import SyncJudge
 def _json_default(x: Any) -> Any:
@@ -348,6 +349,7 @@ class SyncEnv(EnvironmentBase):
         self._client = client
         self._apps: Dict[str, InstanceClient] = {}
         self._instance: Optional[InstanceClient] = None
+        self._judge: Optional["SyncJudge"] = None
         self._manager_url_override: Optional[str] = None  # For URL mode
     @property
@@ -431,6 +433,18 @@ class SyncEnv(EnvironmentBase):
         mcp_url = f"{self.urls.root}mcp"
         return SyncMCPResource(url=mcp_url, env_key=self.env_key)
+    @property
+    def judge(self) -> "SyncJudge":
+        """LLM-as-judge grading via orchestrator API."""
+        if self._judge is None:
+            from .judge import SyncJudge
+            self._judge = SyncJudge(
+                client=self._load_client,
+                instance_id=self.instance_id,
+            )
+        return self._judge
     def state(self, uri: str) -> Resource:
         return self.instance.state(uri)

fleet-python 0.2.111__tar.gz → 0.2.113__tar.gz

fleet-python 0.2.111__tar.gz → 0.2.113__tar.gz