PyPI - fleet-python - Versions diffs - 0.2.114__tar.gz → 0.2.116__tar.gz - Mend

fleet-python 0.2.114__tar.gz → 0.2.116__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125)

{fleet_python-0.2.114/fleet_python.egg-info → fleet_python-0.2.116}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fleet-python
-Version: 0.2.114
+Version: 0.2.116
 Summary: Python SDK for Fleet environments
 Author-email: Fleet AI <nic@fleet.so>
 License: Apache-2.0

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/__init__.py RENAMED Viewed

@@ -69,14 +69,14 @@ from .tasks import (
 from .types import VerifierFunction
 # Import judge data classes
-from .judge import Rubric, Criterion, Image, JudgeResult
+from .judge import Rubric, Criterion, File, Image, JudgeResult
 # Create a module-level env attribute for convenient access
 from . import env
 from . import global_client as _global_client
 from ._async import global_client as _async_global_client
-__version__ = "0.2.114"
+__version__ = "0.2.116"
 __all__ = [
     # Core classes
@@ -96,6 +96,7 @@ __all__ = [
     # Judge
     "Rubric",
     "Criterion",
+    "File",
     "Image",
     "JudgeResult",
     # Exceptions

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/__init__.py RENAMED Viewed

@@ -44,7 +44,7 @@ from ..types import VerifierFunction
 from .. import env
 from . import global_client as _async_global_client
-__version__ = "0.2.114"
+__version__ = "0.2.116"
 __all__ = [
     # Core classes

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/base.py RENAMED Viewed

@@ -26,7 +26,7 @@ from .exceptions import (
 try:
     from .. import __version__
 except ImportError:
-    __version__ = "0.2.114"
+    __version__ = "0.2.116"
 logger = logging.getLogger(__name__)

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/client.py RENAMED Viewed

@@ -601,7 +601,6 @@ class AsyncFleet:
         )
         instance = AsyncEnv(client=self.client, **response.json())
-        await instance.instance.load()
         return instance
     async def make_for_task(self, task: Task) -> AsyncEnv:
@@ -653,7 +652,6 @@ class AsyncFleet:
         else:
             response = await self.client.request("GET", f"/v1/env/instances/{instance_id}")
             instance = AsyncEnv(client=self.client, **response.json())
-            await instance.instance.load()
             return instance
     def _create_url_instance(self, base_url: str) -> AsyncEnv:

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/_async/judge.py RENAMED Viewed

@@ -8,11 +8,14 @@ from typing import Dict, List, Optional, Union, TYPE_CHECKING
 # Import shared classes and helpers from the sync module
 from ..judge import (
     Criterion,
+    File,
     Image,
     JudgeResult,
     Rubric,
     _build_grade_request,
+    _collect_file_from_env_async,
     _collect_image_from_env_async,
+    _guess_file_media_type,
     _guess_media_type,
     _parse_grade_response,
     _print_judge_call_start,
@@ -25,6 +28,7 @@ if TYPE_CHECKING:
 __all__ = [
     "AsyncJudge",
     "Criterion",
+    "File",
     "Image",
     "JudgeResult",
     "Rubric",
@@ -52,6 +56,7 @@ class AsyncJudge:
         reference_claims: Optional[str] = None,
         conversation: Optional[List[dict]] = None,
         images: Optional[Dict[str, Image]] = None,
+        files: Optional[Dict[str, File]] = None,
         model: Optional[str] = None,
         provider: Optional[str] = None,
         agentic: bool = False,
@@ -72,6 +77,7 @@ class AsyncJudge:
             reference_claims: Reference analysis claims.
             conversation: Conversation history as list of message dicts.
             images: Named images for the judge (e.g., gold reference, agent output).
+            files: Named files for the judge (PDF, CSV, STEP, etc.).
             model: Override LLM model (server picks default if None).
             provider: Override LLM provider (server picks default if None).
             agentic: If True, the orchestrator collects artifacts from the instance.
@@ -101,6 +107,28 @@ class AsyncJudge:
                 else:
                     resolved_images[label] = img
+        # Resolve File.from_env files asynchronously before building request
+        resolved_files = files
+        if files and not agentic:
+            resolved_files = {}
+            for label, f in files.items():
+                if f.source == "env" and f._env is not None:
+                    b64 = await _collect_file_from_env_async(f._env, f.filename)
+                    if b64 is not None:
+                        resolved_files[label] = File.from_base64(
+                            b64,
+                            f.filename or "file",
+                            _guess_file_media_type(f.filename or "file"),
+                        )
+                    else:
+                        # Async collection failed — use collect source directly
+                        resolved_files[label] = File(
+                            source="collect",
+                            filename=f.filename,
+                        )
+                else:
+                    resolved_files[label] = f
         body = _build_grade_request(
             self._instance_id,
             rubric,
@@ -111,6 +139,7 @@ class AsyncJudge:
             reference_claims=reference_claims,
             conversation=conversation,
             images=resolved_images,
+            files=resolved_files,
             model=model,
             provider=provider,
             agentic=agentic,
@@ -118,6 +147,6 @@ class AsyncJudge:
             task_id=task_id,
         )
-        _print_judge_call_start(rubric, resolved_images, agentic, model)
+        _print_judge_call_start(rubric, resolved_images, agentic, model, files=resolved_files)
         response = await self._client.request("POST", "/v1/judge/grade", json=body)
         return _parse_grade_response(response.json())

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/base.py RENAMED Viewed

@@ -27,7 +27,7 @@ from .exceptions import (
 try:
     from . import __version__
 except ImportError:
-    __version__ = "0.2.114"
+    __version__ = "0.2.116"
 logger = logging.getLogger(__name__)

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/client.py RENAMED Viewed

@@ -613,7 +613,6 @@ class Fleet:
         )
         instance = SyncEnv(client=self.client, **response.json())
-        instance.instance.load()
         return instance
     def make_for_task(self, task: Task) -> SyncEnv:
@@ -665,7 +664,6 @@ class Fleet:
         else:
             response = self.client.request("GET", f"/v1/env/instances/{instance_id}")
             instance = SyncEnv(client=self.client, **response.json())
-            instance.instance.load()
             return instance
     def _create_url_instance(self, base_url: str) -> SyncEnv:

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet/judge.py RENAMED Viewed

@@ -38,6 +38,47 @@ def _guess_media_type(filename: str) -> str:
     }.get(ext, "image/png")
+def _guess_file_media_type(filename: str) -> str:
+    """Guess media type from filename extension for arbitrary files.
+    Broader than _guess_media_type — covers documents, CAD, data formats, etc.
+    """
+    ext = filename.lower().rsplit(".", 1)[-1] if "." in filename else ""
+    return {
+        # Images
+        "png": "image/png",
+        "jpg": "image/jpeg",
+        "jpeg": "image/jpeg",
+        "gif": "image/gif",
+        "webp": "image/webp",
+        "svg": "image/svg+xml",
+        # Documents
+        "pdf": "application/pdf",
+        "txt": "text/plain",
+        "md": "text/markdown",
+        "html": "text/html",
+        "htm": "text/html",
+        "csv": "text/csv",
+        "tsv": "text/tab-separated-values",
+        # Data
+        "json": "application/json",
+        "xml": "application/xml",
+        "yaml": "application/x-yaml",
+        "yml": "application/x-yaml",
+        # CAD / Engineering
+        "step": "application/step",
+        "stp": "application/step",
+        "stl": "model/stl",
+        "iges": "model/iges",
+        "igs": "model/iges",
+        "obj": "model/obj",
+        # Archives
+        "zip": "application/zip",
+        "gz": "application/gzip",
+        "tar": "application/x-tar",
+    }.get(ext, "application/octet-stream")
 @dataclass
 class Criterion:
     """A single rubric criterion for grading.
@@ -199,6 +240,99 @@ class Image:
         return d
+class File:
+    """Reference to an arbitrary file for LLM judge grading.
+    Supports any file type (PDF, CSV, STEP, STL, etc.) via the Anthropic
+    Files API. Use the static constructors to create instances:
+        File.s3("s3://bucket/key")                     - S3 URL, fetched server-side
+        File.from_base64(data, "part.step", "application/step") - Inline base64 data
+        File.from_env(env, "exported_part.step")        - Collect from environment
+    """
+    def __init__(
+        self,
+        *,
+        source: str,
+        url: Optional[str] = None,
+        data: Optional[str] = None,
+        filename: Optional[str] = None,
+        media_type: Optional[str] = None,
+        _env: Optional[Any] = None,
+    ):
+        self.source = source
+        self.url = url
+        self.data = data
+        self.filename = filename
+        self.media_type = media_type
+        self._env = _env
+    @staticmethod
+    def s3(url: str, media_type: Optional[str] = None) -> "File":
+        """Reference a file in S3. The orchestrator fetches it server-side."""
+        return File(source="s3", url=url, media_type=media_type)
+    @staticmethod
+    def from_base64(
+        data: str, filename: str, media_type: Optional[str] = None
+    ) -> "File":
+        """Inline base64 file data."""
+        return File(
+            source="base64",
+            data=data,
+            filename=filename,
+            media_type=media_type or _guess_file_media_type(filename),
+        )
+    @staticmethod
+    def from_env(env: Any, filename: str) -> "File":
+        """Collect a file from the environment.
+        In non-agentic mode, the SDK collects the file client-side (DB -> filesystem)
+        and sends base64 to the orchestrator.
+        In agentic mode, only the filename hint is sent and the orchestrator collects it.
+        """
+        return File(source="env", filename=filename, _env=env)
+    def serialize(self, *, label: Optional[str] = None, agentic: bool = False) -> dict:
+        """Serialize for the orchestrator API request body."""
+        d: dict
+        if self.source == "s3":
+            d = {"source": "s3", "url": self.url}
+            if self.media_type:
+                d["media_type"] = self.media_type
+        elif self.source == "base64":
+            d = {
+                "source": "base64",
+                "data": self.data,
+                "filename": self.filename,
+                "media_type": self.media_type or _guess_file_media_type(self.filename or "file"),
+            }
+        elif self.source == "collect":
+            d = {"source": "collect", "selector": self.filename}
+        elif self.source == "env":
+            if agentic:
+                d = {"source": "collect", "selector": self.filename}
+            else:
+                b64 = _collect_file_from_env(self._env, self.filename)
+                if b64 is None:
+                    d = {"source": "collect", "selector": self.filename}
+                else:
+                    d = {
+                        "source": "base64",
+                        "data": b64,
+                        "filename": self.filename,
+                        "media_type": _guess_file_media_type(self.filename or "file"),
+                    }
+        else:
+            raise ValueError(f"Unknown file source: {self.source}")
+        if label is not None:
+            d["label"] = label
+        return d
 class JudgeResult(float):
     """Float subclass that carries grading details.
@@ -412,6 +546,102 @@ async def _collect_image_from_env_async(env: Any, filename: str) -> Optional[str
     return None
+def _collect_file_from_env(env: Any, filename: str) -> Optional[str]:
+    """Collect a file from the environment using DB -> filesystem strategies.
+    Similar to _collect_image_from_env but skips notebook cell output strategy
+    (which is image-specific). Returns base64-encoded file data, or None if not found.
+    """
+    # Strategy 1: DB files table
+    try:
+        current = env.db("current")
+        where = f"path = '{filename}' OR path LIKE '%/{filename}'"
+        rows = _extract_query_rows(
+            current.query(f"SELECT path, hex(content) AS content_hex FROM files WHERE {where}")
+        )
+        candidates = {}
+        for row in rows:
+            path, chex = row.get("path", ""), row.get("content_hex", "")
+            if path and chex:
+                try:
+                    candidates[path] = bytes.fromhex(chex)
+                except Exception:
+                    pass
+        # Prefer non-dataroom paths
+        non_dr = [p for p in candidates if not p.startswith("dataroom/")]
+        best = sorted(non_dr or list(candidates.keys()), key=len)
+        if best:
+            logger.debug("Loaded file from DB: %s", best[0])
+            return base64.b64encode(candidates[best[0]]).decode()
+    except Exception as e:
+        logger.debug("DB file query failed: %s", e)
+    # Strategy 2: Filesystem fallback
+    search_paths = [
+        filename,
+        f"/app/workspace/{filename}",
+        f"/workspace/{filename}",
+    ]
+    for fp in search_paths:
+        try:
+            if os.path.exists(fp):
+                with open(fp, "rb") as f:
+                    logger.debug("Loaded file from filesystem: %s", fp)
+                    return base64.b64encode(f.read()).decode()
+        except Exception:
+            pass
+    return None
+async def _collect_file_from_env_async(env: Any, filename: str) -> Optional[str]:
+    """Async version of _collect_file_from_env.
+    Collects a file from an AsyncEnv using DB -> filesystem strategies.
+    Returns base64-encoded file data, or None if not found.
+    """
+    # Strategy 1: DB files table
+    try:
+        current = env.db("current")
+        where = f"path = '{filename}' OR path LIKE '%/{filename}'"
+        rows = _extract_query_rows(
+            await current.query(f"SELECT path, hex(content) AS content_hex FROM files WHERE {where}")
+        )
+        candidates = {}
+        for row in rows:
+            path, chex = row.get("path", ""), row.get("content_hex", "")
+            if path and chex:
+                try:
+                    candidates[path] = bytes.fromhex(chex)
+                except Exception:
+                    pass
+        # Prefer non-dataroom paths
+        non_dr = [p for p in candidates if not p.startswith("dataroom/")]
+        best = sorted(non_dr or list(candidates.keys()), key=len)
+        if best:
+            logger.debug("Loaded file from DB (async): %s", best[0])
+            return base64.b64encode(candidates[best[0]]).decode()
+    except Exception as e:
+        logger.debug("DB file query failed (async): %s", e)
+    # Strategy 2: Filesystem fallback
+    search_paths = [
+        filename,
+        f"/app/workspace/{filename}",
+        f"/workspace/{filename}",
+    ]
+    for fp in search_paths:
+        try:
+            if os.path.exists(fp):
+                with open(fp, "rb") as f:
+                    logger.debug("Loaded file from filesystem (async): %s", fp)
+                    return base64.b64encode(f.read()).decode()
+        except Exception:
+            pass
+    return None
 # ---------------------------------------------------------------------------
 # Accumulator printing (verifier protocol)
 # ---------------------------------------------------------------------------
@@ -447,6 +677,12 @@ def _print_accumulators(data: dict) -> None:
         print(json.dumps(golden_urls))
         print("<<< GOLDEN_URLS <<<")
+    agent_steps = acc.get("agent_steps")
+    if agent_steps:
+        print(">>> AGENT_STEPS >>>")
+        print(json.dumps(agent_steps))
+        print("<<< AGENT_STEPS <<<")
     timing = acc.get("timing")
     if timing:
         print(
@@ -466,6 +702,7 @@ def _print_judge_call_start(
     images: Optional[Dict[str, "Image"]],
     agentic: bool,
     model: Optional[str],
+    files: Optional[Dict[str, "File"]] = None,
 ) -> None:
     """Print info when initiating a judge grading call."""
     mode = "agentic" if agentic else "standard"
@@ -488,6 +725,18 @@ def _print_judge_call_start(
     else:
         print("[C] No images provided")
+    if files:
+        for label, f in files.items():
+            src = f.source
+            detail = ""
+            if f.url:
+                detail = f" url={f.url}"
+            elif f.filename:
+                detail = f" file={f.filename}"
+            if f.media_type:
+                detail += f" type={f.media_type}"
+            print(f"[C] File '{label}': source={src}{detail}")
 def _build_grade_request(
     instance_id: str,
@@ -500,6 +749,7 @@ def _build_grade_request(
     reference_claims: Optional[str] = None,
     conversation: Optional[List[dict]] = None,
     images: Optional[Dict[str, Image]] = None,
+    files: Optional[Dict[str, "File"]] = None,
     model: Optional[str] = None,
     provider: Optional[str] = None,
     agentic: bool = False,
@@ -554,6 +804,13 @@ def _build_grade_request(
             for label, img in images.items()
         ]
+    # Serialize files as labeled array
+    if files:
+        body["files"] = [
+            f.serialize(label=label, agentic=agentic)
+            for label, f in files.items()
+        ]
     return body
@@ -566,6 +823,54 @@ def _parse_grade_response(data: dict) -> JudgeResult:
     return JudgeResult(score, details=data)
+def _print_criteria_markers(criteria: list) -> None:
+    """Emit ``>>> CRITERIA >>>`` stdout markers for structured criteria display.
+    The orchestrator (theseus PR #1967) scans verifier stdout for these
+    markers and wraps the execution result so the client (client PR #1737)
+    can render an expandable rubric breakdown.
+    Converts from the orchestrator judge-response format::
+        {"name": str, "score": int, "max_score": int, "reasoning": str}
+    to the client-expected marker format::
+        {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
+    Each criterion's score is normalised to a 0.0–1.0 float using its own
+    ``max_score``.
+    """
+    marker_criteria = []
+    for c in criteria:
+        name = c.get("name", "")
+        cscore = c.get("score", 0)
+        cmax = c.get("max_score", 0)
+        # Normalise per-criterion score to 0.0–1.0
+        if cmax and float(cmax) > 0:
+            norm_score = float(cscore) / float(cmax)
+        else:
+            norm_score = float(cscore)
+        entry: dict = {
+            "criteria": name,
+            "score": round(norm_score, 4),
+            "score_out_of": 1.0,
+        }
+        reasoning = c.get("reasoning", "")
+        if reasoning:
+            entry["description"] = reasoning
+        marker_criteria.append(entry)
+    if marker_criteria:
+        print(">>> CRITERIA >>>")
+        print(json.dumps(marker_criteria))
+        print("<<< CRITERIA <<<")
 def _print_judge_result(data: dict) -> None:
     """Print detailed judge grading result for verifier stdout capture."""
     model = data.get("model_used", "unknown")
@@ -591,6 +896,12 @@ def _print_judge_result(data: dict) -> None:
             if len(reasoning) > 200:
                 reasoning = reasoning[:200] + "..."
             print(f"[C]   {name}: {cscore}/{cmax} — {reasoning}")
+        # Emit structured criteria via stdout markers so the orchestrator
+        # (_extract_criteria_from_stdout) and client can render a rubric
+        # breakdown.  Schema per element:
+        #   {"criteria": str, "score": float, "score_out_of": float, "description"?: str}
+        _print_criteria_markers(criteria)
     else:
         print(f"[C] Score: {normalized:.2f}")
@@ -605,6 +916,26 @@ def _print_judge_result(data: dict) -> None:
         for url in golden_urls:
             print(f"[C] Gold reference: {url}")
+    # Print agentic judge steps if present
+    agent_steps = (data.get("accumulators") or {}).get("agent_steps")
+    if agent_steps:
+        print(f"[C] Agentic judge: {len(agent_steps)} steps")
+        for step in agent_steps:
+            stype = step.get("type", "?")
+            if stype == "mcp_connect":
+                print(f"[C]   MCP connected ({step.get('tools_available', '?')} tools)")
+            elif stype == "tool_call":
+                tool = step.get("tool", "?")
+                turn = step.get("turn", "?")
+                is_err = step.get("is_error", False)
+                result_preview = step.get("result", "")[:100]
+                status = "ERROR" if is_err else "ok"
+                print(f"[C]   Turn {turn}: {tool}() → {status}: {result_preview}")
+            elif stype == "final_response":
+                print(f"[C]   Turn {step.get('turn', '?')}: final response")
+            elif stype == "max_turns_reached":
+                print(f"[C]   Max turns reached ({step.get('turns_used', '?')})")
 # ---------------------------------------------------------------------------
 # Sync judge
@@ -632,6 +963,7 @@ class SyncJudge:
         reference_claims: Optional[str] = None,
         conversation: Optional[List[dict]] = None,
         images: Optional[Dict[str, Image]] = None,
+        files: Optional[Dict[str, File]] = None,
         model: Optional[str] = None,
         provider: Optional[str] = None,
         agentic: bool = False,
@@ -651,7 +983,8 @@ class SyncJudge:
             context: Additional context for the judge.
             reference_claims: Reference analysis claims (folded into context).
             conversation: Conversation history as list of message dicts.
-            images: List of Image objects for the judge.
+            images: Named Image objects for the judge.
+            files: Named File objects for the judge (PDF, CSV, STEP, etc.).
             model: Override LLM model (server picks default if None).
             provider: Override LLM provider (server picks default if None).
             agentic: If True, the orchestrator collects artifacts from the instance.
@@ -668,6 +1001,7 @@ class SyncJudge:
             reference_claims=reference_claims,
             conversation=conversation,
             images=images,
+            files=files,
             model=model,
             provider=provider,
             agentic=agentic,
@@ -675,6 +1009,6 @@ class SyncJudge:
             task_id=task_id,
         )
-        _print_judge_call_start(rubric, images, agentic, model)
+        _print_judge_call_start(rubric, images, agentic, model, files=files)
         response = self._client.request("POST", "/v1/judge/grade", json=body)
         return _parse_grade_response(response.json())

{fleet_python-0.2.114 → fleet_python-0.2.116/fleet_python.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fleet-python
-Version: 0.2.114
+Version: 0.2.116
 Summary: Python SDK for Fleet environments
 Author-email: Fleet AI <nic@fleet.so>
 License: Apache-2.0

{fleet_python-0.2.114 → fleet_python-0.2.116}/fleet_python.egg-info/SOURCES.txt RENAMED Viewed

@@ -117,6 +117,7 @@ tests/test_app_method.py
 tests/test_expect_exactly.py
 tests/test_expect_only.py
 tests/test_instance_dispatch.py
+tests/test_judge_criteria_markers.py
 tests/test_sqlite_resource_dual_mode.py
 tests/test_sqlite_shared_memory_behavior.py
 tests/test_verifier_from_string.py

{fleet_python-0.2.114 → fleet_python-0.2.116}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "fleet-python"
-version = "0.2.114"
+version = "0.2.116"
 description = "Python SDK for Fleet environments"
 authors = [
     {name = "Fleet AI", email = "nic@fleet.so"},

fleet-python 0.2.114__tar.gz → 0.2.116__tar.gz

fleet-python 0.2.114__tar.gz → 0.2.116__tar.gz