PyPI - fleet-python - Versions diffs - 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl - Mend

fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

examples/export_tasks.py +16 -5
examples/export_tasks_filtered.py +245 -0
examples/fetch_tasks.py +230 -0
examples/import_tasks.py +140 -8
examples/iterate_verifiers.py +725 -0
fleet/__init__.py +128 -5
fleet/_async/__init__.py +27 -3
fleet/_async/base.py +24 -9
fleet/_async/client.py +938 -41
fleet/_async/env/client.py +60 -3
fleet/_async/instance/client.py +52 -7
fleet/_async/models.py +15 -0
fleet/_async/resources/api.py +200 -0
fleet/_async/resources/sqlite.py +1801 -46
fleet/_async/tasks.py +122 -25
fleet/_async/verifiers/bundler.py +22 -21
fleet/_async/verifiers/verifier.py +25 -19
fleet/agent/__init__.py +32 -0
fleet/agent/gemini_cua/Dockerfile +45 -0
fleet/agent/gemini_cua/__init__.py +10 -0
fleet/agent/gemini_cua/agent.py +759 -0
fleet/agent/gemini_cua/mcp/main.py +108 -0
fleet/agent/gemini_cua/mcp_server/__init__.py +5 -0
fleet/agent/gemini_cua/mcp_server/main.py +105 -0
fleet/agent/gemini_cua/mcp_server/tools.py +178 -0
fleet/agent/gemini_cua/requirements.txt +5 -0
fleet/agent/gemini_cua/start.sh +30 -0
fleet/agent/orchestrator.py +854 -0
fleet/agent/types.py +49 -0
fleet/agent/utils.py +34 -0
fleet/base.py +34 -9
fleet/cli.py +1061 -0
fleet/client.py +1060 -48
fleet/config.py +1 -1
fleet/env/__init__.py +16 -0
fleet/env/client.py +60 -3
fleet/eval/__init__.py +15 -0
fleet/eval/uploader.py +231 -0
fleet/exceptions.py +8 -0
fleet/instance/client.py +53 -8
fleet/instance/models.py +1 -0
fleet/models.py +303 -0
fleet/proxy/__init__.py +25 -0
fleet/proxy/proxy.py +453 -0
fleet/proxy/whitelist.py +244 -0
fleet/resources/api.py +200 -0
fleet/resources/sqlite.py +1845 -46
fleet/tasks.py +113 -20
fleet/utils/__init__.py +7 -0
fleet/utils/http_logging.py +178 -0
fleet/utils/logging.py +13 -0
fleet/utils/playwright.py +440 -0
fleet/verifiers/bundler.py +22 -21
fleet/verifiers/db.py +985 -1
fleet/verifiers/decorator.py +1 -1
fleet/verifiers/verifier.py +25 -19
{fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/METADATA +28 -1
fleet_python-0.2.105.dist-info/RECORD +115 -0
{fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/WHEEL +1 -1
fleet_python-0.2.105.dist-info/entry_points.txt +2 -0
tests/test_app_method.py +85 -0
tests/test_expect_exactly.py +4148 -0
tests/test_expect_only.py +2593 -0
tests/test_instance_dispatch.py +607 -0
tests/test_sqlite_resource_dual_mode.py +263 -0
tests/test_sqlite_shared_memory_behavior.py +117 -0
fleet_python-0.2.66b2.dist-info/RECORD +0 -81
tests/test_verifier_security.py +0 -427
{fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/licenses/LICENSE +0 -0
{fleet_python-0.2.66b2.dist-info → fleet_python-0.2.105.dist-info}/top_level.txt +0 -0

fleet/_async/tasks.py CHANGED Viewed

@@ -38,6 +38,12 @@ class Task(BaseModel):
     metadata: Optional[Dict[str, Any]] = Field(
         default_factory=dict, description="Additional task metadata"
     )
+    writer_metadata: Optional[Dict[str, Any]] = Field(
+        None, description="Metadata filled by task writer"
+    )
+    qa_metadata: Optional[Dict[str, Any]] = Field(
+        None, description="Metadata filled by QA reviewer"
+    )
     output_json_schema: Optional[Dict[str, Any]] = Field(
         None, description="JSON schema for expected output format"
     )
@@ -209,23 +215,33 @@ class Task(BaseModel):
             )
             self.verifier = verifier
-    async def make_env(self, region: Optional[str] = None):
+    async def make_env(
+        self,
+        region: Optional[str] = None,
+        image_type: Optional[str] = None,
+        ttl_seconds: Optional[int] = None,
+        run_id: Optional[str] = None,
+        heartbeat_interval: Optional[int] = None,
+    ):
         """Create an environment instance for this task's environment.
-        Uses the task's env_id (and version if present) to create the env.
+        Alias for make() method. Uses the task's env_id (and version if present) to create the env.
         """
-        if not self.env_id:
-            raise ValueError("Task has no env_id defined")
-        # Deferred import to avoid circular dependencies
-        from .client import AsyncFleet
-        return await AsyncFleet().make(env_key=self.env_key, region=region)
+        return await self.make(
+            region=region,
+            image_type=image_type,
+            ttl_seconds=ttl_seconds,
+            run_id=run_id,
+            heartbeat_interval=heartbeat_interval,
+        )
     async def make(
         self,
         region: Optional[str] = None,
         image_type: Optional[str] = None,
         ttl_seconds: Optional[int] = None,
+        run_id: Optional[str] = None,
+        heartbeat_interval: Optional[int] = None,
     ):
         """Create an environment instance with task's configuration.
@@ -233,11 +249,15 @@ class Task(BaseModel):
         - env_key (env_id + version)
         - data_key (data_id + data_version, if present)
         - env_variables (if present)
+        - run_id (if present)
+        - heartbeat_interval (if present)
         Args:
             region: Optional AWS region for the environment
             image_type: Optional image type for the environment
             ttl_seconds: Optional TTL in seconds for the instance
+            run_id: Optional run ID to group instances
+            heartbeat_interval: Optional heartbeat interval in seconds (30-3600)
         Returns:
             Environment instance configured for this task
@@ -245,7 +265,7 @@ class Task(BaseModel):
         Example:
             task = fleet.Task(key="my-task", prompt="...", env_id="my-env",
                             data_id="my-data", data_version="v1.0")
-            env = await task.make(region="us-west-2")
+            env = await task.make(region="us-west-2", run_id="my-batch-123", heartbeat_interval=60)
         """
         if not self.env_id:
             raise ValueError("Task has no env_id defined")
@@ -260,11 +280,13 @@ class Task(BaseModel):
             env_variables=self.env_variables if self.env_variables else None,
             image_type=image_type,
             ttl_seconds=ttl_seconds,
+            run_id=run_id,
+            heartbeat_interval=heartbeat_interval,
         )
 def verifier_from_string(
-    verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = ""
+    verifier_func: str, verifier_id: str, verifier_key: str, sha256: str = "", verifier_runtime_version: str = ""
 ) -> "VerifierFunction":
     """Create a verifier function from string code.
@@ -273,32 +295,87 @@ def verifier_from_string(
         verifier_id: Unique identifier for the verifier
         verifier_key: Key/name for the verifier
         sha256: SHA256 hash of the verifier code
+        verifier_runtime_version: Verifier runtime version
     Returns:
         VerifierFunction instance that can be used to verify tasks
     """
     try:
         import inspect
+        import re
+        import json
+        import string
         from .verifiers.verifier import AsyncVerifierFunction
         from fleet.verifiers.code import TASK_SUCCESSFUL_SCORE, TASK_FAILED_SCORE
         from fleet.verifiers.db import IgnoreConfig
-        from fleet.verifiers.parsing import parse_and_validate_verifier
-        # Validate the code and extract function name
-        # This ensures no arbitrary code execution during import
-        func_name = parse_and_validate_verifier(verifier_func)
-        # Create a local namespace for executing the code
-        local_namespace = {
+        # Strip @verifier decorator if present to avoid double-wrapping
+        # Remove lines like: @verifier(key="...")
+        cleaned_code = re.sub(r"@verifier\([^)]*\)\s*\n", "", verifier_func)
+        # Also remove the verifier import if present
+        # Use MULTILINE flag to match beginning of lines with ^
+        cleaned_code = re.sub(r"^from fleet\.verifiers.*import.*verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
+        cleaned_code = re.sub(r"^from fleet import verifier.*$\n?", "", cleaned_code, flags=re.MULTILINE)
+        cleaned_code = re.sub(r"^import fleet\.verifiers.*$\n?", "", cleaned_code, flags=re.MULTILINE)
+        cleaned_code = re.sub(r"^import fleet$\n?", "", cleaned_code, flags=re.MULTILINE)
+        # Define helper functions for verifier execution
+        _TRANSLATOR = str.maketrans(string.punctuation, " " * len(string.punctuation))
+        def _normalize_text(value: str) -> str:
+            text = value.lower().translate(_TRANSLATOR)
+            return "".join(text.split())
+        def _stringify_content(content: Any) -> str:
+            if isinstance(content, (dict, list)):
+                return json.dumps(content, sort_keys=True)
+            return str(content)
+        def normalized_contains(target: str, blob: Any) -> bool:
+            normalized_target = _normalize_text(target)
+            normalized_blob = _normalize_text(_stringify_content(blob))
+            return normalized_target in normalized_blob
+        def extract_numbers(text: str) -> list:
+            cleaned_text = text.replace(',', '')
+            pattern = r'-?\d+\.?\d*'
+            matches = re.findall(pattern, cleaned_text)
+            return [float(num) for num in matches]
+        def contains_number(text: str, target_number) -> bool:
+            numbers = extract_numbers(text)
+            try:
+                if isinstance(target_number, str):
+                    target_number = target_number.replace(',', '')
+                target = float(target_number)
+            except (ValueError, AttributeError):
+                return False
+            return target in numbers
+        # Create a globals namespace with all required imports
+        exec_globals = globals().copy()
+        exec_globals.update({
             "TASK_SUCCESSFUL_SCORE": TASK_SUCCESSFUL_SCORE,
             "TASK_FAILED_SCORE": TASK_FAILED_SCORE,
             "IgnoreConfig": IgnoreConfig,
             "Environment": object,  # Add Environment type if needed
-        }
+            "normalized_contains": normalized_contains,
+            "extract_numbers": extract_numbers,
+            "contains_number": contains_number,
+            "json": json,
+            "re": re,
+            "string": string,
+        })
-        # Execute the verifier code in the namespace
-        # This is now safe because we validated it contains only declarative code
-        exec(verifier_func, globals(), local_namespace)
+        # Create a local namespace for executing the code
+        local_namespace = {}
+        # Execute the cleaned verifier code in the namespace
+        exec(cleaned_code, exec_globals, local_namespace)
+        # Merge local_namespace into exec_globals so helper functions are accessible
+        # from the main verifier function when it's called
+        exec_globals.update(local_namespace)
         # Find the function that was defined (not imported)
         # Functions defined via exec have co_filename == '<string>'
@@ -319,6 +396,7 @@ def verifier_from_string(
             verifier_id=verifier_id,
             sha256=sha256,
             raw_code=verifier_func,
+            verifier_runtime_version=verifier_runtime_version if verifier_runtime_version else None,
         )
         return verifier_instance
@@ -384,7 +462,12 @@ async def load_tasks(
 async def update_task(
-    task_key: str, prompt: Optional[str] = None, verifier_code: Optional[str] = None
+    task_key: str,
+    prompt: Optional[str] = None,
+    verifier_code: Optional[str] = None,
+    metadata: Optional[Dict[str, Any]] = None,
+    writer_metadata: Optional[Dict[str, Any]] = None,
+    qa_metadata: Optional[Dict[str, Any]] = None,
 ):
     """Convenience function to update an existing task.
@@ -392,6 +475,9 @@ async def update_task(
         task_key: The key of the task to update
         prompt: New prompt text for the task (optional)
         verifier_code: Python code for task verification (optional)
+        metadata: Additional metadata for the task (optional)
+        writer_metadata: Metadata filled by task writer (optional)
+        qa_metadata: Metadata filled by QA reviewer (optional)
     Returns:
         TaskResponse containing the updated task details
@@ -399,16 +485,25 @@ async def update_task(
     Examples:
         response = await fleet.update_task("my-task", prompt="New prompt text")
         response = await fleet.update_task("my-task", verifier_code="def verify(env): return True")
+        response = await fleet.update_task("my-task", metadata={"seed": 42, "story": "Updated story"})
+        response = await fleet.update_task("my-task", writer_metadata={"author": "john"})
     """
     from .global_client import get_client
     client = get_client()
     return await client.update_task(
-        task_key=task_key, prompt=prompt, verifier_code=verifier_code
+        task_key=task_key,
+        prompt=prompt,
+        verifier_code=verifier_code,
+        metadata=metadata,
+        writer_metadata=writer_metadata,
+        qa_metadata=qa_metadata,
     )
-async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None):
+async def get_task(
+    task_key: str, version_id: Optional[str] = None, team_id: Optional[str] = None
+):
     """Convenience function to get a task by key and optional version.
     Args:
@@ -427,7 +522,9 @@ async def get_task(task_key: str, version_id: Optional[str] = None, team_id: Opt
     from .global_client import get_client
     client = get_client()
-    return await client.get_task(task_key=task_key, version_id=version_id, team_id=team_id)
+    return await client.get_task(
+        task_key=task_key, version_id=version_id, team_id=team_id
+    )
 async def import_task(task: Task, project_key: Optional[str] = None):

fleet/_async/verifiers/bundler.py CHANGED Viewed

@@ -37,7 +37,7 @@ class FunctionBundler:
     ) -> bytes:
         """Create a function bundle with statically extracted code."""
-        logger.info(f"Creating function bundle for {func.__name__}")
+        # logger.info(f"Creating function bundle for {func.__name__}")
         # 1. Parse the main function and find dependencies
         mod_file = Path(func.__code__.co_filename)
@@ -115,7 +115,7 @@ class FunctionBundler:
         # Find function calls within the verifier function
         called_functions = self._extract_function_calls(main_func_ast)
-        logger.debug(f"Functions called in verifier: {called_functions}")
+        # logger.debug(f"Functions called in verifier: {called_functions}")
         # Find all functions defined in the module
         module_functions = {}
@@ -128,7 +128,7 @@ class FunctionBundler:
         for func_name in called_functions:
             if func_name in module_functions and func_name != func.__name__:
                 same_module_deps.append(func_name)
-                logger.debug(f"Found same-module dependency: {func_name}")
+                # logger.debug(f"Found same-module dependency: {func_name}")
         # Separate local and external imports
         local_imports = {}
@@ -292,7 +292,7 @@ class FunctionBundler:
                     code = ast.unparse(node)
                     extracted_code.append(code)
                 except Exception as e:
-                    logger.warning(f"Could not unparse AST node: {e}")
+                    # logger.warning(f"Could not unparse AST node: {e}")
                     # Fallback to original source extraction
                     lines = content.split("\n")
                     start_line = node.lineno - 1
@@ -305,11 +305,11 @@ class FunctionBundler:
                     extracted_code.append(code)
             result = "\n\n".join(extracted_code)
-            logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
+            # logger.debug(f"Extracted {len(extracted_code)} items from {file_path}")
             return result
         except Exception as e:
-            logger.warning(f"Failed to extract functions from {file_path}: {e}")
+            # logger.warning(f"Failed to extract functions from {file_path}: {e}")
             # Fallback to including the entire file
             with open(file_path, "r", encoding="utf-8") as f:
                 return f.read()
@@ -464,14 +464,14 @@ class FunctionBundler:
                 version = dist.version  # Get the installed version
                 package_with_version = f"{package_name}=={version}"
                 packages.add(package_with_version)
-                logger.debug(f"Mapped {mod} -> {package_with_version}")
+                # logger.debug(f"Mapped {mod} -> {package_with_version}")
             except imd.PackageNotFoundError:
                 # Skip stdlib or local modules
-                logger.debug(f"Skipping {mod} (stdlib or local)")
+                # logger.debug(f"Skipping {mod} (stdlib or local)")
                 continue
         package_list = list(packages)
-        logger.debug(f"Final package list: {package_list}")
+        # logger.debug(f"Final package list: {package_list}")
         return package_list
     def _merge_requirements(
@@ -511,10 +511,10 @@ class FunctionBundler:
             if pkg_name not in seen_packages:
                 final_requirements.append(req)
                 seen_packages.add(pkg_name)
-            else:
-                logger.debug(
-                    f"Skipping auto-detected {req}, using explicit version instead"
-                )
+            # else:
+            #     logger.debug(
+            #         f"Skipping auto-detected {req}, using explicit version instead"
+            #     )
         # Always ensure fleet-python is included
         if "fleet-python" not in seen_packages:
@@ -565,9 +565,9 @@ class FunctionBundler:
                         )
                         if dep_src:
                             same_module_code += f"\n{dep_src}\n"
-                            logger.debug(
-                                f"Extracted same-module dependency: {dep_name}"
-                            )
+                            # logger.debug(
+                            #     f"Extracted same-module dependency: {dep_name}"
+                            # )
                 # Create verifier.py with the main function
                 verifier_file = build_dir / "verifier.py"
@@ -586,7 +586,7 @@ class FunctionBundler:
 {code}
 """
                     dest_path.write_text(extracted_content)
-                    logger.debug(f"Created extracted file: {relative_path}")
+                    # logger.debug(f"Created extracted file: {relative_path}")
                     # Ensure __init__.py files exist
                     self._ensure_init_files(Path(relative_path), build_dir)
@@ -595,7 +595,7 @@ class FunctionBundler:
                 return self._create_zip_bundle(build_dir)
             except Exception as e:
-                logger.error(f"Failed to build function bundle: {e}")
+                # logger.error(f"Failed to build function bundle: {e}")
                 raise RuntimeError(f"Function bundle creation failed: {e}")
     def _ensure_init_files(self, rel_path: Path, build_dir: Path):
@@ -607,7 +607,7 @@ class FunctionBundler:
             if not init_file.exists():
                 init_file.parent.mkdir(parents=True, exist_ok=True)
                 init_file.write_text("# Auto-generated __init__.py")
-                logger.debug(f"Created __init__.py: {current}")
+                # logger.debug(f"Created __init__.py: {current}")
             current = current.parent
     def _create_zip_bundle(self, build_dir: Path) -> bytes:
@@ -621,7 +621,7 @@ class FunctionBundler:
                     zf.write(file_path, arcname)
         bundle_size = len(zip_buffer.getvalue())
-        logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
+        # logger.debug(f"Created function bundle ({bundle_size:,} bytes)")
         return zip_buffer.getvalue()
     def _extract_function_source(
@@ -662,7 +662,8 @@ class FunctionBundler:
                     return "\n".join(func_lines)
         except Exception as e:
-            logger.warning(f"Failed to extract function {function_name}: {e}")
+            # logger.warning(f"Failed to extract function {function_name}: {e}")
+            pass
         return None

fleet/_async/verifiers/verifier.py CHANGED Viewed

@@ -42,6 +42,7 @@ class AsyncVerifierFunction:
         verifier_id: Optional[str] = None,
         sha256: Optional[str] = None,
         raw_code: Optional[str] = None,
+        verifier_runtime_version: Optional[str] = None,
     ):
         self.func = func
         self.key = key
@@ -52,6 +53,7 @@ class AsyncVerifierFunction:
         self._bundle_data: Optional[bytes] = None  # Cached bundle data
         self._raw_code: Optional[str] = raw_code  # Store raw code if provided
         self._is_async = asyncio.iscoroutinefunction(func)
+        self.verifier_runtime_version = verifier_runtime_version
         # Copy function metadata
         functools.update_wrapper(self, func)
@@ -79,9 +81,9 @@ class AsyncVerifierFunction:
                 self._bundle_data = zip_buffer.getvalue()
                 self._bundle_sha = _get_bundle_sha(self._bundle_data)
-                logger.debug(
-                    f"Created bundle from raw code for {self.key} with SHA: {self._bundle_sha}"
-                )
+                # logger.debug(
+                #     f"Created bundle from raw code for {self.key} with SHA: {self._bundle_sha}"
+                # )
             else:
                 # Try to create bundle from function source
                 try:
@@ -89,9 +91,9 @@ class AsyncVerifierFunction:
                         self.func, self.extra_requirements, self.verifier_id
                     )
                     self._bundle_sha = _get_bundle_sha(self._bundle_data)
-                    logger.debug(
-                        f"Created bundle for {self.key} with SHA: {self._bundle_sha}"
-                    )
+                    # logger.debug(
+                    #     f"Created bundle for {self.key} with SHA: {self._bundle_sha}"
+                    # )
                 except OSError as e:
                     # Can't create bundle - no source and no raw code
                     raise OSError(f"Cannot create bundle for {self.key}: {e}")
@@ -104,20 +106,21 @@ class AsyncVerifierFunction:
         # If bundle_data is empty, we're using server-side bundle
         if not bundle_data:
-            logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
+            # logger.debug(f"Using server-side bundle {bundle_sha[:8]}...")
             return bundle_sha, False  # No upload needed, server has it
         # Always check if bundle exists on server
         try:
             exists = await env.check_bundle_exists(bundle_sha)
             if exists.success:
-                logger.info(f"Bundle {bundle_sha[:8]}... found on server")
+                # logger.info(f"Bundle {bundle_sha[:8]}... found on server")
                 return bundle_sha, False  # Found on server, no upload needed
         except Exception as e:
-            logger.warning(f"Failed to check bundle existence: {e}")
+            # logger.warning(f"Failed to check bundle existence: {e}")
+            pass
         # Bundle not found on server - upload needed
-        logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
+        # logger.info(f"Bundle {bundle_sha[:8]}... needs to be uploaded")
         return bundle_sha, True  # Upload needed
     async def __call__(self, env: AsyncEnv, *args, **kwargs) -> float:
@@ -147,7 +150,7 @@ class AsyncVerifierFunction:
                     )
         except Exception as e:
-            logger.error(f"Error in verifier {self.key}: {e}")
+            # logger.error(f"Error in verifier {self.key}: {e}")
             # Return error score 0
             return 0.0
@@ -179,7 +182,7 @@ class AsyncVerifierFunction:
                 try:
                     return float(result)
                 except (ValueError, TypeError):
-                    logger.warning(f"Could not convert result to float: {result}")
+                    # logger.warning(f"Could not convert result to float: {result}")
                     return 0.0
     def _raise_remote_error(self, error_info: Dict[str, Any]):
@@ -238,7 +241,7 @@ Remote traceback:
             if needs_upload:
                 # Need to upload bundle to S3
-                logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
+                # logger.info(f"Uploading bundle {bundle_sha[:8]}... for {self.key}")
                 bundle_data, _ = self._get_or_create_bundle()
                 response = await env.execute_verifier_remote(
@@ -250,13 +253,14 @@ Remote traceback:
                     args_array=args_array,
                     kwargs=kwargs,
                     needs_upload=True,
+                    verifier_runtime_version=self.verifier_runtime_version,
                 )
-                logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
+                # logger.debug(f"Bundle {bundle_sha[:8]}... uploaded successfully")
             else:
                 # Bundle already available - execute without upload
-                logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
+                # logger.info(f"Bundle {bundle_sha[:8]}... already cached for {self.key}")
                 response = await env.execute_verifier_remote(
                     bundle_data=b"",  # Empty bundle since it's cached
                     bundle_sha=bundle_sha,
@@ -266,6 +270,7 @@ Remote traceback:
                     args_array=args_array,
                     kwargs=kwargs,
                     needs_upload=False,
+                    verifier_runtime_version=self.verifier_runtime_version,
                 )
             return response
@@ -273,9 +278,9 @@ Remote traceback:
         except Exception as e:
             # Check if error indicates bundle not found and retry with upload
             if self._is_bundle_not_found_error(e) and not needs_upload:
-                logger.info(
-                    f"Bundle {bundle_sha[:8]}... not found on server, uploading..."
-                )
+                # logger.info(
+                #     f"Bundle {bundle_sha[:8]}... not found on server, uploading..."
+                # )
                 bundle_data, _ = self._get_or_create_bundle()
                 response = await env.execute_verifier_remote(
                     bundle_data=bundle_data,
@@ -286,10 +291,11 @@ Remote traceback:
                     args_array=args_array,
                     kwargs=kwargs,
                     needs_upload=True,
+                    verifier_runtime_version=self.verifier_runtime_version,
                 )
                 return response
             else:
-                logger.error(f"Error in remote execution of {self.key}: {e}")
+                # logger.error(f"Error in remote execution of {self.key}: {e}")
                 raise

fleet/agent/__init__.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""Fleet Agent - Run agents locally with Docker-based browser control.
+Usage:
+    # Via CLI
+    flt eval run -p my-project -m google/gemini-2.5-pro --local gemini_cua
+    # Via Python
+    from fleet.agent import run_agent
+    results = await run_agent(
+        project_key="my-project",
+        agent="gemini_cua",
+        api_keys={"GEMINI_API_KEY": "xxx"},
+    )
+"""
+from .types import AgentConfig, AgentResult, TaskResult
+from .utils import get_agent_path, AGENT_DIR
+# Import these last to avoid circular imports
+from .orchestrator import run_agent, AgentOrchestrator
+__all__ = [
+    "AgentConfig",
+    "AgentResult",
+    "TaskResult",
+    "run_agent",
+    "AgentOrchestrator",
+    "get_agent_path",
+    "AGENT_DIR",
+]

fleet/agent/gemini_cua/Dockerfile ADDED Viewed

@@ -0,0 +1,45 @@
+# MCP Server - Browser control in Docker with optional VNC
+FROM python:3.11-slim
+# Install dependencies for Chromium and VNC
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    # Chromium dependencies
+    wget fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 \
+    libatspi2.0-0 libcups2 libdbus-1-3 libdrm2 libgbm1 libgtk-3-0 \
+    libnspr4 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxkbcommon0 \
+    libxrandr2 xdg-utils \
+    # VNC and display for headful mode
+    xvfb x11vnc fluxbox \
+    # noVNC for web-based viewing
+    novnc websockify \
+    # Utilities
+    procps net-tools \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Python deps (includes fleet-python for utils like fleet.utils.playwright)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt && playwright install chromium
+# Copy MCP server files (standalone scripts that import from installed fleet-python)
+COPY mcp_server/ ./mcp_server/
+# Copy start script
+COPY start.sh .
+RUN chmod +x start.sh
+# Environment
+ENV PORT=8765 \
+    SCREEN_WIDTH=1366 \
+    SCREEN_HEIGHT=768 \
+    HEADLESS=true \
+    VNC_PORT=5900 \
+    NOVNC_PORT=6080 \
+    DISPLAY=:99
+# Expose ports: MCP server, VNC, noVNC
+EXPOSE 8765 5900 6080
+# Start script handles display setup
+CMD ["./start.sh"]

fleet/agent/gemini_cua/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Gemini Computer Use Agent.
+- agent.py: Runs on HOST, calls Gemini API
+- cua_server.py: Runs in Docker, controls browser via Playwright
+"""
+from pathlib import Path
+AGENT_DIR = Path(__file__).parent

fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl

fleet-python 0.2.66b2__py3-none-any.whl → 0.2.105__py3-none-any.whl