PyPI - futurehouse-client - Versions diffs - 0.3.17.dev56__py3-none-any.whl → 0.3.18__py3-none-any.whl - Mend

futurehouse-client 0.3.17.dev56__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

futurehouse_client/clients/rest_client.py CHANGED Viewed

@@ -1,23 +1,27 @@
 import ast
+import asyncio
 import base64
+import contextlib
 import copy
 import importlib.metadata
 import inspect
 import json
 import logging
 import os
+import sys
 import tempfile
+import time
 import uuid
-from collections.abc import Mapping
-from datetime import datetime
+from collections.abc import Collection
 from pathlib import Path
 from types import ModuleType
-from typing import Any, ClassVar, assert_never, cast
+from typing import Any, ClassVar, cast
 from uuid import UUID
 import cloudpickle
 from aviary.functional import EnvironmentBuilder
 from httpx import (
+    AsyncClient,
     Client,
     CloseError,
     ConnectError,
@@ -29,7 +33,6 @@ from httpx import (
     RemoteProtocolError,
 )
 from ldp.agent import AgentConfig
-from pydantic import BaseModel, ConfigDict, model_validator
 from requests.exceptions import RequestException, Timeout
 from tenacity import (
     retry,
@@ -37,15 +40,22 @@ from tenacity import (
     stop_after_attempt,
     wait_exponential,
 )
+from tqdm import tqdm as sync_tqdm
+from tqdm.asyncio import tqdm
 from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
-    APIKeyPayload,
     AuthType,
     JobDeploymentConfig,
+    PQATaskResponse,
     Stage,
     TaskRequest,
+    TaskResponse,
+    TaskResponseVerbose,
 )
+from futurehouse_client.models.rest import ExecutionStatus
+from futurehouse_client.utils.auth import RefreshingJWT
+from futurehouse_client.utils.general import gather_with_concurrency
 from futurehouse_client.utils.module_utils import (
     OrganizationSelector,
     fetch_environment_function_docstring,
@@ -55,24 +65,14 @@ from futurehouse_client.utils.monitoring import (
 )
 logger = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    stream=sys.stdout,
+)
+logging.getLogger("httpx").setLevel(logging.WARNING)
 TaskRequest.model_rebuild()
-retry_if_connection_error = retry_if_exception_type((
-    # From requests
-    Timeout,
-    ConnectionError,
-    RequestException,
-    # From httpx
-    ConnectError,
-    ConnectTimeout,
-    ReadTimeout,
-    ReadError,
-    NetworkError,
-    RemoteProtocolError,
-    CloseError,
-))
 FILE_UPLOAD_IGNORE_PARTS = {
     ".ruff_cache",
     "__pycache__",
@@ -103,114 +103,35 @@ class InvalidTaskDescriptionError(Exception):
     """Raised when the task description is invalid or empty."""
-class SimpleOrganization(BaseModel):
-    id: int
-    name: str
-    display_name: str
-# 5 minute default for JWTs
-JWT_TOKEN_CACHE_EXPIRY: int = 300  # seconds
-class TaskResponse(BaseModel):
-    """Base class for task responses. This holds attributes shared over all futurehouse jobs."""
-    model_config = ConfigDict(extra="ignore")
-    status: str
-    query: str
-    user: str | None = None
-    created_at: datetime
-    job_name: str
-    public: bool
-    shared_with: list[SimpleOrganization] | None = None
-    build_owner: str | None = None
-    environment_name: str | None = None
-    agent_name: str | None = None
-    task_id: UUID | None = None
-    @model_validator(mode="before")
-    @classmethod
-    def validate_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        # Extract fields from environment frame state
-        if not isinstance(data, dict):
-            return data
-        # TODO: We probably want to remove these two once we define the final names.
-        data["job_name"] = data.get("crow")
-        data["query"] = data.get("task")
-        if not (env_frame := data.get("environment_frame", {})):
-            return data
-        state = env_frame.get("state", {}).get("state", {})
-        data["task_id"] = cast(UUID, state.get("id")) if state.get("id") else None
-        if not (metadata := data.get("metadata", {})):
-            return data
-        data["environment_name"] = metadata.get("environment_name")
-        data["agent_name"] = metadata.get("agent_name")
-        return data
-class PQATaskResponse(TaskResponse):
-    model_config = ConfigDict(extra="ignore")
-    answer: str | None = None
-    formatted_answer: str | None = None
-    answer_reasoning: str | None = None
-    has_successful_answer: bool | None = None
-    total_cost: float | None = None
-    total_queries: int | None = None
-    @model_validator(mode="before")
-    @classmethod
-    def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        # Extract fields from environment frame state
-        if not isinstance(data, dict):
-            return data
-        if not (env_frame := data.get("environment_frame", {})):
-            return data
-        state = env_frame.get("state", {}).get("state", {})
-        response = state.get("response", {})
-        answer = response.get("answer", {})
-        usage = state.get("info", {}).get("usage", {})
-        # Add additional PQA specific fields to data so that pydantic can validate the model
-        data["answer"] = answer.get("answer")
-        data["formatted_answer"] = answer.get("formatted_answer")
-        data["answer_reasoning"] = answer.get("answer_reasoning")
-        data["has_successful_answer"] = answer.get("has_successful_answer")
-        data["total_cost"] = cast(float, usage.get("total_cost"))
-        data["total_queries"] = cast(int, usage.get("total_queries"))
-        return data
-    def clean_verbose(self) -> "TaskResponse":
-        """Clean the verbose response from the server."""
-        self.request = None
-        self.response = None
-        return self
-class TaskResponseVerbose(TaskResponse):
-    """Class for responses to include all the fields of a task response."""
-    model_config = ConfigDict(extra="allow")
-    public: bool
-    agent_state: list[dict[str, Any]] | None = None
-    environment_frame: dict[str, Any] | None = None
-    metadata: dict[str, Any] | None = None
-    shared_with: list[SimpleOrganization] | None = None
 class FileUploadError(RestClientError):
     """Raised when there's an error uploading a file."""
+retry_if_connection_error = retry_if_exception_type((
+    # From requests
+    Timeout,
+    ConnectionError,
+    RequestException,
+    # From httpx
+    ConnectError,
+    ConnectTimeout,
+    ReadTimeout,
+    ReadError,
+    NetworkError,
+    RemoteProtocolError,
+    CloseError,
+    FileUploadError,
+))
+DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds
 class RestClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
     MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
     RETRY_MULTIPLIER: ClassVar[int] = 1
     MAX_RETRY_WAIT: ClassVar[int] = 10
+    DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
     CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
     def __init__(
@@ -222,62 +143,116 @@ class RestClient:
         api_key: str | None = None,
         jwt: str | None = None,
         headers: dict[str, str] | None = None,
+        verbose_logging: bool = False,
     ):
+        if verbose_logging:
+            logger.setLevel(logging.INFO)
+        else:
+            logger.setLevel(logging.WARNING)
         self.base_url = service_uri or stage.value
         self.stage = stage
         self.auth_type = auth_type
         self.api_key = api_key
-        self._clients: dict[str, Client] = {}
+        self._clients: dict[str, Client | AsyncClient] = {}
         self.headers = headers or {}
-        self.auth_jwt = self._run_auth(jwt=jwt)
+        self.jwt = jwt
         self.organizations: list[str] = self._filter_orgs(organization)
     @property
     def client(self) -> Client:
-        """Lazily initialized and cached HTTP client with authentication."""
-        return self.get_client("application/json", with_auth=True)
+        """Authenticated HTTP client for regular API calls."""
+        return cast(Client, self.get_client("application/json", authenticated=True))
     @property
-    def auth_client(self) -> Client:
-        """Lazily initialized and cached HTTP client without authentication."""
-        return self.get_client("application/json", with_auth=False)
+    def async_client(self) -> AsyncClient:
+        """Authenticated async HTTP client for regular API calls."""
+        return cast(
+            AsyncClient,
+            self.get_client("application/json", authenticated=True, async_client=True),
+        )
+    @property
+    def unauthenticated_client(self) -> Client:
+        """Unauthenticated HTTP client for auth operations."""
+        return cast(Client, self.get_client("application/json", authenticated=False))
     @property
     def multipart_client(self) -> Client:
-        """Lazily initialized and cached HTTP client for multipart uploads."""
-        return self.get_client(None, with_auth=True)
+        """Authenticated HTTP client for multipart uploads."""
+        return cast(Client, self.get_client(None, authenticated=True))
     def get_client(
-        self, content_type: str | None = "application/json", with_auth: bool = True
-    ) -> Client:
+        self,
+        content_type: str | None = "application/json",
+        authenticated: bool = True,
+        async_client: bool = False,
+    ) -> Client | AsyncClient:
         """Return a cached HTTP client or create one if needed.
         Args:
             content_type: The desired content type header. Use None for multipart uploads.
-            with_auth: Whether the client should include an Authorization header.
+            authenticated: Whether the client should include authentication.
+            async_client: Whether to use an async client.
         Returns:
             An HTTP client configured with the appropriate headers.
         """
-        # Create a composite key based on content type and auth flag.
-        key = f"{content_type or 'multipart'}_{with_auth}"
+        # Create a composite key based on content type and auth flag
+        key = f"{content_type or 'multipart'}_{authenticated}_{async_client}"
         if key not in self._clients:
             headers = copy.deepcopy(self.headers)
-            if with_auth:
-                headers["Authorization"] = f"Bearer {self.auth_jwt}"
+            auth = None
+            if authenticated:
+                auth = RefreshingJWT(
+                    # authenticated=False will always return a synchronous client
+                    auth_client=cast(
+                        Client, self.get_client("application/json", authenticated=False)
+                    ),
+                    auth_type=self.auth_type,
+                    api_key=self.api_key,
+                    jwt=self.jwt,
+                )
             if content_type:
                 headers["Content-Type"] = content_type
-            self._clients[key] = Client(
-                base_url=self.base_url,
-                headers=headers,
-                timeout=self.REQUEST_TIMEOUT,
+            self._clients[key] = (
+                AsyncClient(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                    auth=auth,
+                )
+                if async_client
+                else Client(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                    auth=auth,
+                )
             )
         return self._clients[key]
-    def __del__(self):
-        """Ensure all cached clients are properly closed when the instance is destroyed."""
+    def close(self):
+        """Explicitly close all cached clients."""
         for client in self._clients.values():
-            client.close()
+            if isinstance(client, Client):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    client.close()
+    async def aclose(self):
+        """Asynchronously close all cached clients."""
+        for client in self._clients.values():
+            if isinstance(client, AsyncClient):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    await client.aclose()
+    def __del__(self):
+        self.close()
     def _filter_orgs(self, organization: str | None = None) -> list[str]:
         filtered_orgs = [
@@ -289,31 +264,6 @@ class RestClient:
             raise ValueError(f"Organization '{organization}' not found.")
         return filtered_orgs
-    def _run_auth(self, jwt: str | None = None) -> str:
-        auth_payload: APIKeyPayload | None
-        if self.auth_type == AuthType.API_KEY:
-            auth_payload = APIKeyPayload(api_key=self.api_key)
-        elif self.auth_type == AuthType.JWT:
-            auth_payload = None
-        else:
-            assert_never(self.auth_type)
-        try:
-            # Use the unauthenticated client for login
-            if auth_payload:
-                response = self.auth_client.post(
-                    "/auth/login", json=auth_payload.model_dump()
-                )
-                response.raise_for_status()
-                token_data = response.json()
-            elif jwt:
-                token_data = {"access_token": jwt, "expires_in": JWT_TOKEN_CACHE_EXPIRY}
-            else:
-                raise ValueError("JWT token required for JWT authentication.")
-            return token_data["access_token"]
-        except Exception as e:
-            raise RestClientError(f"Error authenticating: {e!s}") from e
     def _check_job(self, name: str, organization: str) -> dict[str, Any]:
         try:
             response = self.client.get(
@@ -407,8 +357,11 @@ class RestClient:
                 ),
                 self.client.stream("GET", url, params={"history": history}) as response,
             ):
+                response.raise_for_status()
                 json_data = "".join(response.iter_text(chunk_size=1024))
                 data = json.loads(json_data)
+                if "id" not in data:
+                    data["id"] = task_id
                 verbose_response = TaskResponseVerbose(**data)
             if verbose:
@@ -419,8 +372,52 @@ class RestClient:
             ):
                 return PQATaskResponse(**data)
             return TaskResponse(**data)
-        except ValueError as e:
-            raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
+        except Exception as e:
+            raise TaskFetchError(f"Error getting task: {e!s}") from e
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def aget_task(
+        self, task_id: str | None = None, history: bool = False, verbose: bool = False
+    ) -> "TaskResponse":
+        """Get details for a specific task asynchronously."""
+        try:
+            task_id = task_id or self.trajectory_id
+            url = f"/v0.1/trajectories/{task_id}"
+            full_url = f"{self.base_url}{url}"
+            with external_trace(
+                url=full_url,
+                method="GET",
+                library="httpx",
+                custom_params={
+                    "operation": "get_job",
+                    "job_id": task_id,
+                },
+            ):
+                async with self.async_client.stream(
+                    "GET", url, params={"history": history}
+                ) as response:
+                    response.raise_for_status()
+                    json_data = "".join([
+                        chunk async for chunk in response.aiter_text()
+                    ])
+                    data = json.loads(json_data)
+                    if "id" not in data:
+                        data["id"] = task_id
+                    verbose_response = TaskResponseVerbose(**data)
+            if verbose:
+                return verbose_response
+            if any(
+                JobNames.from_string(job_name) in verbose_response.job_name
+                for job_name in ["crow", "falcon", "owl", "dummy"]
+            ):
+                return PQATaskResponse(**data)
+            return TaskResponse(**data)
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e
@@ -445,10 +442,179 @@ class RestClient:
                 "/v0.1/crows", json=task_data.model_dump(mode="json")
             )
             response.raise_for_status()
-            self.trajectory_id = response.json()["trajectory_id"]
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
         except Exception as e:
             raise TaskFetchError(f"Error creating task: {e!s}") from e
-        return self.trajectory_id
+        return trajectory_id
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def acreate_task(self, task_data: TaskRequest | dict[str, Any]):
+        """Create a new futurehouse task."""
+        if isinstance(task_data, dict):
+            task_data = TaskRequest.model_validate(task_data)
+        if isinstance(task_data.name, JobNames):
+            task_data.name = task_data.name.from_stage(
+                task_data.name.name,
+                self.stage,
+            )
+        try:
+            response = await self.async_client.post(
+                "/v0.1/crows", json=task_data.model_dump(mode="json")
+            )
+            response.raise_for_status()
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
+        except Exception as e:
+            raise TaskFetchError(f"Error creating task: {e!s}") from e
+        return trajectory_id
+    async def arun_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        concurrency: int = 10,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+        trajectory_ids = await gather_with_concurrency(
+            concurrency,
+            [self.acreate_task(task) for task in all_tasks],
+            progress=progress_bar,
+        )
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+        if progress_bar:
+            progress = tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+        while (time.monotonic() - start_time) < timeout:
+            task_results = await gather_with_concurrency(
+                concurrency,
+                [
+                    self.aget_task(task_id, verbose=verbose)
+                    for task_id in trajectory_ids
+                    if task_id not in completed_tasks
+                ],
+            )
+            for task in task_results:
+                task_id = str(task.task_id)
+                if (
+                    task_id not in completed_tasks
+                    and ExecutionStatus(task.status).is_terminal_state()
+                ):
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+            all_done = len(completed_tasks) == len(trajectory_ids)
+            if all_done:
+                break
+            await asyncio.sleep(self.DEFAULT_POLLING_TIME)
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+        if progress_bar:
+            progress.close()
+        return [
+            completed_tasks.get(task_id)
+            or (await self.aget_task(task_id, verbose=verbose))
+            for task_id in trajectory_ids
+        ]
+    def run_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        """Run multiple tasks and wait for them to complete.
+        Args:
+            task_data: A single task or collection of tasks to run
+            verbose: Whether to return verbose task responses
+            progress_bar: Whether to display a progress bar
+            timeout: Maximum time to wait for task completion in seconds
+        Returns:
+            A list of completed task responses
+        """
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+        trajectory_ids = [self.create_task(task) for task in all_tasks]
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+        if progress_bar:
+            progress = sync_tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+        while (time.monotonic() - start_time) < timeout:
+            all_done = True
+            for task_id in trajectory_ids:
+                if task_id in completed_tasks:
+                    continue
+                task = self.get_task(task_id, verbose=verbose)
+                if not ExecutionStatus(task.status).is_terminal_state():
+                    all_done = False
+                elif task_id not in completed_tasks:
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+            if all_done:
+                break
+            time.sleep(self.DEFAULT_POLLING_TIME)
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+        if progress_bar:
+            progress.close()
+        return [
+            completed_tasks.get(task_id) or self.get_task(task_id, verbose=verbose)
+            for task_id in trajectory_ids
+        ]
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -457,9 +623,12 @@ class RestClient:
     )
     def get_build_status(self, build_id: UUID | None = None) -> dict[str, Any]:
         """Get the status of a build."""
-        build_id = build_id or self.build_id
-        response = self.client.get(f"/v0.1/builds/{build_id}")
-        response.raise_for_status()
+        try:
+            build_id = build_id or self.build_id
+            response = self.client.get(f"/v0.1/builds/{build_id}")
+            response.raise_for_status()
+        except Exception as e:
+            raise JobFetchError(f"Error getting build status: {e!s}") from e
         return response.json()
     # TODO: Refactor later so we don't have to ignore PLR0915
@@ -660,14 +829,14 @@ class RestClient:
         self,
         job_name: str,
         file_path: str | os.PathLike,
-        folder_name: str | None = None,
+        upload_id: str | None = None,
     ) -> str:
         """Upload a file or directory to a futurehouse job bucket.
         Args:
             job_name: The name of the futurehouse job to upload to.
             file_path: The local path to the file or directory to upload.
-            folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+            upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
         Returns:
             The upload ID used for the upload.
@@ -679,7 +848,7 @@ class RestClient:
         if not file_path.exists():
             raise FileNotFoundError(f"File or directory not found: {file_path}")
-        upload_id = folder_name or str(uuid.uuid4())
+        upload_id = upload_id or str(uuid.uuid4())
         if file_path.is_dir():
             # Process directory recursively
@@ -742,6 +911,12 @@ class RestClient:
         """
         file_name = file_name or file_path.name
         file_size = file_path.stat().st_size
+        # Skip empty files
+        if file_size == 0:
+            logger.warning(f"Skipping upload of empty file: {file_path}")
+            return
         total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
         logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
@@ -789,7 +964,6 @@ class RestClient:
                         )
             logger.info(f"Successfully uploaded {file_name}")
         except Exception as e:
             logger.exception(f"Error uploading file {file_path}")
             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
@@ -799,12 +973,18 @@ class RestClient:
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
         retry=retry_if_connection_error,
     )
-    def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
+    def list_files(
+        self,
+        job_name: str,
+        trajectory_id: str | None = None,
+        upload_id: str | None = None,
+    ) -> dict[str, list[str]]:
         """List files and directories in a GCS location for a given job_name and upload_id.
         Args:
             job_name: The name of the futurehouse job.
-            folder_name: The specific folder name (upload_id) to list files from.
+            trajectory_id: The specific trajectory id to list files from.
+            upload_id: The specific upload id to list files from.
         Returns:
             A list of files in the GCS folder.
@@ -812,22 +992,27 @@ class RestClient:
         Raises:
             RestClientError: If there is an error listing the files.
         """
+        if not bool(trajectory_id) ^ bool(upload_id):
+            raise RestClientError(
+                "Must at least specify one of trajectory_id or upload_id, but not both"
+            )
         try:
             url = f"/v0.1/crows/{job_name}/list-files"
-            params = {"upload_id": folder_name}
+            params = {"trajectory_id": trajectory_id, "upload_id": upload_id}
+            params = {k: v for k, v in params.items() if v is not None}
             response = self.client.get(url, params=params)
             response.raise_for_status()
             return response.json()
         except HTTPStatusError as e:
             logger.exception(
-                f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
+                f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}: {e.response.text}"
             )
             raise RestClientError(
                 f"Error listing files: {e.response.status_code} - {e.response.text}"
             ) from e
         except Exception as e:
             logger.exception(
-                f"Error listing files for job {job_name}, folder {folder_name}"
+                f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}"
             )
             raise RestClientError(f"Error listing files: {e!s}") from e
@@ -839,7 +1024,7 @@ class RestClient:
     def download_file(
         self,
         job_name: str,
-        folder_name: str,
+        trajectory_id: str,
         file_path: str,
         destination_path: str | os.PathLike,
     ) -> None:
@@ -847,14 +1032,14 @@ class RestClient:
         Args:
             job_name: The name of the futurehouse job.
-            folder_name: The specific folder name (upload_id) the file belongs to.
+            trajectory_id: The specific trajectory id the file belongs to.
             file_path: The relative path of the file to download
                        (e.g., 'data/my_file.csv' or 'my_image.png').
             destination_path: The local path where the file should be saved.
         Raises:
             RestClientError: If there is an error downloading the file.
-            FileNotFoundError: If the destination directory does not exist.
+            FileNotFoundError: If the destination directory does not exist or if the file is not found.
         """
         destination_path = Path(destination_path)
         # Ensure the destination directory exists
@@ -862,17 +1047,24 @@ class RestClient:
         try:
             url = f"/v0.1/crows/{job_name}/download-file"
-            params = {"upload_id": folder_name, "file_path": file_path}
+            params = {"trajectory_id": trajectory_id, "file_path": file_path}
             with self.client.stream("GET", url, params=params) as response:
                 response.raise_for_status()  # Check for HTTP errors before streaming
                 with open(destination_path, "wb") as f:
                     for chunk in response.iter_bytes(chunk_size=8192):
                         f.write(chunk)
+            # Check if the downloaded file is empty
+            if destination_path.stat().st_size == 0:
+                # Remove the empty file
+                destination_path.unlink()
+                raise FileNotFoundError(f"File not found or is empty: {file_path}")
             logger.info(f"File {file_path} downloaded to {destination_path}")
         except HTTPStatusError as e:
             logger.exception(
-                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
+                f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}: {e.response.text}"
             )
             # Clean up partially downloaded file if an error occurs
             if destination_path.exists():
@@ -880,9 +1072,20 @@ class RestClient:
             raise RestClientError(
                 f"Error downloading file: {e.response.status_code} - {e.response.text}"
             ) from e
+        except RemoteProtocolError as e:
+            logger.error(
+                f"Connection error while downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
+            )
+            # Clean up partially downloaded file
+            if destination_path.exists():
+                destination_path.unlink()
+            # Often RemoteProtocolError during download means the file wasn't found
+            # or was empty/corrupted on the server side
+            raise FileNotFoundError(f"File not found or corrupted: {file_path}") from e
         except Exception as e:
             logger.exception(
-                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
+                f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
             )
             if destination_path.exists():
                 destination_path.unlink()  # Clean up partial file

futurehouse-client 0.3.17.dev56__py3-none-any.whl → 0.3.18__py3-none-any.whl

futurehouse-client 0.3.17.dev56__py3-none-any.whl → 0.3.18__py3-none-any.whl