futurehouse-client 0.3.17.dev94__tar.gz → 0.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/PKG-INFO +1 -1
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/job_client.py +1 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/rest_client.py +128 -182
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/__init__.py +10 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/app.py +96 -0
- futurehouse_client-0.3.18/futurehouse_client/utils/auth.py +92 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/PKG-INFO +1 -1
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/SOURCES.txt +2 -0
- futurehouse_client-0.3.18/tests/test_client.py +161 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/tests/test_rest.py +74 -86
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/LICENSE +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/README.md +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/docs/__init__.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/docs/client_notebook.ipynb +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/__init__.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/__init__.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/client.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/rest.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/__init__.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/general.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/module_utils.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/monitoring.py +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/dependency_links.txt +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/requires.txt +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/top_level.txt +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/pyproject.toml +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/setup.cfg +0 -0
- {futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/uv.lock +0 -0
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.17.dev94
+Version: 0.3.18
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/job_client.py RENAMED
@@ -30,6 +30,7 @@ class JobNames(StrEnum):
     OWL = "job-futurehouse-hasanyone"
     DUMMY = "job-futurehouse-dummy-env"
     PHOENIX = "job-futurehouse-phoenix"
+    FINCH = "job-futurehouse-data-analysis-crow-high"

     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/rest_client.py RENAMED
@@ -8,14 +8,14 @@ import inspect
 import json
 import logging
 import os
+import sys
 import tempfile
 import time
 import uuid
-from collections.abc import Collection
-from datetime import datetime
+from collections.abc import Collection
 from pathlib import Path
 from types import ModuleType
-from typing import Any, ClassVar,
+from typing import Any, ClassVar, cast
 from uuid import UUID

 import cloudpickle
@@ -33,7 +33,6 @@ from httpx import (
     RemoteProtocolError,
 )
 from ldp.agent import AgentConfig
-from pydantic import BaseModel, ConfigDict, model_validator
 from requests.exceptions import RequestException, Timeout
 from tenacity import (
     retry,
@@ -46,13 +45,16 @@ from tqdm.asyncio import tqdm

 from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
-    APIKeyPayload,
     AuthType,
     JobDeploymentConfig,
+    PQATaskResponse,
     Stage,
     TaskRequest,
+    TaskResponse,
+    TaskResponseVerbose,
 )
 from futurehouse_client.models.rest import ExecutionStatus
+from futurehouse_client.utils.auth import RefreshingJWT
 from futurehouse_client.utils.general import gather_with_concurrency
 from futurehouse_client.utils.module_utils import (
     OrganizationSelector,
@@ -63,24 +65,14 @@ from futurehouse_client.utils.monitoring import (
 )

 logger = logging.getLogger(__name__)
-
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    stream=sys.stdout,
+)
+logging.getLogger("httpx").setLevel(logging.WARNING)
 TaskRequest.model_rebuild()

-retry_if_connection_error = retry_if_exception_type((
-    # From requests
-    Timeout,
-    ConnectionError,
-    RequestException,
-    # From httpx
-    ConnectError,
-    ConnectTimeout,
-    ReadTimeout,
-    ReadError,
-    NetworkError,
-    RemoteProtocolError,
-    CloseError,
-))
-
 FILE_UPLOAD_IGNORE_PARTS = {
     ".ruff_cache",
     "__pycache__",
@@ -111,104 +103,27 @@ class InvalidTaskDescriptionError(Exception):
     """Raised when the task description is invalid or empty."""


-class SimpleOrganization(BaseModel):
-    id: int
-    name: str
-    display_name: str
-
-
-# 5 minute default for JWTs
-JWT_TOKEN_CACHE_EXPIRY: int = 300  # seconds
-DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds
-
+class FileUploadError(RestClientError):
+    """Raised when there's an error uploading a file."""

-class TaskResponse(BaseModel):
-    """Base class for task responses. This holds attributes shared over all futurehouse jobs."""
-
-    model_config = ConfigDict(extra="ignore")
-
-    status: str
-    query: str
-    user: str | None = None
-    created_at: datetime
-    job_name: str
-    public: bool
-    shared_with: list[SimpleOrganization] | None = None
-    build_owner: str | None = None
-    environment_name: str | None = None
-    agent_name: str | None = None
-    task_id: UUID | None = None
-
-    @model_validator(mode="before")
-    @classmethod
-    def validate_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        # Extract fields from environment frame state
-        if not isinstance(data, dict):
-            return data
-        # TODO: We probably want to remove these two once we define the final names.
-        data["job_name"] = data.get("crow")
-        data["query"] = data.get("task")
-        data["task_id"] = cast(UUID, data.get("id")) if data.get("id") else None
-        if not (metadata := data.get("metadata", {})):
-            return data
-        data["environment_name"] = metadata.get("environment_name")
-        data["agent_name"] = metadata.get("agent_name")
-        return data
-
-
-class PQATaskResponse(TaskResponse):
-    model_config = ConfigDict(extra="ignore")
-
-    answer: str | None = None
-    formatted_answer: str | None = None
-    answer_reasoning: str | None = None
-    has_successful_answer: bool | None = None
-    total_cost: float | None = None
-    total_queries: int | None = None
-
-    @model_validator(mode="before")
-    @classmethod
-    def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        if not isinstance(data, dict):
-            return data
-        if not (env_frame := data.get("environment_frame", {})):
-            return data
-        state = env_frame.get("state", {}).get("state", {})
-        response = state.get("response", {})
-        answer = response.get("answer", {})
-        usage = state.get("info", {}).get("usage", {})
-
-        # Add additional PQA specific fields to data so that pydantic can validate the model
-        data["answer"] = answer.get("answer")
-        data["formatted_answer"] = answer.get("formatted_answer")
-        data["answer_reasoning"] = answer.get("answer_reasoning")
-        data["has_successful_answer"] = answer.get("has_successful_answer")
-        data["total_cost"] = cast(float, usage.get("total_cost"))
-        data["total_queries"] = cast(int, usage.get("total_queries"))
-
-        return data
-
-    def clean_verbose(self) -> "TaskResponse":
-        """Clean the verbose response from the server."""
-        self.request = None
-        self.response = None
-        return self
-
-
-class TaskResponseVerbose(TaskResponse):
-    """Class for responses to include all the fields of a task response."""
-
-    model_config = ConfigDict(extra="allow")
-
-    public: bool
-    agent_state: list[dict[str, Any]] | None = None
-    environment_frame: dict[str, Any] | None = None
-    metadata: dict[str, Any] | None = None
-    shared_with: list[SimpleOrganization] | None = None

+retry_if_connection_error = retry_if_exception_type((
+    # From requests
+    Timeout,
+    ConnectionError,
+    RequestException,
+    # From httpx
+    ConnectError,
+    ConnectTimeout,
+    ReadTimeout,
+    ReadError,
+    NetworkError,
+    RemoteProtocolError,
+    CloseError,
+    FileUploadError,
+))

-class FileUploadError(RestClientError):
-    """Raised when there's an error uploading a file."""
+DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds


 class RestClient:
@@ -228,76 +143,98 @@ class RestClient:
         api_key: str | None = None,
         jwt: str | None = None,
         headers: dict[str, str] | None = None,
+        verbose_logging: bool = False,
     ):
+        if verbose_logging:
+            logger.setLevel(logging.INFO)
+        else:
+            logger.setLevel(logging.WARNING)
+
         self.base_url = service_uri or stage.value
         self.stage = stage
         self.auth_type = auth_type
         self.api_key = api_key
         self._clients: dict[str, Client | AsyncClient] = {}
         self.headers = headers or {}
-        self.
+        self.jwt = jwt
         self.organizations: list[str] = self._filter_orgs(organization)

     @property
     def client(self) -> Client:
-        """
-        return cast(Client, self.get_client("application/json",
+        """Authenticated HTTP client for regular API calls."""
+        return cast(Client, self.get_client("application/json", authenticated=True))

     @property
     def async_client(self) -> AsyncClient:
-        """
+        """Authenticated async HTTP client for regular API calls."""
         return cast(
             AsyncClient,
-            self.get_client("application/json",
+            self.get_client("application/json", authenticated=True, async_client=True),
         )

     @property
-    def auth_client(self) -> Client:
-        """
-        return cast(Client, self.get_client("application/json",
+    def unauthenticated_client(self) -> Client:
+        """Unauthenticated HTTP client for auth operations."""
+        return cast(Client, self.get_client("application/json", authenticated=False))

     @property
     def multipart_client(self) -> Client:
-        """
-        return cast(Client, self.get_client(None,
+        """Authenticated HTTP client for multipart uploads."""
+        return cast(Client, self.get_client(None, authenticated=True))

     def get_client(
         self,
         content_type: str | None = "application/json",
-
-
+        authenticated: bool = True,
+        async_client: bool = False,
     ) -> Client | AsyncClient:
         """Return a cached HTTP client or create one if needed.

         Args:
             content_type: The desired content type header. Use None for multipart uploads.
-
-
+            authenticated: Whether the client should include authentication.
+            async_client: Whether to use an async client.

         Returns:
             An HTTP client configured with the appropriate headers.
         """
-        # Create a composite key based on content type and auth flag
-        key = f"{content_type or 'multipart'}_{
+        # Create a composite key based on content type and auth flag
+        key = f"{content_type or 'multipart'}_{authenticated}_{async_client}"
+
         if key not in self._clients:
             headers = copy.deepcopy(self.headers)
-
-
+            auth = None
+
+            if authenticated:
+                auth = RefreshingJWT(
+                    # authenticated=False will always return a synchronous client
+                    auth_client=cast(
+                        Client, self.get_client("application/json", authenticated=False)
+                    ),
+                    auth_type=self.auth_type,
+                    api_key=self.api_key,
+                    jwt=self.jwt,
+                )
+
             if content_type:
                 headers["Content-Type"] = content_type
+
             self._clients[key] = (
                 AsyncClient(
                     base_url=self.base_url,
                     headers=headers,
                     timeout=self.REQUEST_TIMEOUT,
+                    auth=auth,
                 )
-                if
+                if async_client
                 else Client(
                     base_url=self.base_url,
                     headers=headers,
                     timeout=self.REQUEST_TIMEOUT,
+                    auth=auth,
                 )
             )
+
         return self._clients[key]

     def close(self):
@@ -327,31 +264,6 @@ class RestClient:
             raise ValueError(f"Organization '{organization}' not found.")
         return filtered_orgs

-    def _run_auth(self, jwt: str | None = None) -> str:
-        auth_payload: APIKeyPayload | None
-        if self.auth_type == AuthType.API_KEY:
-            auth_payload = APIKeyPayload(api_key=self.api_key)
-        elif self.auth_type == AuthType.JWT:
-            auth_payload = None
-        else:
-            assert_never(self.auth_type)
-        try:
-            # Use the unauthenticated client for login
-            if auth_payload:
-                response = self.auth_client.post(
-                    "/auth/login", json=auth_payload.model_dump()
-                )
-                response.raise_for_status()
-                token_data = response.json()
-            elif jwt:
-                token_data = {"access_token": jwt, "expires_in": JWT_TOKEN_CACHE_EXPIRY}
-            else:
-                raise ValueError("JWT token required for JWT authentication.")
-
-            return token_data["access_token"]
-        except Exception as e:
-            raise RestClientError(f"Error authenticating: {e!s}") from e
-
     def _check_job(self, name: str, organization: str) -> dict[str, Any]:
         try:
             response = self.client.get(
@@ -445,6 +357,7 @@ class RestClient:
             ),
             self.client.stream("GET", url, params={"history": history}) as response,
         ):
+            response.raise_for_status()
             json_data = "".join(response.iter_text(chunk_size=1024))
             data = json.loads(json_data)
             if "id" not in data:
@@ -459,8 +372,6 @@ class RestClient:
             ):
                 return PQATaskResponse(**data)
             return TaskResponse(**data)
-        except ValueError as e:
-            raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e

@@ -507,8 +418,6 @@ class RestClient:
             ):
                 return PQATaskResponse(**data)
             return TaskResponse(**data)
-        except ValueError as e:
-            raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e

@@ -714,9 +623,12 @@ class RestClient:
     )
     def get_build_status(self, build_id: UUID | None = None) -> dict[str, Any]:
         """Get the status of a build."""
-
-
-
+        try:
+            build_id = build_id or self.build_id
+            response = self.client.get(f"/v0.1/builds/{build_id}")
+            response.raise_for_status()
+        except Exception as e:
+            raise JobFetchError(f"Error getting build status: {e!s}") from e
         return response.json()

     # TODO: Refactor later so we don't have to ignore PLR0915
@@ -917,14 +829,14 @@ class RestClient:
         self,
         job_name: str,
         file_path: str | os.PathLike,
-
+        upload_id: str | None = None,
     ) -> str:
         """Upload a file or directory to a futurehouse job bucket.

         Args:
             job_name: The name of the futurehouse job to upload to.
             file_path: The local path to the file or directory to upload.
-
+            upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.

         Returns:
             The upload ID used for the upload.
@@ -936,7 +848,7 @@ class RestClient:
         if not file_path.exists():
             raise FileNotFoundError(f"File or directory not found: {file_path}")

-        upload_id =
+        upload_id = upload_id or str(uuid.uuid4())

         if file_path.is_dir():
             # Process directory recursively
@@ -999,6 +911,12 @@ class RestClient:
         """
        file_name = file_name or file_path.name
         file_size = file_path.stat().st_size
+
+        # Skip empty files
+        if file_size == 0:
+            logger.warning(f"Skipping upload of empty file: {file_path}")
+            return
+
         total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE

         logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
@@ -1046,7 +964,6 @@ class RestClient:
             )

             logger.info(f"Successfully uploaded {file_name}")
-
         except Exception as e:
             logger.exception(f"Error uploading file {file_path}")
             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
@@ -1056,12 +973,18 @@ class RestClient:
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
         retry=retry_if_connection_error,
     )
-    def list_files(
+    def list_files(
+        self,
+        job_name: str,
+        trajectory_id: str | None = None,
+        upload_id: str | None = None,
+    ) -> dict[str, list[str]]:
         """List files and directories in a GCS location for a given job_name and upload_id.

         Args:
             job_name: The name of the futurehouse job.
-
+            trajectory_id: The specific trajectory id to list files from.
+            upload_id: The specific upload id to list files from.

         Returns:
             A list of files in the GCS folder.
@@ -1069,22 +992,27 @@ class RestClient:
         Raises:
             RestClientError: If there is an error listing the files.
         """
+        if not bool(trajectory_id) ^ bool(upload_id):
+            raise RestClientError(
+                "Must at least specify one of trajectory_id or upload_id, but not both"
+            )
         try:
             url = f"/v0.1/crows/{job_name}/list-files"
-            params = {"upload_id":
+            params = {"trajectory_id": trajectory_id, "upload_id": upload_id}
+            params = {k: v for k, v in params.items() if v is not None}
             response = self.client.get(url, params=params)
             response.raise_for_status()
             return response.json()
         except HTTPStatusError as e:
             logger.exception(
-                f"Error listing files for job {job_name},
+                f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}: {e.response.text}"
             )
             raise RestClientError(
                 f"Error listing files: {e.response.status_code} - {e.response.text}"
             ) from e
         except Exception as e:
             logger.exception(
-                f"Error listing files for job {job_name},
+                f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}"
             )
             raise RestClientError(f"Error listing files: {e!s}") from e

@@ -1096,7 +1024,7 @@ class RestClient:
     def download_file(
         self,
         job_name: str,
-
+        trajectory_id: str,
         file_path: str,
         destination_path: str | os.PathLike,
     ) -> None:
@@ -1104,14 +1032,14 @@ class RestClient:

         Args:
             job_name: The name of the futurehouse job.
-
+            trajectory_id: The specific trajectory id the file belongs to.
             file_path: The relative path of the file to download
                 (e.g., 'data/my_file.csv' or 'my_image.png').
             destination_path: The local path where the file should be saved.

         Raises:
             RestClientError: If there is an error downloading the file.
-            FileNotFoundError: If the destination directory does not exist.
+            FileNotFoundError: If the destination directory does not exist or if the file is not found.
         """
         destination_path = Path(destination_path)
         # Ensure the destination directory exists
@@ -1119,17 +1047,24 @@ class RestClient:

         try:
             url = f"/v0.1/crows/{job_name}/download-file"
-            params = {"
+            params = {"trajectory_id": trajectory_id, "file_path": file_path}

             with self.client.stream("GET", url, params=params) as response:
                 response.raise_for_status()  # Check for HTTP errors before streaming
                 with open(destination_path, "wb") as f:
                     for chunk in response.iter_bytes(chunk_size=8192):
                         f.write(chunk)
+
+            # Check if the downloaded file is empty
+            if destination_path.stat().st_size == 0:
+                # Remove the empty file
+                destination_path.unlink()
+                raise FileNotFoundError(f"File not found or is empty: {file_path}")
+
             logger.info(f"File {file_path} downloaded to {destination_path}")
         except HTTPStatusError as e:
             logger.exception(
-                f"Error downloading file {file_path} for job {job_name},
+                f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}: {e.response.text}"
             )
             # Clean up partially downloaded file if an error occurs
             if destination_path.exists():
@@ -1137,9 +1072,20 @@ class RestClient:
             raise RestClientError(
                 f"Error downloading file: {e.response.status_code} - {e.response.text}"
             ) from e
+        except RemoteProtocolError as e:
+            logger.error(
+                f"Connection error while downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
+            )
+            # Clean up partially downloaded file
+            if destination_path.exists():
+                destination_path.unlink()
+
+            # Often RemoteProtocolError during download means the file wasn't found
+            # or was empty/corrupted on the server side
+            raise FileNotFoundError(f"File not found or corrupted: {file_path}") from e
         except Exception as e:
             logger.exception(
-                f"Error downloading file {file_path} for job {job_name},
+                f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
             )
             if destination_path.exists():
                 destination_path.unlink()  # Clean up partial file
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/__init__.py RENAMED
@@ -3,10 +3,15 @@ from .app import (
     DockerContainerConfiguration,
     FramePath,
     JobDeploymentConfig,
+    PQATaskResponse,
     RuntimeConfig,
     Stage,
     Step,
+    TaskQueue,
+    TaskQueuesConfig,
     TaskRequest,
+    TaskResponse,
+    TaskResponseVerbose,
 )

 __all__ = [
@@ -14,8 +19,13 @@ __all__ = [
     "DockerContainerConfiguration",
     "FramePath",
     "JobDeploymentConfig",
+    "PQATaskResponse",
     "RuntimeConfig",
     "Stage",
     "Step",
+    "TaskQueue",
+    "TaskQueuesConfig",
     "TaskRequest",
+    "TaskResponse",
+    "TaskResponseVerbose",
 ]
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/app.py RENAMED
@@ -1,6 +1,9 @@
+import copy
 import json
 import os
 import re
+from collections.abc import Mapping
+from datetime import datetime
 from enum import StrEnum, auto
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Self, cast
@@ -646,3 +649,96 @@ class TaskRequest(BaseModel):
     runtime_config: RuntimeConfig | None = Field(
         default=None, description="All optional runtime parameters for the job"
     )
+
+
+class SimpleOrganization(BaseModel):
+    id: int
+    name: str
+    display_name: str
+
+
+class TaskResponse(BaseModel):
+    """Base class for task responses. This holds attributes shared over all futurehouse jobs."""
+
+    model_config = ConfigDict(extra="ignore")
+
+    status: str
+    query: str
+    user: str | None = None
+    created_at: datetime
+    job_name: str
+    public: bool
+    shared_with: list[SimpleOrganization] | None = None
+    build_owner: str | None = None
+    environment_name: str | None = None
+    agent_name: str | None = None
+    task_id: UUID | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
+        # Extract fields from environment frame state
+        if not isinstance(data, dict):
+            return data
+        # TODO: We probably want to remove these two once we define the final names.
+        data["job_name"] = data.get("crow")
+        data["query"] = data.get("task")
+        data["task_id"] = cast(UUID, data.get("id")) if data.get("id") else None
+        if not (metadata := data.get("metadata", {})):
+            return data
+        data["environment_name"] = metadata.get("environment_name")
+        data["agent_name"] = metadata.get("agent_name")
+        return data
+
+
+class PQATaskResponse(TaskResponse):
+    model_config = ConfigDict(extra="ignore")
+
+    answer: str | None = None
+    formatted_answer: str | None = None
+    answer_reasoning: str | None = None
+    has_successful_answer: bool | None = None
+    total_cost: float | None = None
+    total_queries: int | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_pqa_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
+        if not isinstance(data, dict):
+            return data
+        if not (env_frame := data.get("environment_frame", {})):
+            return data
+        state = env_frame.get("state", {}).get("state", {})
+        response = state.get("response", {})
+        answer = response.get("answer", {})
+        usage = state.get("info", {}).get("usage", {})
+
+        # Add additional PQA specific fields to data so that pydantic can validate the model
+        data["answer"] = answer.get("answer")
+        data["formatted_answer"] = answer.get("formatted_answer")
+        data["answer_reasoning"] = answer.get("answer_reasoning")
+        data["has_successful_answer"] = answer.get("has_successful_answer")
+        data["total_cost"] = cast(float, usage.get("total_cost"))
+        data["total_queries"] = cast(int, usage.get("total_queries"))
+
+        return data
+
+    def clean_verbose(self) -> "TaskResponse":
+        """Clean the verbose response from the server."""
+        self.request = None
+        self.response = None
+        return self
+
+
+class TaskResponseVerbose(TaskResponse):
+    """Class for responses to include all the fields of a task response."""
+
+    model_config = ConfigDict(extra="allow")
+
+    public: bool
+    agent_state: list[dict[str, Any]] | None = None
+    environment_frame: dict[str, Any] | None = None
+    metadata: dict[str, Any] | None = None
+    shared_with: list[SimpleOrganization] | None = None
futurehouse_client-0.3.18/futurehouse_client/utils/auth.py ADDED
@@ -0,0 +1,92 @@
+import logging
+from collections.abc import Collection, Generator
+from typing import ClassVar, Final
+
+import httpx
+
+from futurehouse_client.models.app import APIKeyPayload, AuthType
+
+logger = logging.getLogger(__name__)
+
+INVALID_REFRESH_TYPE_MSG: Final[str] = (
+    "API key auth is required to refresh auth tokens."
+)
+JWT_TOKEN_CACHE_EXPIRY: int = 300  # seconds
+
+
+def _run_auth(
+    client: httpx.Client,
+    auth_type: AuthType = AuthType.API_KEY,
+    api_key: str | None = None,
+    jwt: str | None = None,
+) -> str:
+    auth_payload: APIKeyPayload | None
+    if auth_type == AuthType.API_KEY:
+        auth_payload = APIKeyPayload(api_key=api_key)
+    elif auth_type == AuthType.JWT:
+        auth_payload = None
+    try:
+        if auth_payload:
+            response = client.post("/auth/login", json=auth_payload.model_dump())
+            response.raise_for_status()
+            token_data = response.json()
+        elif jwt:
+            token_data = {"access_token": jwt, "expires_in": JWT_TOKEN_CACHE_EXPIRY}
+        else:
+            raise ValueError("JWT token required for JWT authentication.")
+
+        return token_data["access_token"]
+    except Exception as e:
+        raise Exception("Failed to authenticate") from e  # noqa: TRY002
+
+
+class RefreshingJWT(httpx.Auth):
+    """Automatically (re-)inject a JWT and transparently retry exactly once when we hit a 401/403."""
+
+    RETRY_STATUSES: ClassVar[Collection[httpx.codes]] = {
+        httpx.codes.UNAUTHORIZED,
+        httpx.codes.FORBIDDEN,
+    }
+
+    def __init__(
+        self,
+        auth_client: httpx.Client,
+        auth_type: AuthType = AuthType.API_KEY,
+        api_key: str | None = None,
+        jwt: str | None = None,
+    ):
+        self.auth_type = auth_type
+        self.auth_client = auth_client
+        self.api_key = api_key
+        self._jwt = _run_auth(
+            client=auth_client,
+            jwt=jwt,
+            auth_type=auth_type,
+            api_key=api_key,
+        )
+
+    def refresh_token(self) -> None:
+        if self.auth_type == AuthType.JWT:
+            logger.error(INVALID_REFRESH_TYPE_MSG)
+            raise ValueError(INVALID_REFRESH_TYPE_MSG)
+        self._jwt = _run_auth(
+            client=self.auth_client,
+            auth_type=self.auth_type,
+            api_key=self.api_key,
+        )
+
+    def auth_flow(
+        self, request: httpx.Request
+    ) -> Generator[httpx.Request, httpx.Response, None]:
+        request.headers["Authorization"] = f"Bearer {self._jwt}"
+        response = yield request
+
+        # If it failed, refresh once and replay the request
+        if response.status_code in self.RETRY_STATUSES:
+            logger.info(
+                "Received %s, refreshing token and retrying …",
+                response.status_code,
+            )
+            self.refresh_token()
+            request.headers["Authorization"] = f"Bearer {self._jwt}"
+            yield request  # second (and final) attempt, again or use a while loop
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.17.dev94
+Version: 0.3.18
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/SOURCES.txt RENAMED
@@ -18,7 +18,9 @@ futurehouse_client/models/app.py
 futurehouse_client/models/client.py
 futurehouse_client/models/rest.py
 futurehouse_client/utils/__init__.py
+futurehouse_client/utils/auth.py
 futurehouse_client/utils/general.py
 futurehouse_client/utils/module_utils.py
 futurehouse_client/utils/monitoring.py
+tests/test_client.py
 tests/test_rest.py
futurehouse_client-0.3.18/tests/test_client.py ADDED
@@ -0,0 +1,161 @@
+import copy
+from datetime import datetime
+from typing import Any
+from unittest.mock import MagicMock
+
+import httpx
+import pytest
+from futurehouse_client.models.app import AuthType, TaskResponse
+from futurehouse_client.utils.auth import RefreshingJWT
+
+
+@pytest.fixture
+def mock_client():
+    """Create a mock synchronous HTTP client that returns success on first auth attempt."""
+    client = MagicMock(spec=httpx.Client)
+    response = MagicMock()
+    response.raise_for_status.return_value = None
+    response.json.return_value = {
+        "access_token": "test_token_from_api",
+        "expires_in": 300,
+    }
+    client.post.return_value = response
+    return client
+
+
+@pytest.fixture
+def failing_then_success_client():
+    """Create a client that fails with 401 on first call, then succeeds on retry."""
+    client = MagicMock(spec=httpx.Client)
+
+    first_response = MagicMock(status_code=401)
+    success_response = MagicMock()
+    success_response.raise_for_status.return_value = None
+    success_response.json.return_value = {
+        "access_token": "refreshed_token",
+        "expires_in": 300,
+    }
+
+    client.post.return_value = success_response
+
+    return client, first_response
+
+
+def test_refreshing_jwt_with_api_key(mock_client):
+    """Test that RefreshingJWT works with API key authentication."""
+    api_key = "mock_api_key_12345"
+
+    auth = RefreshingJWT(
+        auth_client=mock_client, auth_type=AuthType.API_KEY, api_key=api_key
+    )
+
+    assert auth._jwt == "test_token_from_api"
+
+    mock_client.post.assert_called_once()
+    args, kwargs = mock_client.post.call_args
+    assert args[0] == "/auth/login"
+    assert "json" in kwargs
+    assert kwargs["json"] == {"api_key": api_key}
+
+
+def test_refreshing_jwt_with_jwt_token():
+    """Test that RefreshingJWT works with JWT authentication."""
+    jwt_token = "mock.jwt.token"
+
+    auth = RefreshingJWT(auth_client=MagicMock(), auth_type=AuthType.JWT, jwt=jwt_token)
+
+    assert auth._jwt == jwt_token
+
+
+def test_refreshing_jwt_refresh_token(mock_client):
+    """Test that refresh_token method correctly gets a new token."""
+    api_key = "mock_api_key_12345"
+
+    auth = RefreshingJWT(
+        auth_client=mock_client, auth_type=AuthType.API_KEY, api_key=api_key
+    )
+
+    original_token = auth._jwt
+
+    new_response = MagicMock()
+    new_response.raise_for_status.return_value = None
+    new_response.json.return_value = {
+        "access_token": "new_refreshed_token",
+        "expires_in": 300,
+    }
+    mock_client.post.return_value = new_response
+
+    auth.refresh_token()
+
+    assert auth._jwt == "new_refreshed_token"
+    assert auth._jwt != original_token
+
+    assert mock_client.post.call_count == 2  # Initial auth + refresh
+
+
+def test_refreshing_jwt_refresh_token_jwt_auth_fails():
+    """Test that refresh_token raises an error with JWT auth type."""
+    jwt_token = "mock.jwt.token"
+
+    auth = RefreshingJWT(auth_client=MagicMock(), auth_type=AuthType.JWT, jwt=jwt_token)
+
+    with pytest.raises(ValueError) as excinfo:  # noqa: PT011
+        auth.refresh_token()
+
+    assert "API key auth is required to refresh auth tokens" in str(excinfo.value)
+
+
+def test_auth_flow_with_retry(failing_then_success_client):
+    """Test that auth_flow retries with new token after receiving a 401."""
+    client, first_response = failing_then_success_client
+    api_key = "mock_api_key_12345"
+    auth = RefreshingJWT(
+        auth_client=client, auth_type=AuthType.API_KEY, api_key=api_key
+    )
+    request = httpx.Request("GET", "https://fh.org")
+
+    flow = auth.auth_flow(request)
+    first_request = next(flow)
+    assert first_request.headers["Authorization"] == f"Bearer {auth._jwt}"
+
+    second_request = flow.send(first_response)
+    assert auth._jwt == "refreshed_token"
+    assert second_request.headers["Authorization"] == "Bearer refreshed_token"
+    success_response = httpx.Response(200)
+
+    try:
+        flow.send(success_response)
+        pytest.fail("Generator should have exited after processing the response")
+    except StopIteration:
+        pass
+
+    client.post.assert_called_with("/auth/login", json={"api_key": api_key})
+
+
+def test_task_response_does_not_mutate_original_data():
+    """Test that TaskResponse doesn't mutate the original data when creating an instance."""
+    original_data: dict[str, Any] = {
+        "crow": "test-crow",
+        "task": "test task",
+        "metadata": {
+            "environment_name": "test-env",
+            "agent_name": "test-agent",
+            "some_other_field": "should not be modified",
+        },
+        "status": "success",
+        "created_at": datetime.now(),
+        "public": True,
+    }
+
+    original_data_copy = copy.deepcopy(original_data)
+
+    task_response = TaskResponse(**original_data)
+
+    assert original_data == original_data_copy, "Original data was mutated"
+
+    # Assert the fields are set correctly
+    assert task_response.job_name == original_data["crow"]
+    assert task_response.query == original_data["task"]
+    metadata = original_data.get("metadata", {})
+    assert task_response.environment_name == metadata.get("environment_name")
+    assert task_response.agent_name == metadata.get("agent_name")
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/tests/test_rest.py RENAMED
@@ -1,3 +1,4 @@
+# ruff: noqa: ARG001
 import asyncio
 import os
 import time
@@ -18,87 +19,98 @@ PUBLIC_API_KEY = os.environ["PLAYWRIGHT_PUBLIC_API_KEY"]
 TEST_MAX_POLLS = 100


-@pytest.
-
-
-
+@pytest.fixture
+def admin_client():
+    """Create a RestClient for testing; using an admin key."""
+    return RestClient(
         stage=Stage.DEV,
         api_key=ADMIN_API_KEY,
     )

-
+
+
+@pytest.fixture
+def pub_client():
+    """Create a RestClient for testing; using a public user key with limited access."""
+    return RestClient(
+        stage=Stage.DEV,
+        api_key=PUBLIC_API_KEY,
+    )
+
+
+@pytest.fixture
+def task_req():
+    """Create a sample task request."""
+    return TaskRequest(
         name=JobNames.from_string("dummy"),
         query="How many moons does earth have?",
     )
-    client.create_task(task_data)

-    while (task_status := client.get_task().status) in {"queued", "in progress"}:
-        time.sleep(5)

+@pytest.fixture
+def pqa_task_req():
+    return TaskRequest(
+        name=JobNames.from_string("crow"),
+        query="How many moons does earth have?",
+    )
+
+
+@pytest.mark.timeout(300)
+@pytest.mark.flaky(reruns=3)
+def test_futurehouse_dummy_env_crow(admin_client: RestClient, task_req: TaskRequest):
+    admin_client.create_task(task_req)
+    while (task_status := admin_client.get_task().status) in {"queued", "in progress"}:
+        time.sleep(5)
     assert task_status == "success"


-def test_insufficient_permissions_request(
+def test_insufficient_permissions_request(
+    pub_client: RestClient, task_req: TaskRequest
+):
     # Create a new instance so that cached credentials aren't reused
-    client = RestClient(
-        stage=Stage.DEV,
-        api_key=PUBLIC_API_KEY,
-    )
-    task_data = TaskRequest(
-        name=JobNames.from_string("dummy"),
-        query="How many moons does earth have?",
-    )
-
     with pytest.raises(TaskFetchError) as exc_info:
-
+        pub_client.create_task(task_req)

     assert "Error creating task" in str(exc_info.value)


 @pytest.mark.timeout(300)
 @pytest.mark.asyncio
-async def test_job_response(
-
-
-
-    )
-    task_data = TaskRequest(
-        name=JobNames.from_string("crow"),
-        query="How many moons does earth have?",
-    )
-    task_id = client.create_task(task_data)
-    atask_id = await client.acreate_task(task_data)
+async def test_job_response(  # noqa: PLR0915
+    subtests: SubTests, admin_client: RestClient, pqa_task_req: TaskRequest
+):
+    task_id = admin_client.create_task(pqa_task_req)
+    atask_id = await admin_client.acreate_task(pqa_task_req)

     with subtests.test("Test TaskResponse with queued task"):
-        task_response =
+        task_response = admin_client.get_task(task_id)
         assert task_response.status in {"queued", "in progress"}
-        assert task_response.job_name ==
-        assert task_response.query ==
-        task_response = await
+        assert task_response.job_name == pqa_task_req.name
+        assert task_response.query == pqa_task_req.query
+        task_response = await admin_client.aget_task(atask_id)
         assert task_response.status in {"queued", "in progress"}
-        assert task_response.job_name ==
-        assert task_response.query ==
+        assert task_response.job_name == pqa_task_req.name
+        assert task_response.query == pqa_task_req.query

     for _ in range(TEST_MAX_POLLS):
-        task_response =
+        task_response = admin_client.get_task(task_id)
         if task_response.status in ExecutionStatus.terminal_states():
             break
         await asyncio.sleep(5)

     for _ in range(TEST_MAX_POLLS):
-        task_response = await
+        task_response = await admin_client.aget_task(atask_id)
         if task_response.status in ExecutionStatus.terminal_states():
             break
         await asyncio.sleep(5)

     with subtests.test("Test PQA job response"):
-        task_response =
+        task_response = admin_client.get_task(task_id)
         assert isinstance(task_response, PQATaskResponse)
         # assert it has general fields
         assert task_response.status == "success"
         assert task_response.task_id is not None
-        assert
-        assert
+        assert pqa_task_req.name in task_response.job_name
+        assert pqa_task_req.query in task_response.query
         # assert it has PQA specific fields
         assert task_response.answer is not None
         # assert it's not verbose
@@ -106,13 +118,13 @@ async def test_job_response(subtests: SubTests):  # noqa: PLR0915
         assert not hasattr(task_response, "agent_state")

     with subtests.test("Test async PQA job response"):
-        task_response = await
+        task_response = await admin_client.aget_task(atask_id)
         assert isinstance(task_response, PQATaskResponse)
         # assert it has general fields
         assert task_response.status == "success"
         assert task_response.task_id is not None
-        assert
-        assert
+        assert pqa_task_req.name in task_response.job_name
+        assert pqa_task_req.query in task_response.query
         # assert it has PQA specific fields
         assert task_response.answer is not None
         # assert it's not verbose
@@ -120,14 +132,14 @@ async def test_job_response(subtests: SubTests):  # noqa: PLR0915
         assert not hasattr(task_response, "agent_state")

     with subtests.test("Test task response with verbose"):
-        task_response =
+        task_response = admin_client.get_task(task_id, verbose=True)
         assert isinstance(task_response, TaskResponseVerbose)
         assert task_response.status == "success"
         assert task_response.environment_frame is not None
         assert task_response.agent_state is not None

     with subtests.test("Test task async response with verbose"):
-        task_response = await
+        task_response = await admin_client.aget_task(atask_id, verbose=True)
         assert isinstance(task_response, TaskResponseVerbose)
         assert task_response.status == "success"
         assert task_response.environment_frame is not None
@@ -136,20 +148,12 @@ async def test_job_response(subtests: SubTests):  # noqa: PLR0915

 @pytest.mark.timeout(300)
 @pytest.mark.flaky(reruns=3)
-def test_run_until_done_futurehouse_dummy_env_crow(
-
-
-
-    )
-
-    task_data = TaskRequest(
-        name=JobNames.from_string("dummy"),
-        query="How many moons does earth have?",
-    )
-
-    tasks_to_do = [task_data, task_data]
+def test_run_until_done_futurehouse_dummy_env_crow(
+    admin_client: RestClient, task_req: TaskRequest
+):
+    tasks_to_do = [task_req, task_req]

-    results =
+    results = admin_client.run_tasks_until_done(tasks_to_do)

     assert len(results) == len(tasks_to_do), "Should return 2 tasks."
     assert all(task.status == "success" for task in results)
@@ -158,20 +162,12 @@ def test_run_until_done_futurehouse_dummy_env_crow():
 @pytest.mark.timeout(300)
 @pytest.mark.flaky(reruns=3)
 @pytest.mark.asyncio
-async def test_arun_until_done_futurehouse_dummy_env_crow(
-
-
-
-    )
+async def test_arun_until_done_futurehouse_dummy_env_crow(
+    admin_client: RestClient, task_req: TaskRequest
+):
+    tasks_to_do = [task_req, task_req]

-
-        name=JobNames.from_string("dummy"),
-        query="How many moons does earth have?",
-    )
-
-    tasks_to_do = [task_data, task_data]
-
-    results = await client.arun_tasks_until_done(tasks_to_do)
+    results = await admin_client.arun_tasks_until_done(tasks_to_do)

     assert len(results) == len(tasks_to_do), "Should return 2 tasks."
     assert all(task.status == "success" for task in results)
@@ -180,20 +176,12 @@ async def test_arun_until_done_futurehouse_dummy_env_crow():
 @pytest.mark.timeout(300)
 @pytest.mark.flaky(reruns=3)
 @pytest.mark.asyncio
-async def test_timeout_run_until_done_futurehouse_dummy_env_crow(
-
-
-
-    )
-
-    task_data = TaskRequest(
-        name=JobNames.from_string("dummy"),
-        query="How many moons does earth have?",
-    )
-
-    tasks_to_do = [task_data, task_data]
+async def test_timeout_run_until_done_futurehouse_dummy_env_crow(
+    admin_client: RestClient, task_req: TaskRequest
+):
+    tasks_to_do = [task_req, task_req]

-    results = await
+    results = await admin_client.arun_tasks_until_done(
         tasks_to_do, verbose=True, timeout=5, progress_bar=True
     )
@@ -203,7 +191,7 @@ async def test_timeout_run_until_done_futurehouse_dummy_env_crow():
         "Should be verbose."
     )

-    results =
+    results = admin_client.run_tasks_until_done(
         tasks_to_do, verbose=True, timeout=5, progress_bar=True
     )
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/LICENSE RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/README.md RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/docs/__init__.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/docs/client_notebook.ipynb RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/__init__.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/clients/__init__.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/client.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/models/rest.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/__init__.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/general.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/module_utils.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client/utils/monitoring.py RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/dependency_links.txt RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/requires.txt RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/futurehouse_client.egg-info/top_level.txt RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/pyproject.toml RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/setup.cfg RENAMED
File without changes
{futurehouse_client-0.3.17.dev94 → futurehouse_client-0.3.18}/uv.lock RENAMED
File without changes