futurehouse-client 0.3.16__py3-none-any.whl → 0.3.17.dev94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ class JobNames(StrEnum):
     FALCON = "job-futurehouse-paperqa2-deep"
     OWL = "job-futurehouse-hasanyone"
     DUMMY = "job-futurehouse-dummy-env"
+    PHOENIX = "job-futurehouse-phoenix"

     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
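With this addition, Phoenix tasks are submitted like any other `JobNames` member. A minimal sketch, using the `run_tasks_until_done` helper introduced later in this diff (the API key and query are placeholders):

```python
from futurehouse_client import FutureHouseClient, JobNames

client = FutureHouseClient(api_key="your_api_key")  # placeholder key

# run_tasks_until_done returns a list of TaskResponse, one per submitted task.
responses = client.run_tasks_until_done({
    "name": JobNames.PHOENIX,
    "query": "Suggest a three-step synthesis route for aspirin.",  # illustrative query
})
print(responses[0].status)
```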
@@ -1,12 +1,17 @@
 import ast
+import asyncio
 import base64
+import contextlib
 import copy
 import importlib.metadata
 import inspect
 import json
 import logging
 import os
-from collections.abc import Mapping
+import tempfile
+import time
+import uuid
+from collections.abc import Collection, Mapping
 from datetime import datetime
 from pathlib import Path
 from types import ModuleType
@@ -16,6 +21,7 @@ from uuid import UUID
 import cloudpickle
 from aviary.functional import EnvironmentBuilder
 from httpx import (
+    AsyncClient,
     Client,
     CloseError,
     ConnectError,
@@ -35,6 +41,8 @@ from tenacity import (
     stop_after_attempt,
     wait_exponential,
 )
+from tqdm import tqdm as sync_tqdm
+from tqdm.asyncio import tqdm

 from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
@@ -44,6 +52,8 @@ from futurehouse_client.models.app import (
     Stage,
     TaskRequest,
 )
+from futurehouse_client.models.rest import ExecutionStatus
+from futurehouse_client.utils.general import gather_with_concurrency
 from futurehouse_client.utils.module_utils import (
     OrganizationSelector,
     fetch_environment_function_docstring,
@@ -109,6 +119,7 @@ class SimpleOrganization(BaseModel):

 # 5 minute default for JWTs
 JWT_TOKEN_CACHE_EXPIRY: int = 300  # seconds
+DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds


 class TaskResponse(BaseModel):
@@ -137,10 +148,7 @@ class TaskResponse(BaseModel):
         # TODO: We probably want to remove these two once we define the final names.
         data["job_name"] = data.get("crow")
         data["query"] = data.get("task")
-        if not (env_frame := data.get("environment_frame", {})):
-            return data
-        state = env_frame.get("state", {}).get("state", {})
-        data["task_id"] = cast(UUID, state.get("id")) if state.get("id") else None
+        data["task_id"] = cast(UUID, data.get("id")) if data.get("id") else None
         if not (metadata := data.get("metadata", {})):
             return data
         data["environment_name"] = metadata.get("environment_name")
@@ -161,7 +169,6 @@ class PQATaskResponse(TaskResponse):
     @model_validator(mode="before")
     @classmethod
     def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        # Extract fields from environment frame state
         if not isinstance(data, dict):
             return data
         if not (env_frame := data.get("environment_frame", {})):
@@ -200,11 +207,17 @@ class TaskResponseVerbose(TaskResponse):
     shared_with: list[SimpleOrganization] | None = None


+class FileUploadError(RestClientError):
+    """Raised when there's an error uploading a file."""
+
+
 class RestClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
     MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
     RETRY_MULTIPLIER: ClassVar[int] = 1
     MAX_RETRY_WAIT: ClassVar[int] = 10
+    DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
+    CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks

     def __init__(
         self,
@@ -220,7 +233,7 @@ class RestClient:
         self.stage = stage
         self.auth_type = auth_type
         self.api_key = api_key
-        self._clients: dict[str, Client] = {}
+        self._clients: dict[str, Client | AsyncClient] = {}
        self.headers = headers or {}
        self.auth_jwt = self._run_auth(jwt=jwt)
        self.organizations: list[str] = self._filter_orgs(organization)
@@ -228,49 +241,81 @@ class RestClient:
     @property
     def client(self) -> Client:
         """Lazily initialized and cached HTTP client with authentication."""
-        return self.get_client("application/json", with_auth=True)
+        return cast(Client, self.get_client("application/json", with_auth=True))
+
+    @property
+    def async_client(self) -> AsyncClient:
+        """Lazily initialized and cached HTTP client with authentication."""
+        return cast(
+            AsyncClient,
+            self.get_client("application/json", with_auth=True, with_async=True),
+        )

     @property
     def auth_client(self) -> Client:
         """Lazily initialized and cached HTTP client without authentication."""
-        return self.get_client("application/json", with_auth=False)
+        return cast(Client, self.get_client("application/json", with_auth=False))

     @property
     def multipart_client(self) -> Client:
         """Lazily initialized and cached HTTP client for multipart uploads."""
-        return self.get_client(None, with_auth=True)
+        return cast(Client, self.get_client(None, with_auth=True))

     def get_client(
-        self, content_type: str | None = "application/json", with_auth: bool = True
-    ) -> Client:
+        self,
+        content_type: str | None = "application/json",
+        with_auth: bool = True,
+        with_async: bool = False,
+    ) -> Client | AsyncClient:
         """Return a cached HTTP client or create one if needed.

         Args:
             content_type: The desired content type header. Use None for multipart uploads.
             with_auth: Whether the client should include an Authorization header.
+            with_async: Whether to use an async client.

         Returns:
             An HTTP client configured with the appropriate headers.
         """
         # Create a composite key based on content type and auth flag.
-        key = f"{content_type or 'multipart'}_{with_auth}"
+        key = f"{content_type or 'multipart'}_{with_auth}_{with_async}"
         if key not in self._clients:
             headers = copy.deepcopy(self.headers)
             if with_auth:
                 headers["Authorization"] = f"Bearer {self.auth_jwt}"
             if content_type:
                 headers["Content-Type"] = content_type
-            self._clients[key] = Client(
-                base_url=self.base_url,
-                headers=headers,
-                timeout=self.REQUEST_TIMEOUT,
+            self._clients[key] = (
+                AsyncClient(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                )
+                if with_async
+                else Client(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                )
             )
         return self._clients[key]

-    def __del__(self):
-        """Ensure all cached clients are properly closed when the instance is destroyed."""
+    def close(self):
+        """Explicitly close all cached clients."""
+        for client in self._clients.values():
+            if isinstance(client, Client):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    client.close()
+
+    async def aclose(self):
+        """Asynchronously close all cached clients."""
         for client in self._clients.values():
-            client.close()
+            if isinstance(client, AsyncClient):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    await client.aclose()
+
+    def __del__(self):
+        self.close()

     def _filter_orgs(self, organization: str | None = None) -> list[str]:
         filtered_orgs = [
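Because the client cache now holds both sync and async clients, cleanup is split: `close()` handles `Client` instances (and is what `__del__` calls), while `aclose()` must be awaited to release `AsyncClient` instances. A minimal teardown sketch under that assumption (the API key and task id are placeholders):

```python
import asyncio

from futurehouse_client import FutureHouseClient


async def main() -> None:
    client = FutureHouseClient(api_key="your_api_key")  # placeholder key
    try:
        task = await client.aget_task("some-task-uuid")  # hypothetical trajectory id
        print(task.status)
    finally:
        await client.aclose()  # closes cached AsyncClient instances
        client.close()  # closes cached sync Client instances


asyncio.run(main())
```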
@@ -402,6 +447,8 @@ class RestClient:
             ):
                 json_data = "".join(response.iter_text(chunk_size=1024))
                 data = json.loads(json_data)
+                if "id" not in data:
+                    data["id"] = task_id
                 verbose_response = TaskResponseVerbose(**data)

                 if verbose:
@@ -417,6 +464,54 @@ class RestClient:
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def aget_task(
+        self, task_id: str | None = None, history: bool = False, verbose: bool = False
+    ) -> "TaskResponse":
+        """Get details for a specific task asynchronously."""
+        try:
+            task_id = task_id or self.trajectory_id
+            url = f"/v0.1/trajectories/{task_id}"
+            full_url = f"{self.base_url}{url}"
+
+            with external_trace(
+                url=full_url,
+                method="GET",
+                library="httpx",
+                custom_params={
+                    "operation": "get_job",
+                    "job_id": task_id,
+                },
+            ):
+                async with self.async_client.stream(
+                    "GET", url, params={"history": history}
+                ) as response:
+                    response.raise_for_status()
+                    json_data = "".join([
+                        chunk async for chunk in response.aiter_text()
+                    ])
+                    data = json.loads(json_data)
+                    if "id" not in data:
+                        data["id"] = task_id
+                    verbose_response = TaskResponseVerbose(**data)
+
+            if verbose:
+                return verbose_response
+            if any(
+                JobNames.from_string(job_name) in verbose_response.job_name
+                for job_name in ["crow", "falcon", "owl", "dummy"]
+            ):
+                return PQATaskResponse(**data)
+            return TaskResponse(**data)
+        except ValueError as e:
+            raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
+        except Exception as e:
+            raise TaskFetchError(f"Error getting task: {e!s}") from e
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -438,10 +533,179 @@ class RestClient:
                 "/v0.1/crows", json=task_data.model_dump(mode="json")
             )
             response.raise_for_status()
-            self.trajectory_id = response.json()["trajectory_id"]
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
         except Exception as e:
             raise TaskFetchError(f"Error creating task: {e!s}") from e
-        return self.trajectory_id
+        return trajectory_id
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def acreate_task(self, task_data: TaskRequest | dict[str, Any]):
+        """Create a new futurehouse task."""
+        if isinstance(task_data, dict):
+            task_data = TaskRequest.model_validate(task_data)
+
+        if isinstance(task_data.name, JobNames):
+            task_data.name = task_data.name.from_stage(
+                task_data.name.name,
+                self.stage,
+            )
+
+        try:
+            response = await self.async_client.post(
+                "/v0.1/crows", json=task_data.model_dump(mode="json")
+            )
+            response.raise_for_status()
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
+        except Exception as e:
+            raise TaskFetchError(f"Error creating task: {e!s}") from e
+        return trajectory_id
+
+    async def arun_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        concurrency: int = 10,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+
+        trajectory_ids = await gather_with_concurrency(
+            concurrency,
+            [self.acreate_task(task) for task in all_tasks],
+            progress=progress_bar,
+        )
+
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+
+        if progress_bar:
+            progress = tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+
+        while (time.monotonic() - start_time) < timeout:
+            task_results = await gather_with_concurrency(
+                concurrency,
+                [
+                    self.aget_task(task_id, verbose=verbose)
+                    for task_id in trajectory_ids
+                    if task_id not in completed_tasks
+                ],
+            )
+
+            for task in task_results:
+                task_id = str(task.task_id)
+                if (
+                    task_id not in completed_tasks
+                    and ExecutionStatus(task.status).is_terminal_state()
+                ):
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+
+            all_done = len(completed_tasks) == len(trajectory_ids)
+
+            if all_done:
+                break
+            await asyncio.sleep(self.DEFAULT_POLLING_TIME)
+
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+
+        if progress_bar:
+            progress.close()
+
+        return [
+            completed_tasks.get(task_id)
+            or (await self.aget_task(task_id, verbose=verbose))
+            for task_id in trajectory_ids
+        ]
+
+    def run_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        """Run multiple tasks and wait for them to complete.
+
+        Args:
+            task_data: A single task or collection of tasks to run
+            verbose: Whether to return verbose task responses
+            progress_bar: Whether to display a progress bar
+            timeout: Maximum time to wait for task completion in seconds
+
+        Returns:
+            A list of completed task responses
+        """
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+
+        trajectory_ids = [self.create_task(task) for task in all_tasks]
+
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+
+        if progress_bar:
+            progress = sync_tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+
+        while (time.monotonic() - start_time) < timeout:
+            all_done = True
+
+            for task_id in trajectory_ids:
+                if task_id in completed_tasks:
+                    continue
+
+                task = self.get_task(task_id, verbose=verbose)
+
+                if not ExecutionStatus(task.status).is_terminal_state():
+                    all_done = False
+                elif task_id not in completed_tasks:
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+
+            if all_done:
+                break
+            time.sleep(self.DEFAULT_POLLING_TIME)
+
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+
+        if progress_bar:
+            progress.close()
+
+        return [
+            completed_tasks.get(task_id) or self.get_task(task_id, verbose=verbose)
+            for task_id in trajectory_ids
+        ]

     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -644,6 +908,243 @@ class RestClient:
             raise JobCreationError(f"Error generating docker image: {e!s}") from e
         return build_context

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def upload_file(
+        self,
+        job_name: str,
+        file_path: str | os.PathLike,
+        folder_name: str | None = None,
+    ) -> str:
+        """Upload a file or directory to a futurehouse job bucket.
+
+        Args:
+            job_name: The name of the futurehouse job to upload to.
+            file_path: The local path to the file or directory to upload.
+            folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+
+        Returns:
+            The upload ID used for the upload.
+
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File or directory not found: {file_path}")
+
+        upload_id = folder_name or str(uuid.uuid4())
+
+        if file_path.is_dir():
+            # Process directory recursively
+            self._upload_directory(job_name, file_path, upload_id)
+        else:
+            # Process single file
+            self._upload_single_file(job_name, file_path, upload_id)
+        logger.info(f"Successfully uploaded {file_path} to {upload_id}")
+        return upload_id
+
+    def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+        """Upload all files in a directory recursively.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            dir_path: The path to the directory to upload.
+            upload_id: The upload ID to use.
+
+        Raises:
+            FileUploadError: If there's an error uploading any file.
+        """
+        # Skip common directories that shouldn't be uploaded
+        if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
+            return
+
+        try:
+            # Upload all files in the directory recursively
+            for path in dir_path.rglob("*"):
+                if path.is_file() and not any(
+                    ignore in path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
+                ):
+                    # Use path relative to the original directory as file name
+                    rel_path = path.relative_to(dir_path)
+                    self._upload_single_file(
+                        job_name,
+                        path,
+                        upload_id,
+                        file_name=str(rel_path),
+                    )
+        except Exception as e:
+            raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
+
+    def _upload_single_file(
+        self,
+        job_name: str,
+        file_path: Path,
+        upload_id: str,
+        file_name: str | None = None,
+    ) -> None:
+        """Upload a single file in chunks.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            upload_id: The upload ID to use.
+            file_name: Optional name to use for the file. If not provided, the file's name will be used.
+
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_name = file_name or file_path.name
+        file_size = file_path.stat().st_size
+        total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
+
+        logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
+
+        try:
+            with open(file_path, "rb") as f:
+                for chunk_index in range(total_chunks):
+                    # Read the chunk from the file
+                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    chunk_data = f.read(self.CHUNK_SIZE)
+
+                    # Prepare and send the chunk
+                    with tempfile.NamedTemporaryFile() as temp_file:
+                        temp_file.write(chunk_data)
+                        temp_file.flush()
+
+                        # Create form data
+                        with open(temp_file.name, "rb") as chunk_file_obj:
+                            files = {
+                                "chunk": (
+                                    file_name,
+                                    chunk_file_obj,
+                                    "application/octet-stream",
+                                )
+                            }
+                            data = {
+                                "file_name": file_name,
+                                "chunk_index": chunk_index,
+                                "total_chunks": total_chunks,
+                                "upload_id": upload_id,
+                            }
+
+                            # Send the chunk
+                            response = self.multipart_client.post(
+                                f"/v0.1/crows/{job_name}/upload-chunk",
+                                files=files,
+                                data=data,
+                            )
+                            response.raise_for_status()
+
+                    logger.debug(
+                        f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                    )
+
+            logger.info(f"Successfully uploaded {file_name}")
+
+        except Exception as e:
+            logger.exception(f"Error uploading file {file_path}")
+            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
+        """List files and directories in a GCS location for a given job_name and upload_id.
+
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) to list files from.
+
+        Returns:
+            A list of files in the GCS folder.
+
+        Raises:
+            RestClientError: If there is an error listing the files.
+        """
+        try:
+            url = f"/v0.1/crows/{job_name}/list-files"
+            params = {"upload_id": folder_name}
+            response = self.client.get(url, params=params)
+            response.raise_for_status()
+            return response.json()
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            raise RestClientError(
+                f"Error listing files: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}"
+            )
+            raise RestClientError(f"Error listing files: {e!s}") from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def download_file(
+        self,
+        job_name: str,
+        folder_name: str,
+        file_path: str,
+        destination_path: str | os.PathLike,
+    ) -> None:
+        """Download a file from GCS to a local path.
+
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) the file belongs to.
+            file_path: The relative path of the file to download
+                (e.g., 'data/my_file.csv' or 'my_image.png').
+            destination_path: The local path where the file should be saved.
+
+        Raises:
+            RestClientError: If there is an error downloading the file.
+            FileNotFoundError: If the destination directory does not exist.
+        """
+        destination_path = Path(destination_path)
+        # Ensure the destination directory exists
+        destination_path.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            url = f"/v0.1/crows/{job_name}/download-file"
+            params = {"upload_id": folder_name, "file_path": file_path}
+
+            with self.client.stream("GET", url, params=params) as response:
+                response.raise_for_status()  # Check for HTTP errors before streaming
+                with open(destination_path, "wb") as f:
+                    for chunk in response.iter_bytes(chunk_size=8192):
+                        f.write(chunk)
+            logger.info(f"File {file_path} downloaded to {destination_path}")
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            # Clean up partially downloaded file if an error occurs
+            if destination_path.exists():
+                destination_path.unlink()
+            raise RestClientError(
+                f"Error downloading file: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
+            )
+            if destination_path.exists():
+                destination_path.unlink()  # Clean up partial file
+            raise RestClientError(f"Error downloading file: {e!s}") from e
+

 def get_installed_packages() -> dict[str, str]:
     """Returns a dictionary of installed packages and their versions."""
@@ -1,3 +1,5 @@
+from enum import StrEnum, auto
+
 from pydantic import BaseModel, JsonValue


@@ -17,3 +19,18 @@ class StoreEnvironmentFrameRequest(BaseModel):
     current_agent_step: str
     state: JsonValue
     trajectory_timestep: int
+
+
+class ExecutionStatus(StrEnum):
+    QUEUED = auto()
+    IN_PROGRESS = "in progress"
+    FAIL = auto()
+    SUCCESS = auto()
+    CANCELLED = auto()
+
+    def is_terminal_state(self) -> bool:
+        return self in self.terminal_states()
+
+    @classmethod
+    def terminal_states(cls) -> set["ExecutionStatus"]:
+        return {cls.SUCCESS, cls.FAIL, cls.CANCELLED}
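Since `ExecutionStatus` is a `StrEnum`, the `auto()` members take their lower-cased names as values, while `IN_PROGRESS` is explicitly "in progress"; this is the check both task runners use to decide when to stop polling. A small sketch:

```python
from futurehouse_client.models.rest import ExecutionStatus

assert ExecutionStatus.SUCCESS == "success"  # auto() lower-cases the member name
assert ExecutionStatus.IN_PROGRESS == "in progress"

# Terminal states end the polling loops in run_tasks_until_done.
assert ExecutionStatus("cancelled").is_terminal_state()
assert not ExecutionStatus("queued").is_terminal_state()
```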
@@ -0,0 +1,29 @@
+import asyncio
+from collections.abc import Awaitable, Iterable
+from typing import TypeVar
+
+from tqdm.asyncio import tqdm
+
+T = TypeVar("T")
+
+
+async def gather_with_concurrency(
+    n: int | asyncio.Semaphore, coros: Iterable[Awaitable[T]], progress: bool = False
+) -> list[T]:
+    """
+    Run asyncio.gather with a concurrency limit.
+
+    SEE: https://stackoverflow.com/a/61478547/2392535
+    """
+    semaphore = asyncio.Semaphore(n) if isinstance(n, int) else n
+
+    async def sem_coro(coro: Awaitable[T]) -> T:
+        async with semaphore:
+            return await coro
+
+    if progress:
+        return await tqdm.gather(
+            *(sem_coro(c) for c in coros), desc="Gathering", ncols=0
+        )
+
+    return await asyncio.gather(*(sem_coro(c) for c in coros))
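`gather_with_concurrency` wraps each awaitable in a shared semaphore before handing the batch to `asyncio.gather` (or `tqdm.gather` when `progress=True`), so at most `n` coroutines run at once while result order is preserved. A standalone sketch:

```python
import asyncio

from futurehouse_client.utils.general import gather_with_concurrency


async def fetch(i: int) -> int:
    await asyncio.sleep(0.1)  # stand-in for an HTTP call
    return i


async def main() -> None:
    # At most 3 of the 10 coroutines are in flight at any time.
    results = await gather_with_concurrency(3, [fetch(i) for i in range(10)])
    print(results)  # [0, 1, ..., 9]: input order is preserved


asyncio.run(main())
```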
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.16
+Version: 0.3.17.dev94
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -19,6 +19,7 @@ Requires-Dist: litellm==1.67.4.post1
 Requires-Dist: pydantic
 Requires-Dist: python-dotenv
 Requires-Dist: tenacity
+Requires-Dist: tqdm>=4.62
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: jupyter; extra == "dev"
@@ -30,6 +31,7 @@ Requires-Dist: pylint; extra == "dev"
 Requires-Dist: pylint-per-file-ignores; extra == "dev"
 Requires-Dist: pylint-pydantic; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-rerunfailures; extra == "dev"
 Requires-Dist: pytest-subtests; extra == "dev"
 Requires-Dist: pytest-timeout; extra == "dev"
@@ -49,9 +51,9 @@ Documentation and tutorials for futurehouse-client, a client for interacting wit
 - [Quickstart](#quickstart)
 - [Functionalities](#functionalities)
 - [Authentication](#authentication)
-- [Task submission](#task-submission)
+- [Simple task running](#simple-task-running)
 - [Task Continuation](#task-continuation)
-- [Task retrieval](#task-retrieval)
+- [Asynchronous tasks](#asynchronous-tasks)

 <!--TOC-->

@@ -78,19 +80,17 @@ task_data = {
     "query": "Which neglected diseases had a treatment developed by artificial intelligence?",
 }

-task_run_id = client.create_task(task_data)
-
-task_status = client.get_task(task_run_id)
+task_response = client.run_tasks_until_done(task_data)
 ```

-A quickstart example can be found in the [client_notebook.ipynb](https://github.com/Future-House/futurehouse-client-docs/blob/main/docs/client_notebook.ipynb) file, where we show how to submit and retrieve a job task, pass runtime configuration to the agent, and ask follow-up questions to the previous job.
+A quickstart example can be found in the [client_notebook.ipynb](https://futurehouse.gitbook.io/futurehouse-cookbook/futurehouse-client/docs/client_notebook) file, where we show how to submit and retrieve a job task, pass runtime configuration to the agent, and ask follow-up questions to the previous job.

 ## Functionalities

 FutureHouse client implements a RestClient (called `FutureHouseClient`) with the following functionalities:

-- [Task submission](#task-submission): `create_task(TaskRequest)`
-- [Task status](#task-status): `get_task(task_id)`
+- [Simple task running](#simple-task-running): `run_tasks_until_done(TaskRequest)` or `await arun_tasks_until_done(TaskRequest)`
+- [Asynchronous tasks](#asynchronous-tasks): `get_task(task_id)` or `aget_task(task_id)` and `create_task(TaskRequest)` or `acreate_task(TaskRequest)`

 To create a `FutureHouseClient`, you need to pass a FutureHouse platform API key (see [Authentication](#authentication)):
@@ -106,9 +106,9 @@ client = FutureHouseClient(

 In order to use the `FutureHouseClient`, you need to authenticate yourself. Authentication is done by providing an API key, which can be obtained directly from your [profile page in the FutureHouse platform](https://platform.futurehouse.org/profile).

-## Task submission
+## Simple task running

-In the futurehouse platform, we define the deployed combination of an agent and an environment as a `job`. To invoke a job, we need to submit a `task` (also called a `query`) to it.
+In the FutureHouse platform, we define the deployed combination of an agent and an environment as a `job`. To invoke a job, we need to submit a `task` (also called a `query`) to it.
 `FutureHouseClient` can be used to submit tasks/queries to available jobs in the FutureHouse platform. Using a `FutureHouseClient` instance, you can submit tasks to the platform by calling the `create_task` method, which receives a `TaskRequest` (or a dictionary with `kwargs`) and returns the task id.
 Aiming to make the submission of tasks as simple as possible, we have created a `JobNames` `enum` that contains the available task types.

@@ -118,10 +118,10 @@ The available supported jobs are:
 | `JobNames.CROW` | `job-futurehouse-paperqa2` | Fast Search | Ask a question of scientific data sources, and receive a high-accuracy, cited response. Built with [PaperQA2](https://github.com/Future-House/paper-qa). |
 | `JobNames.FALCON` | `job-futurehouse-paperqa2-deep` | Deep Search | Use a plethora of sources to deeply research. Receive a detailed, structured report as a response. |
 | `JobNames.OWL` | `job-futurehouse-hasanyone` | Precedent Search | Formerly known as HasAnyone, query if anyone has ever done something in science. |
+| `JobNames.PHOENIX` | `job-futurehouse-phoenix` | Chemistry Tasks | A new iteration of ChemCrow, Phoenix uses cheminformatics tools to do chemistry. Good for planning synthesis and design of new molecules. |
 | `JobNames.DUMMY` | `job-futurehouse-dummy` | Dummy Task | This is a dummy task. Mainly for testing purposes. |

-Using `JobNames`, the client automatically adapts the job name to the current stage.
-The task submission looks like this:
+Using `JobNames`, the task submission looks like this:

 ```python
 from futurehouse_client import FutureHouseClient, JobNames
@@ -135,10 +135,73 @@ task_data = {
     "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
 }

-task_id = client.create_task(task_data)
+task_responses = client.run_tasks_until_done(task_data)
+
+print(task_responses[0].answer)
+```
+
+Or if running async code:
+
+```python
+import asyncio
+from futurehouse_client import FutureHouseClient, JobNames
+
+
+async def main():
+    client = FutureHouseClient(
+        api_key="your_api_key",
+    )
+
+    task_data = {
+        "name": JobNames.OWL,
+        "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
+    }
+
+    task_responses = await client.arun_tasks_until_done(task_data)
+    print(task_responses[0].answer)
+    return task_responses
+
+
+if __name__ == "__main__":
+    task_responses = asyncio.run(main())
 ```

-`TaskRequest` has the following fields:
+Note that in either the sync or the async code, collections of tasks can be given to the client to run them in a batch:
+
+```python
+import asyncio
+from futurehouse_client import FutureHouseClient, JobNames
+
+
+async def main():
+    client = FutureHouseClient(
+        api_key="your_api_key",
+    )
+
+    task_data = [
+        {
+            "name": JobNames.OWL,
+            "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
+        },
+        {
+            "name": JobNames.CROW,
+            "query": "Are there any clinically validated therapeutic exerkines for humans?",
+        },
+    ]
+
+    task_responses = await client.arun_tasks_until_done(task_data)
+    print(task_responses[0].answer)
+    print(task_responses[1].answer)
+    return task_responses
+
+
+if __name__ == "__main__":
+    task_responses = asyncio.run(main())
+```
+
+`TaskRequest` can also be used to submit jobs; it has the following fields:

 | Field | Type | Description |
 | -------------- | ------------- | ------------------------------------------------------------------------------------------------------------------- |
@@ -148,13 +211,67 @@ task_id = client.create_task(task_data)
 | runtime_config | RuntimeConfig | Optional runtime parameters for the job |

 `runtime_config` can receive an `AgentConfig` object with the desired kwargs. Check the available `AgentConfig` fields in the [LDP documentation](https://github.com/Future-House/ldp/blob/main/src/ldp/agent/agent.py#L87). Besides the `AgentConfig` object, we can also pass `timeout` and `max_steps` to limit the execution time and the number of steps the agent can take.
-Other especialised configurations are also available but are outside the scope of this documentation.
+
+```python
+from futurehouse_client import FutureHouseClient, JobNames
+from futurehouse_client.models.app import TaskRequest
+
+client = FutureHouseClient(
+    api_key="your_api_key",
+)
+
+task_responses = client.run_tasks_until_done(
+    TaskRequest(
+        name=JobNames.OWL,
+        query="Has anyone tested therapeutic exerkines in humans or NHPs?",
+    )
+)
+
+print(task_responses[0].answer)
+```
+
+A `TaskResponse` will be returned from using our agents. For Owl, Crow, and Falcon, we default to a subclass, `PQATaskResponse`, which has some key attributes:
+
+| Field | Type | Description |
+| --------------------- | ---- | -------------------------------------------------------------------------------- |
+| answer | str | Answer to your query. |
+| formatted_answer | str | Specially formatted answer with references. |
+| has_successful_answer | bool | Flag for whether the agent was able to find a good answer to your query or not. |
+
+If using the `verbose` setting, much more data can be pulled down from your `TaskResponse`, which will exist across all agents (not just Owl, Crow, and Falcon).
+
+```python
+from futurehouse_client import FutureHouseClient, JobNames
+from futurehouse_client.models.app import TaskRequest
+
+client = FutureHouseClient(
+    api_key="your_api_key",
+)
+
+task_responses = client.run_tasks_until_done(
+    TaskRequest(
+        name=JobNames.OWL,
+        query="Has anyone tested therapeutic exerkines in humans or NHPs?",
+    ),
+    verbose=True,
+)
+
+print(task_responses[0].environment_frame)
+```
+
+In that case, a `TaskResponseVerbose` will have the following fields:
+
+| Field | Type | Description |
+| ----------------- | ---- | ------------------------------------------------------------------------------------------------------------------------ |
+| agent_state | dict | Large object with all agent states during the progress of your task. |
+| environment_frame | dict | Large nested object with all environment data, for PQA environments it includes contexts, paper metadata, and answers. |
+| metadata | dict | Extra metadata about your query. |

 ## Task Continuation

 Once a task is submitted and the answer is returned, the FutureHouse platform allows you to ask follow-up questions to the previous task.
 It is also possible through the platform API.
-To accomplish that, we can use the `runtime_config` we discussed in the [Task submission](#task-submission) section.
+To accomplish that, we can use the `runtime_config` we discussed in the [Simple task running](#simple-task-running) section.
@@ -173,12 +290,12 @@ continued_task_data = {
     "runtime_config": {"continued_task_id": task_id},
 }

-continued_task_id = client.create_task(continued_task_data)
+task_result = client.run_tasks_until_done(continued_task_data)
 ```

-## Task retrieval
+## Asynchronous tasks

-Once a task is submitted, you can retrieve it by calling the `get_task` method, which receives a task id and returns a `TaskResponse` object.
+Sometimes you may want to submit many jobs and only query their results later, doing other work while you wait. The platform API supports this pattern as well, rather than blocking until a result is ready.

 ```python
 from futurehouse_client import FutureHouseClient, JobNames
@@ -187,9 +304,13 @@ client = FutureHouseClient(
     api_key="your_api_key",
 )

-task_id = "task_id"
+task_data = {"name": JobNames.CROW, "query": "How many species of birds are there?"}
+
+task_id = client.create_task(task_data)
+
+# move on to do other things

 task_status = client.get_task(task_id)
 ```

-`task_status` contains information about the task. For instance, its `status`, `task`, `environment_name` and `agent_name`, and other fields specific to the job.
+`task_status` contains information about the task. For instance, its `status`, `task`, `environment_name` and `agent_name`, and other fields specific to the job. You can poll the status periodically until it is `success` before moving on.
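For completeness, the polling loop the README describes might look like the sketch below, using the `ExecutionStatus` helper added in this release (the API key is a placeholder and the five-second interval is an arbitrary choice):

```python
import time

from futurehouse_client import FutureHouseClient, JobNames
from futurehouse_client.models.rest import ExecutionStatus

client = FutureHouseClient(api_key="your_api_key")

task_id = client.create_task(
    {"name": JobNames.CROW, "query": "How many species of birds are there?"}
)

# Poll until the task reaches a terminal state (success, fail, or cancelled).
while not ExecutionStatus(client.get_task(task_id).status).is_terminal_state():
    time.sleep(5)

print(client.get_task(task_id).answer)
```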
@@ -0,0 +1,16 @@
+futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
+futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
+futurehouse_client/clients/job_client.py,sha256=Fi3YvN4k82AuXCe8vlwxhkK8CXS164NQrs7paj9qIek,11096
+futurehouse_client/clients/rest_client.py,sha256=dsUmpgV5sfyb4GDv6whWVwRN1z2LOfZsPF8vjoioNfY,45472
+futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
+futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
+futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+futurehouse_client-0.3.17.dev94.dist-info/METADATA,sha256=acLPon9oE1ecVZzz8JrpumcSLmhRkqGGG62gjGEW1IQ,12766
+futurehouse_client-0.3.17.dev94.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+futurehouse_client-0.3.17.dev94.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.3.17.dev94.dist-info/RECORD,,
@@ -1,15 +0,0 @@
-futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
-futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
-futurehouse_client/clients/job_client.py,sha256=yBFKDNcFnuZDNgoK2d5037rbuzQ7TlSK6MmklEKV8EA,11056
-futurehouse_client/clients/rest_client.py,sha256=Dc29QRNZMO4uxaXNGKyx18Tn-vLaJ6P5fCbM_0u-Z3I,26379
-futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
-futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
-futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
-futurehouse_client/models/rest.py,sha256=W-wNFTN7HALYFFphw-RQYRMm6_TSa1cl4T-mZ1msk90,393
-futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
-futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
-futurehouse_client-0.3.16.dist-info/METADATA,sha256=uCvzXKeI6i8PRvike8YKVa7-IJQAwL8G2ILjjOf6xIo,8175
-futurehouse_client-0.3.16.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-futurehouse_client-0.3.16.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
-futurehouse_client-0.3.16.dist-info/RECORD,,