PyPI - futurehouse-client - Versions diffs - 0.3.15.dev71__tar.gz → 0.3.17.dev56__tar.gz - Mend

futurehouse-client 0.3.15.dev71.tar.gz → 0.3.17.dev56.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.15.dev71
+Version: 0.3.17.dev56
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent

{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/client_notebook.ipynb RENAMED Viewed

@@ -27,12 +27,12 @@
    "source": [
     "import time\n",
     "\n",
-    "from futurehouse_client import Client, JobNames\n",
+    "from futurehouse_client import FutureHouseClient, JobNames\n",
     "from futurehouse_client.models import (\n",
     "    AuthType,\n",
-    "    JobRequest,\n",
     "    RuntimeConfig,\n",
     "    Stage,\n",
+    "    TaskRequest,\n",
     ")\n",
     "from ldp.agent import AgentConfig"
    ]
@@ -53,7 +53,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "client = Client(\n",
+    "client = FutureHouseClient(\n",
     "    stage=Stage.PROD,\n",
     "    auth_type=AuthType.API_KEY,\n",
     "    api_key=\"your-api-key\",\n",
@@ -80,7 +80,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "job_data = JobRequest(\n",
+    "job_data = TaskRequest(\n",
     "    name=JobNames.from_string(\"crow\"),\n",
     "    query=\"What is the molecule known to have the greatest solubility in water?\",\n",
     ")\n",
@@ -114,7 +114,7 @@
     "        \"temperature\": 0.0,\n",
     "    },\n",
     ")\n",
-    "job_data = JobRequest(\n",
+    "job_data = TaskRequest(\n",
     "    name=JobNames.CROW,\n",
     "    query=\"How many moons does earth have?\",\n",
     "    runtime_config=RuntimeConfig(agent=agent, max_steps=10),\n",
@@ -145,7 +145,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "job_data = JobRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
+    "job_data = TaskRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
     "\n",
     "job_id = client.create_job(job_data)\n",
     "while client.get_job().status != \"success\":\n",

{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/job_client.py RENAMED Viewed

@@ -29,6 +29,7 @@ class JobNames(StrEnum):
     FALCON = "job-futurehouse-paperqa2-deep"
     OWL = "job-futurehouse-hasanyone"
     DUMMY = "job-futurehouse-dummy-env"
+    PHOENIX = "job-futurehouse-phoenix"
     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:

{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/rest_client.py RENAMED Viewed

@@ -6,6 +6,8 @@ import inspect
 import json
 import logging
 import os
+import tempfile
+import uuid
 from collections.abc import Mapping
 from datetime import datetime
 from pathlib import Path
@@ -118,7 +120,7 @@ class TaskResponse(BaseModel):
     status: str
     query: str
-    user: str
+    user: str | None = None
     created_at: datetime
     job_name: str
     public: bool
@@ -200,11 +202,16 @@ class TaskResponseVerbose(TaskResponse):
     shared_with: list[SimpleOrganization] | None = None
+class FileUploadError(RestClientError):
+    """Raised when there's an error uploading a file."""
 class RestClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
     MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
     RETRY_MULTIPLIER: ClassVar[int] = 1
     MAX_RETRY_WAIT: ClassVar[int] = 10
+    CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
     def __init__(
         self,
@@ -388,29 +395,30 @@ class RestClient:
             url = f"/v0.1/trajectories/{task_id}"
             full_url = f"{self.base_url}{url}"
-            with external_trace(
-                url=full_url,
-                method="GET",
-                library="httpx",
-                custom_params={
-                    "operation": "get_job",
-                    "job_id": task_id,
-                },
+            with (
+                external_trace(
+                    url=full_url,
+                    method="GET",
+                    library="httpx",
+                    custom_params={
+                        "operation": "get_job",
+                        "job_id": task_id,
+                    },
+                ),
+                self.client.stream("GET", url, params={"history": history}) as response,
             ):
-                response = self.client.get(
-                    url,
-                    params={"history": history},
-                )
-            response.raise_for_status()
-            verbose_response = TaskResponseVerbose(**response.json())
+                json_data = "".join(response.iter_text(chunk_size=1024))
+                data = json.loads(json_data)
+                verbose_response = TaskResponseVerbose(**data)
             if verbose:
                 return verbose_response
             if any(
                 JobNames.from_string(job_name) in verbose_response.job_name
                 for job_name in ["crow", "falcon", "owl", "dummy"]
             ):
-                return PQATaskResponse(**response.json())
-            return TaskResponse(**response.json())
+                return PQATaskResponse(**data)
+            return TaskResponse(**data)
         except ValueError as e:
             raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
         except Exception as e:
@@ -643,6 +651,243 @@ class RestClient:
             raise JobCreationError(f"Error generating docker image: {e!s}") from e
         return build_context
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def upload_file(
+        self,
+        job_name: str,
+        file_path: str | os.PathLike,
+        folder_name: str | None = None,
+    ) -> str:
+        """Upload a file or directory to a futurehouse job bucket.
+        Args:
+            job_name: The name of the futurehouse job to upload to.
+            file_path: The local path to the file or directory to upload.
+            folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+        Returns:
+            The upload ID used for the upload.
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File or directory not found: {file_path}")
+        upload_id = folder_name or str(uuid.uuid4())
+        if file_path.is_dir():
+            # Process directory recursively
+            self._upload_directory(job_name, file_path, upload_id)
+        else:
+            # Process single file
+            self._upload_single_file(job_name, file_path, upload_id)
+        logger.info(f"Successfully uploaded {file_path} to {upload_id}")
+        return upload_id
+    def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+        """Upload all files in a directory recursively.
+        Args:
+            job_name: The key of the crow to upload to.
+            dir_path: The path to the directory to upload.
+            upload_id: The upload ID to use.
+        Raises:
+            FileUploadError: If there's an error uploading any file.
+        """
+        # Skip common directories that shouldn't be uploaded
+        if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
+            return
+        try:
+            # Upload all files in the directory recursively
+            for path in dir_path.rglob("*"):
+                if path.is_file() and not any(
+                    ignore in path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
+                ):
+                    # Use path relative to the original directory as file name
+                    rel_path = path.relative_to(dir_path)
+                    self._upload_single_file(
+                        job_name,
+                        path,
+                        upload_id,
+                        file_name=str(rel_path),
+                    )
+        except Exception as e:
+            raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
+    def _upload_single_file(
+        self,
+        job_name: str,
+        file_path: Path,
+        upload_id: str,
+        file_name: str | None = None,
+    ) -> None:
+        """Upload a single file in chunks.
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            upload_id: The upload ID to use.
+            file_name: Optional name to use for the file. If not provided, the file's name will be used.
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_name = file_name or file_path.name
+        file_size = file_path.stat().st_size
+        total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
+        logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
+        try:
+            with open(file_path, "rb") as f:
+                for chunk_index in range(total_chunks):
+                    # Read the chunk from the file
+                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    chunk_data = f.read(self.CHUNK_SIZE)
+                    # Prepare and send the chunk
+                    with tempfile.NamedTemporaryFile() as temp_file:
+                        temp_file.write(chunk_data)
+                        temp_file.flush()
+                        # Create form data
+                        with open(temp_file.name, "rb") as chunk_file_obj:
+                            files = {
+                                "chunk": (
+                                    file_name,
+                                    chunk_file_obj,
+                                    "application/octet-stream",
+                                )
+                            }
+                            data = {
+                                "file_name": file_name,
+                                "chunk_index": chunk_index,
+                                "total_chunks": total_chunks,
+                                "upload_id": upload_id,
+                            }
+                            # Send the chunk
+                            response = self.multipart_client.post(
+                                f"/v0.1/crows/{job_name}/upload-chunk",
+                                files=files,
+                                data=data,
+                            )
+                            response.raise_for_status()
+                        # Call progress callback if provided
+                        logger.debug(
+                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        )
+            logger.info(f"Successfully uploaded {file_name}")
+        except Exception as e:
+            logger.exception(f"Error uploading file {file_path}")
+            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
+        """List files and directories in a GCS location for a given job_name and upload_id.
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) to list files from.
+        Returns:
+            A list of files in the GCS folder.
+        Raises:
+            RestClientError: If there is an error listing the files.
+        """
+        try:
+            url = f"/v0.1/crows/{job_name}/list-files"
+            params = {"upload_id": folder_name}
+            response = self.client.get(url, params=params)
+            response.raise_for_status()
+            return response.json()
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            raise RestClientError(
+                f"Error listing files: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}"
+            )
+            raise RestClientError(f"Error listing files: {e!s}") from e
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def download_file(
+        self,
+        job_name: str,
+        folder_name: str,
+        file_path: str,
+        destination_path: str | os.PathLike,
+    ) -> None:
+        """Download a file from GCS to a local path.
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) the file belongs to.
+            file_path: The relative path of the file to download
+                       (e.g., 'data/my_file.csv' or 'my_image.png').
+            destination_path: The local path where the file should be saved.
+        Raises:
+            RestClientError: If there is an error downloading the file.
+            FileNotFoundError: If the destination directory does not exist.
+        """
+        destination_path = Path(destination_path)
+        # Ensure the destination directory exists
+        destination_path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            url = f"/v0.1/crows/{job_name}/download-file"
+            params = {"upload_id": folder_name, "file_path": file_path}
+            with self.client.stream("GET", url, params=params) as response:
+                response.raise_for_status()  # Check for HTTP errors before streaming
+                with open(destination_path, "wb") as f:
+                    for chunk in response.iter_bytes(chunk_size=8192):
+                        f.write(chunk)
+            logger.info(f"File {file_path} downloaded to {destination_path}")
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            # Clean up partially downloaded file if an error occurs
+            if destination_path.exists():
+                destination_path.unlink()
+            raise RestClientError(
+                f"Error downloading file: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
+            )
+            if destination_path.exists():
+                destination_path.unlink()  # Clean up partial file
+            raise RestClientError(f"Error downloading file: {e!s}") from e
 def get_installed_packages() -> dict[str, str]:
     """Returns a dictionary of installed packages and their versions."""

{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.15.dev71
+Version: 0.3.17.dev56
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent