futurehouse-client 0.3.18.dev195 → 0.3.19 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
futurehouse_client/__init__.py
@@ -1,12 +1,22 @@
  from .clients.job_client import JobClient, JobNames
- from .clients.rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
  from .clients.rest_client import RestClient as FutureHouseClient
+ from .models.app import (
+     FinchTaskResponse,
+     PhoenixTaskResponse,
+     PQATaskResponse,
+     TaskRequest,
+     TaskResponse,
+     TaskResponseVerbose,
+ )
 
  __all__ = [
+     "FinchTaskResponse",
      "FutureHouseClient",
      "JobClient",
      "JobNames",
      "PQATaskResponse",
+     "PhoenixTaskResponse",
+     "TaskRequest",
      "TaskResponse",
      "TaskResponseVerbose",
  ]
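With this change the task-response models and `TaskRequest` are re-exported from the package root, so downstream code no longer needs to reach into `clients.rest_client`. A minimal sketch of the new import surface (names taken from the hunk above):

```python
# Sketch: the 0.3.19 top-level import surface.
from futurehouse_client import (
    FinchTaskResponse,
    FutureHouseClient,
    JobClient,
    JobNames,
    PhoenixTaskResponse,
    PQATaskResponse,
    TaskRequest,
    TaskResponse,
    TaskResponseVerbose,
)
```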
futurehouse_client/clients/__init__.py
@@ -1,12 +1,11 @@
  from .job_client import JobClient, JobNames
- from .rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
  from .rest_client import RestClient as FutureHouseClient
+ from .rest_client import TaskResponse, TaskResponseVerbose
 
  __all__ = [
      "FutureHouseClient",
      "JobClient",
      "JobNames",
-     "PQATaskResponse",
      "TaskResponse",
      "TaskResponseVerbose",
  ]
futurehouse_client/clients/job_client.py
@@ -8,7 +8,13 @@ from aviary.env import Frame
  from pydantic import BaseModel
  from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
 
- from futurehouse_client.models.app import Stage
+ from futurehouse_client.models.app import (
+     FinchTaskResponse,
+     PhoenixTaskResponse,
+     PQATaskResponse,
+     Stage,
+     TaskResponse,
+ )
  from futurehouse_client.models.rest import (
      FinalEnvironmentRequest,
      StoreAgentStatePostRequest,
@@ -31,6 +37,19 @@ class JobNames(StrEnum):
      DUMMY = "job-futurehouse-dummy-env"
      PHOENIX = "job-futurehouse-phoenix"
      FINCH = "job-futurehouse-data-analysis-crow-high"
+     CHIMP = "job-futurehouse-chimp"
+
+     @classmethod
+     def _get_response_mapping(cls) -> dict[str, type[TaskResponse]]:
+         return {
+             cls.CROW: PQATaskResponse,
+             cls.FALCON: PQATaskResponse,
+             cls.OWL: PQATaskResponse,
+             cls.CHIMP: PQATaskResponse,
+             cls.PHOENIX: PhoenixTaskResponse,
+             cls.FINCH: FinchTaskResponse,
+             cls.DUMMY: TaskResponse,
+         }
 
      @classmethod
      def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -52,6 +71,13 @@ class JobNames(StrEnum):
              f"Invalid job name: {job_name}. \nOptions are: {', '.join([name.name for name in cls])}"
          ) from e
 
+     @staticmethod
+     def get_response_object_from_job(job_name: str) -> type[TaskResponse]:
+         return JobNames._get_response_mapping()[job_name]
+
+     def get_response_object(self) -> type[TaskResponse]:
+         return self._get_response_mapping()[self.name]
+
 
  class JobClient:
      REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
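The new mapping keys `type[TaskResponse]` classes by `JobNames` members. Because `JobNames` is a `StrEnum`, members hash and compare like their string values, so `get_response_object_from_job` can be called with the raw job-name string the API returns. A small sketch using the `PHOENIX` value shown above:

```python
from futurehouse_client.clients import JobNames

# StrEnum members hash as their string values, so the raw job-name string
# returned by the API resolves against the enum-keyed mapping.
response_cls = JobNames.get_response_object_from_job("job-futurehouse-phoenix")
print(response_cls.__name__)  # PhoenixTaskResponse
```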
futurehouse_client/clients/rest_client.py
@@ -13,6 +13,7 @@ import tempfile
  import time
  import uuid
  from collections.abc import Collection
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from pathlib import Path
  from types import ModuleType
  from typing import Any, ClassVar, cast
@@ -31,6 +32,7 @@ from httpx import (
      ReadError,
      ReadTimeout,
      RemoteProtocolError,
+     codes,
  )
  from ldp.agent import AgentConfig
  from requests.exceptions import RequestException, Timeout
@@ -47,7 +49,6 @@ from futurehouse_client.clients import JobNames
  from futurehouse_client.models.app import (
      AuthType,
      JobDeploymentConfig,
-     PQATaskResponse,
      Stage,
      TaskRequest,
      TaskResponse,
@@ -133,6 +134,9 @@ class RestClient:
      MAX_RETRY_WAIT: ClassVar[int] = 10
      DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
      CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
+     ASSEMBLY_POLLING_INTERVAL: ClassVar[int] = 10  # seconds
+     MAX_ASSEMBLY_WAIT_TIME: ClassVar[int] = 1800  # 30 minutes
+     MAX_CONCURRENT_CHUNKS: ClassVar[int] = 12  # Maximum concurrent chunk uploads
 
      def __init__(
          self,
@@ -174,7 +178,7 @@ class RestClient:
 
      @property
      def unauthenticated_client(self) -> Client:
-         """Unauthenticated HTTP client for auth operations to avoid recursion."""
+         """Unauthenticated HTTP client for auth operations."""
          return cast(Client, self.get_client("application/json", authenticated=False))
 
      @property
@@ -219,6 +223,8 @@ class RestClient:
          if content_type:
              headers["Content-Type"] = content_type
 
+         headers["x-client"] = "sdk"
+
          self._clients[key] = (
              AsyncClient(
                  base_url=self.base_url,
@@ -280,6 +286,104 @@
          orgs = response.json()
          return [org["name"] for org in orgs]
 
+     def _check_assembly_status(
+         self, job_name: str, upload_id: str, file_name: str
+     ) -> dict[str, Any]:
+         """Check the assembly status of an uploaded file.
+
+         Args:
+             job_name: The name of the futurehouse job
+             upload_id: The upload ID
+             file_name: The name of the file
+
+         Returns:
+             Dict containing status information
+
+         Raises:
+             RestClientError: If there's an error checking status
+         """
+         try:
+             url = f"/v0.1/crows/{job_name}/assembly-status/{upload_id}/{file_name}"
+             response = self.client.get(url)
+             response.raise_for_status()
+             return response.json()
+         except Exception as e:
+             raise RestClientError(f"Error checking assembly status: {e}") from e
+
+     def _wait_for_all_assemblies_completion(
+         self,
+         job_name: str,
+         upload_id: str,
+         file_names: list[str],
+         timeout: int = MAX_ASSEMBLY_WAIT_TIME,
+     ) -> bool:
+         """Wait for all file assemblies to complete.
+
+         Args:
+             job_name: The name of the futurehouse job
+             upload_id: The upload ID
+             file_names: List of file names to wait for
+             timeout: Maximum time to wait in seconds
+
+         Returns:
+             True if all assemblies succeeded, False if any failed or timed out
+
+         Raises:
+             RestClientError: If any assembly fails
+         """
+         if not file_names:
+             return True
+
+         start_time = time.time()
+         logger.info(f"Waiting for assembly of {len(file_names)} file(s) to complete...")
+
+         completed_files: set[str] = set()
+
+         while (time.time() - start_time) < timeout and len(completed_files) < len(
+             file_names
+         ):
+             for file_name in file_names:
+                 if file_name in completed_files:
+                     continue
+
+                 try:
+                     status_data = self._check_assembly_status(
+                         job_name, upload_id, file_name
+                     )
+                     status = status_data.get("status")
+
+                     if status == ExecutionStatus.SUCCESS.value:
+                         logger.info(f"Assembly completed for {file_name}")
+                         completed_files.add(file_name)
+                     elif status == ExecutionStatus.FAIL.value:
+                         error_msg = status_data.get("error", "Unknown assembly error")
+                         raise RestClientError(
+                             f"Assembly failed for {file_name}: {error_msg}"
+                         )
+                     elif status == ExecutionStatus.IN_PROGRESS.value:
+                         logger.debug(f"Assembly in progress for {file_name}...")
+
+                 except RestClientError:
+                     raise  # Re-raise assembly errors
+                 except Exception as e:
+                     logger.warning(
+                         f"Error checking assembly status for {file_name}: {e}"
+                     )
+
+             # Don't sleep if all files are complete
+             if len(completed_files) < len(file_names):
+                 time.sleep(self.ASSEMBLY_POLLING_INTERVAL)
+
+         if len(completed_files) < len(file_names):
+             remaining_files = set(file_names) - completed_files
+             logger.warning(
+                 f"Assembly timeout for files: {remaining_files} after {timeout} seconds"
+             )
+             return False
+
+         logger.info(f"All {len(file_names)} file assemblies completed successfully")
+         return True
+
      @staticmethod
      def _validate_module_path(path: Path) -> None:
          """Validates that the given path exists and is a directory.
@@ -340,40 +444,36 @@
          self, task_id: str | None = None, history: bool = False, verbose: bool = False
      ) -> "TaskResponse":
          """Get details for a specific task."""
-         try:
-             task_id = task_id or self.trajectory_id
-             url = f"/v0.1/trajectories/{task_id}"
-             full_url = f"{self.base_url}{url}"
-
-             with (
-                 external_trace(
-                     url=full_url,
-                     method="GET",
-                     library="httpx",
-                     custom_params={
-                         "operation": "get_job",
-                         "job_id": task_id,
-                     },
-                 ),
-                 self.client.stream("GET", url, params={"history": history}) as response,
-             ):
-                 response.raise_for_status()
-                 json_data = "".join(response.iter_text(chunk_size=1024))
-                 data = json.loads(json_data)
-                 if "id" not in data:
-                     data["id"] = task_id
-                 verbose_response = TaskResponseVerbose(**data)
+         task_id = task_id or self.trajectory_id
+         url = f"/v0.1/trajectories/{task_id}"
+         full_url = f"{self.base_url}{url}"
 
-             if verbose:
-                 return verbose_response
-             if any(
-                 JobNames.from_string(job_name) in verbose_response.job_name
-                 for job_name in ["crow", "falcon", "owl", "dummy"]
-             ):
-                 return PQATaskResponse(**data)
-             return TaskResponse(**data)
-         except Exception as e:
-             raise TaskFetchError(f"Error getting task: {e!s}") from e
+         with (
+             external_trace(
+                 url=full_url,
+                 method="GET",
+                 library="httpx",
+                 custom_params={
+                     "operation": "get_job",
+                     "job_id": task_id,
+                 },
+             ),
+             self.client.stream("GET", url, params={"history": history}) as response,
+         ):
+             if response.status_code in {401, 403}:
+                 raise PermissionError(
+                     f"Error getting task: Permission denied for task {task_id}"
+                 )
+             response.raise_for_status()
+             json_data = "".join(response.iter_text(chunk_size=1024))
+             data = json.loads(json_data)
+             if "id" not in data:
+                 data["id"] = task_id
+             verbose_response = TaskResponseVerbose(**data)
+
+         if verbose:
+             return verbose_response
+         return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
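Two behavioral changes fall out of this rewrite: 401/403 responses now raise `PermissionError` directly instead of being wrapped in `TaskFetchError`, and the response class is resolved through `JobNames.get_response_object_from_job` rather than the hard-coded `["crow", "falcon", "owl", "dummy"]` list. A hedged usage sketch (the API key and task ID are placeholders, and keyword-argument client construction is assumed from the package README):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials

try:
    task = client.get_task("00000000-example-task-id")  # placeholder ID
except PermissionError:
    # New in 0.3.19: permission failures surface directly,
    # instead of arriving wrapped in TaskFetchError.
    print("No access to this task")
else:
    # The concrete type depends on the job that produced the task:
    # PQATaskResponse, PhoenixTaskResponse, FinchTaskResponse, or TaskResponse.
    print(type(task).__name__)
```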
@@ -384,42 +484,36 @@
          self, task_id: str | None = None, history: bool = False, verbose: bool = False
      ) -> "TaskResponse":
          """Get details for a specific task asynchronously."""
-         try:
-             task_id = task_id or self.trajectory_id
-             url = f"/v0.1/trajectories/{task_id}"
-             full_url = f"{self.base_url}{url}"
+         task_id = task_id or self.trajectory_id
+         url = f"/v0.1/trajectories/{task_id}"
+         full_url = f"{self.base_url}{url}"
+
+         with external_trace(
+             url=full_url,
+             method="GET",
+             library="httpx",
+             custom_params={
+                 "operation": "get_job",
+                 "job_id": task_id,
+             },
+         ):
+             async with self.async_client.stream(
+                 "GET", url, params={"history": history}
+             ) as response:
+                 if response.status_code in {401, 403}:
+                     raise PermissionError(
+                         f"Error getting task: Permission denied for task {task_id}"
+                     )
+                 response.raise_for_status()
+                 json_data = "".join([chunk async for chunk in response.aiter_text()])
+                 data = json.loads(json_data)
+                 if "id" not in data:
+                     data["id"] = task_id
+                 verbose_response = TaskResponseVerbose(**data)
 
-             with external_trace(
-                 url=full_url,
-                 method="GET",
-                 library="httpx",
-                 custom_params={
-                     "operation": "get_job",
-                     "job_id": task_id,
-                 },
-             ):
-                 async with self.async_client.stream(
-                     "GET", url, params={"history": history}
-                 ) as response:
-                     response.raise_for_status()
-                     json_data = "".join([
-                         chunk async for chunk in response.aiter_text()
-                     ])
-                     data = json.loads(json_data)
-                     if "id" not in data:
-                         data["id"] = task_id
-                     verbose_response = TaskResponseVerbose(**data)
-
-             if verbose:
-                 return verbose_response
-             if any(
-                 JobNames.from_string(job_name) in verbose_response.job_name
-                 for job_name in ["crow", "falcon", "owl", "dummy"]
-             ):
-                 return PQATaskResponse(**data)
-             return TaskResponse(**data)
-         except Exception as e:
-             raise TaskFetchError(f"Error getting task: {e!s}") from e
+         if verbose:
+             return verbose_response
+         return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -437,15 +531,16 @@
              self.stage,
          )
 
-         try:
-             response = self.client.post(
-                 "/v0.1/crows", json=task_data.model_dump(mode="json")
+         response = self.client.post(
+             "/v0.1/crows", json=task_data.model_dump(mode="json")
+         )
+         if response.status_code in {401, 403}:
+             raise PermissionError(
+                 f"Error creating task: Permission denied for task {task_data.name}"
              )
-             response.raise_for_status()
-             trajectory_id = response.json()["trajectory_id"]
-             self.trajectory_id = trajectory_id
-         except Exception as e:
-             raise TaskFetchError(f"Error creating task: {e!s}") from e
+         response.raise_for_status()
+         trajectory_id = response.json()["trajectory_id"]
+         self.trajectory_id = trajectory_id
          return trajectory_id
 
      @retry(
@@ -463,16 +558,16 @@
              task_data.name.name,
              self.stage,
          )
-
-         try:
-             response = await self.async_client.post(
-                 "/v0.1/crows", json=task_data.model_dump(mode="json")
+         response = await self.async_client.post(
+             "/v0.1/crows", json=task_data.model_dump(mode="json")
+         )
+         if response.status_code in {401, 403}:
+             raise PermissionError(
+                 f"Error creating task: Permission denied for task {task_data.name}"
              )
-             response.raise_for_status()
-             trajectory_id = response.json()["trajectory_id"]
-             self.trajectory_id = trajectory_id
-         except Exception as e:
-             raise TaskFetchError(f"Error creating task: {e!s}") from e
+         response.raise_for_status()
+         trajectory_id = response.json()["trajectory_id"]
+         self.trajectory_id = trajectory_id
          return trajectory_id
 
      async def arun_tasks_until_done(
@@ -820,6 +915,8 @@
              raise JobCreationError(f"Error generating docker image: {e!s}") from e
          return build_context
 
+     # TODO: we should have an async upload_file, check_assembly_status,
+     # wait_for_assembly_completion, upload_directory, upload_single_file
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
          wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -830,6 +927,8 @@
          job_name: str,
          file_path: str | os.PathLike,
          upload_id: str | None = None,
+         wait_for_assembly: bool = True,
+         assembly_timeout: int = MAX_ASSEMBLY_WAIT_TIME,
      ) -> str:
          """Upload a file or directory to a futurehouse job bucket.
 
@@ -837,29 +936,47 @@
              job_name: The name of the futurehouse job to upload to.
              file_path: The local path to the file or directory to upload.
              upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+             wait_for_assembly: After file chunking, wait for the assembly to be processed.
+             assembly_timeout: Maximum time to wait for assembly in seconds.
 
          Returns:
              The upload ID used for the upload.
 
          Raises:
              FileUploadError: If there's an error uploading the file.
+             RestClientError: If assembly fails or times out.
          """
          file_path = Path(file_path)
          if not file_path.exists():
              raise FileNotFoundError(f"File or directory not found: {file_path}")
 
          upload_id = upload_id or str(uuid.uuid4())
+         uploaded_files: list[str] = []
 
          if file_path.is_dir():
              # Process directory recursively
-             self._upload_directory(job_name, file_path, upload_id)
+             uploaded_files = self._upload_directory(job_name, file_path, upload_id)
          else:
              # Process single file
              self._upload_single_file(job_name, file_path, upload_id)
+             uploaded_files = [file_path.name]
+
+         # Wait for all assemblies if requested and we have files
+         if wait_for_assembly and uploaded_files:
+             success = self._wait_for_all_assemblies_completion(
+                 job_name, upload_id, uploaded_files, assembly_timeout
+             )
+             if not success:
+                 raise RestClientError(
+                     f"Assembly failed or timed out for one or more files: {uploaded_files}"
+                 )
+
          logger.info(f"Successfully uploaded {file_path} to {upload_id}")
          return upload_id
 
-     def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+     def _upload_directory(
+         self, job_name: str, dir_path: Path, upload_id: str
+     ) -> list[str]:
          """Upload all files in a directory recursively.
 
          Args:
@@ -867,12 +984,17 @@
              dir_path: The path to the directory to upload.
              upload_id: The upload ID to use.
 
+         Returns:
+             List of uploaded file names.
+
          Raises:
              FileUploadError: If there's an error uploading any file.
          """
          # Skip common directories that shouldn't be uploaded
          if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
-             return
+             return []
+
+         uploaded_files: list[str] = []
 
          try:
              # Upload all files in the directory recursively
@@ -882,23 +1004,27 @@
              ):
                  # Use path relative to the original directory as file name
                  rel_path = path.relative_to(dir_path)
+                 file_name = str(rel_path)
                  self._upload_single_file(
                      job_name,
                      path,
                      upload_id,
-                     file_name=str(rel_path),
+                     file_name=file_name,
                  )
+                 uploaded_files.append(file_name)
          except Exception as e:
              raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
 
+         return uploaded_files
+
      def _upload_single_file(
          self,
          job_name: str,
          file_path: Path,
          upload_id: str,
          file_name: str | None = None,
-     ) -> None:
-         """Upload a single file in chunks.
+     ) -> str | None:
+         """Upload a single file in chunks using parallel uploads.
 
          Args:
              job_name: The key of the crow to upload to.
@@ -906,6 +1032,9 @@
              file_path: The path to the file to upload.
              upload_id: The upload ID to use.
              file_name: Optional name to use for the file. If not provided, the file's name will be used.
+
+         Returns:
+             The status URL if this was the last chunk, None otherwise.
 
          Raises:
              FileUploadError: If there's an error uploading the file.
@@ -915,17 +1044,103 @@
          # Skip empty files
          if file_size == 0:
              logger.warning(f"Skipping upload of empty file: {file_path}")
-             return
+             return None
 
          total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
 
          logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
 
+         status_url = None
+
          try:
-             with open(file_path, "rb") as f:
-                 for chunk_index in range(total_chunks):
-                     # Read the chunk from the file
-                     f.seek(chunk_index * self.CHUNK_SIZE)
+             status_url = self._upload_chunks_parallel(
+                 job_name,
+                 file_path,
+                 file_name,
+                 upload_id,
+                 total_chunks,
+             )
+
+             logger.info(f"Successfully uploaded {file_name}")
+         except Exception as e:
+             logger.exception(f"Error uploading file {file_path}")
+             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+         return status_url
+
+     def _upload_chunks_parallel(
+         self,
+         job_name: str,
+         file_path: Path,
+         file_name: str,
+         upload_id: str,
+         total_chunks: int,
+     ) -> str | None:
+         """Upload all chunks in parallel batches, including the final chunk.
+
+         Args:
+             job_name: The key of the crow to upload to.
+             file_path: The path to the file to upload.
+             file_name: The name to use for the file.
+             upload_id: The upload ID to use.
+             total_chunks: Total number of chunks.
+
+         Returns:
+             The status URL from the final chunk response, or None if no chunks.
+
+         Raises:
+             FileUploadError: If there's an error uploading any chunk.
+         """
+         if total_chunks <= 0:
+             return None
+
+         if total_chunks > 1:
+             num_regular_chunks = total_chunks - 1
+             for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                 batch_end = min(
+                     batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                 )
+
+                 # Upload chunks in this batch concurrently
+                 with ThreadPoolExecutor(
+                     max_workers=self.MAX_CONCURRENT_CHUNKS
+                 ) as executor:
+                     futures = {
+                         executor.submit(
+                             self._upload_single_chunk,
+                             job_name,
+                             file_path,
+                             file_name,
+                             upload_id,
+                             chunk_index,
+                             total_chunks,
+                         ): chunk_index
+                         for chunk_index in range(batch_start, batch_end)
+                     }
+
+                     for future in as_completed(futures):
+                         chunk_index = futures[future]
+                         try:
+                             future.result()
+                             logger.debug(
+                                 f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                             )
+                         except Exception as e:
+                             logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                             raise FileUploadError(
+                                 f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                             ) from e
+
+         # Upload the final chunk with retry logic
+         final_chunk_index = total_chunks - 1
+         retries = 0
+         max_retries = 3
+         retry_delay = 2.0
+
+         while retries < max_retries:
+             try:
+                 with open(file_path, "rb") as f:
+                     # Read the final chunk from the file
+                     f.seek(final_chunk_index * self.CHUNK_SIZE)
                      chunk_data = f.read(self.CHUNK_SIZE)
 
                      # Prepare and send the chunk
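An aside before the final-chunk hunk below: with `CHUNK_SIZE` of 16 MiB and `MAX_CONCURRENT_CHUNKS` of 12, every chunk except the last goes out in parallel batches, and the final chunk is held back so the server can trigger assembly. A standalone sketch of the same arithmetic:

```python
CHUNK_SIZE = 16 * 1024 * 1024  # 16 MiB, mirroring RestClient.CHUNK_SIZE
MAX_CONCURRENT_CHUNKS = 12     # mirroring RestClient.MAX_CONCURRENT_CHUNKS


def plan_upload(file_size: int) -> tuple[int, int]:
    """Return (total_chunks, parallel_batches) for a file of file_size bytes."""
    total_chunks = (file_size + CHUNK_SIZE - 1) // CHUNK_SIZE  # ceiling division
    # All chunks except the last go out in parallel batches; the final
    # chunk is uploaded separately so the server can assemble the file.
    regular_chunks = max(total_chunks - 1, 0)
    batches = -(-regular_chunks // MAX_CONCURRENT_CHUNKS)  # ceiling division
    return total_chunks, batches


# A 500 MiB file splits into 32 chunks: 31 regular chunks across
# 3 parallel batches, then the final chunk with its own retry loop.
print(plan_upload(500 * 1024 * 1024))  # (32, 3)
```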
@@ -944,29 +1159,107 @@
                              }
                              data = {
                                  "file_name": file_name,
-                                 "chunk_index": chunk_index,
+                                 "chunk_index": final_chunk_index,
                                  "total_chunks": total_chunks,
                                  "upload_id": upload_id,
                              }
 
-                             # Send the chunk
+                             # Send the final chunk
                              response = self.multipart_client.post(
                                  f"/v0.1/crows/{job_name}/upload-chunk",
                                  files=files,
                                  data=data,
                              )
+
+                             # Handle missing chunks (status 409)
+                             if response.status_code == codes.CONFLICT:
+                                 retries += 1
+                                 if retries < max_retries:
+                                     logger.warning(
+                                         f"Missing chunks detected for {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries})"
+                                     )
+                                     time.sleep(retry_delay)
+                                     continue
+
                              response.raise_for_status()
+                             response_data = response.json()
+                             status_url = response_data.get("status_url")
 
-                     # Call progress callback if provided
+                             logger.debug(
+                                 f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
+                             )
+                             return status_url
+
+             except Exception as e:
+                 if retries >= max_retries - 1:
+                     raise FileUploadError(
+                         f"Error uploading final chunk of {file_name}: {e}"
+                     ) from e
+                 retries += 1
+                 logger.warning(
+                     f"Error uploading final chunk of {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries}): {e}"
+                 )
+                 time.sleep(retry_delay)
 
-                     logger.debug(
-                         f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                     )
+         raise FileUploadError(
+             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
+         )
 
-             logger.info(f"Successfully uploaded {file_name}")
-         except Exception as e:
-             logger.exception(f"Error uploading file {file_path}")
-             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+     def _upload_single_chunk(
+         self,
+         job_name: str,
+         file_path: Path,
+         file_name: str,
+         upload_id: str,
+         chunk_index: int,
+         total_chunks: int,
+     ) -> None:
+         """Upload a single chunk.
+
+         Args:
+             job_name: The key of the crow to upload to.
+             file_path: The path to the file to upload.
+             file_name: The name to use for the file.
+             upload_id: The upload ID to use.
+             chunk_index: The index of this chunk.
+             total_chunks: Total number of chunks.
+
+         Raises:
+             Exception: If there's an error uploading the chunk.
+         """
+         with open(file_path, "rb") as f:
+             # Read the chunk from the file
+             f.seek(chunk_index * self.CHUNK_SIZE)
+             chunk_data = f.read(self.CHUNK_SIZE)
+
+             # Prepare and send the chunk
+             with tempfile.NamedTemporaryFile() as temp_file:
+                 temp_file.write(chunk_data)
+                 temp_file.flush()
+
+                 # Create form data
+                 with open(temp_file.name, "rb") as chunk_file_obj:
+                     files = {
+                         "chunk": (
+                             file_name,
+                             chunk_file_obj,
+                             "application/octet-stream",
+                         )
+                     }
+                     data = {
+                         "file_name": file_name,
+                         "chunk_index": chunk_index,
+                         "total_chunks": total_chunks,
+                         "upload_id": upload_id,
+                     }
+
+                     # Send the chunk
+                     response = self.multipart_client.post(
+                         f"/v0.1/crows/{job_name}/upload-chunk",
+                         files=files,
+                         data=data,
+                     )
+                     response.raise_for_status()
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
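Taken together, `upload_file` now blocks by default until server-side assembly completes, raising `RestClientError` on failure or timeout; `wait_for_assembly=False` restores the previous fire-and-forget behavior. A usage sketch (client construction and paths are placeholders):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials

# Default: block until every uploaded file is assembled, or raise
# RestClientError after assembly_timeout seconds (default 1800).
upload_id = client.upload_file(
    job_name="job-futurehouse-phoenix",  # job-name value from the diff above
    file_path="data/inputs",             # placeholder path
    assembly_timeout=600,
)

# Fire-and-forget, matching the pre-0.3.19 behavior:
upload_id = client.upload_file(
    job_name="job-futurehouse-phoenix",
    file_path="data/inputs",
    wait_for_assembly=False,
)
```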
futurehouse_client/models/app.py
@@ -1,3 +1,4 @@
+ import copy
  import json
  import os
  import re
@@ -675,7 +676,8 @@ class TaskResponse(BaseModel):
 
      @model_validator(mode="before")
      @classmethod
-     def validate_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+     def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)  # Avoid mutating the original data
          # Extract fields from environment frame state
          if not isinstance(data, dict):
              return data
@@ -690,7 +692,72 @@
          return data
 
 
+ class PhoenixTaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with Phoenix.
+
+     Additional fields:
+         answer: Final answer from Phoenix
+     """
+
+     model_config = ConfigDict(extra="ignore")
+     answer: str | None = None
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_phoenix_fields(
+         cls, original_data: Mapping[str, Any]
+     ) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)
+         if not isinstance(data, dict):
+             return data
+         if not (env_frame := data.get("environment_frame", {})):
+             return data
+         state = env_frame.get("state", {}).get("state", {})
+         data["answer"] = state.get("answer")
+         return data
+
+
+ class FinchTaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with Finch.
+
+     Additional fields:
+         answer: Final answer from Finch
+         notebook: a dictionary with `cells` and `metadata` describing the notebook content
+     """
+
+     model_config = ConfigDict(extra="ignore")
+     answer: str | None = None
+     notebook: dict[str, Any] | None = None
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_finch_fields(
+         cls, original_data: Mapping[str, Any]
+     ) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)
+         if not isinstance(data, dict):
+             return data
+         if not (env_frame := data.get("environment_frame", {})):
+             return data
+         state = env_frame.get("state", {}).get("state", {})
+         data["answer"] = state.get("answer")
+         data["notebook"] = state.get("nb_state")
+         return data
+
+
  class PQATaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with PQA.
+
+     Additional fields:
+         answer: Final answer from PQA
+         formatted_answer: Formatted answer from PQA
+         answer_reasoning: Reasoning used to generate the final answer, if available
+         has_successful_answer: Whether the answer is successful
+     """
+
      model_config = ConfigDict(extra="ignore")
 
      answer: str | None = None
@@ -702,7 +769,8 @@ class PQATaskResponse(TaskResponse):
 
      @model_validator(mode="before")
      @classmethod
-     def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+     def validate_pqa_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)  # Avoid mutating the original data
          if not isinstance(data, dict):
              return data
          if not (env_frame := data.get("environment_frame", {})):
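All three validators share a pattern: before standard validation, they lift fields out of the nested `environment_frame["state"]["state"]` dict, operating on a deep copy so the caller's payload is never mutated. A sketch with a hypothetical payload; any required fields on the `TaskResponse` base, which this diff does not show, would also need to be supplied:

```python
from futurehouse_client.models.app import FinchTaskResponse

payload = {  # hypothetical, trimmed-down trajectory payload
    "environment_frame": {
        "state": {
            "state": {"answer": "42", "nb_state": {"cells": [], "metadata": {}}}
        }
    },
}
task = FinchTaskResponse(**payload)  # plus any required TaskResponse base fields
assert task.answer == "42"
assert task.notebook == {"cells": [], "metadata": {}}
assert "answer" not in payload  # the deepcopy left the input dict untouched
```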
File without changes
futurehouse_client/utils/auth.py
@@ -1,4 +1,5 @@
  import logging
+ from collections.abc import Collection, Generator
  from typing import ClassVar, Final
 
  import httpx
@@ -42,7 +43,7 @@ def _run_auth(
  class RefreshingJWT(httpx.Auth):
      """Automatically (re-)inject a JWT and transparently retry exactly once when we hit a 401/403."""
 
-     RETRY_STATUSES: ClassVar[set[int]] = {
+     RETRY_STATUSES: ClassVar[Collection[httpx.codes]] = {
          httpx.codes.UNAUTHORIZED,
          httpx.codes.FORBIDDEN,
      }
@@ -64,7 +65,7 @@ class RefreshingJWT(httpx.Auth):
              api_key=api_key,
          )
 
-     def refresh_token(self):
+     def refresh_token(self) -> None:
          if self.auth_type == AuthType.JWT:
              logger.error(INVALID_REFRESH_TYPE_MSG)
              raise ValueError(INVALID_REFRESH_TYPE_MSG)
@@ -74,7 +75,9 @@ class RefreshingJWT(httpx.Auth):
              api_key=self.api_key,
          )
 
-     def auth_flow(self, request):
+     def auth_flow(
+         self, request: httpx.Request
+     ) -> Generator[httpx.Request, httpx.Response, None]:
          request.headers["Authorization"] = f"Bearer {self._jwt}"
          response = yield request
 
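The new annotations document the contract httpx expects: `auth_flow` is a generator that yields each request and receives the response back, which is what lets an `Auth` implementation refresh a token and retry exactly once. A generic sketch of the pattern, independent of this package:

```python
import httpx


class OneRetryAuth(httpx.Auth):
    """Sketch of the generator-based retry pattern used by RefreshingJWT."""

    def __init__(self, token: str):
        self._token = token

    def _refresh(self) -> None:
        self._token = "refreshed-token"  # placeholder refresh logic

    def auth_flow(self, request: httpx.Request):
        request.headers["Authorization"] = f"Bearer {self._token}"
        response = yield request  # httpx sends the request, hands back the response
        if response.status_code in {401, 403}:
            self._refresh()  # mint a new token once
            request.headers["Authorization"] = f"Bearer {self._token}"
            yield request  # transparently retry exactly once
```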
futurehouse_client-0.3.19.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: futurehouse-client
- Version: 0.3.18.dev195
+ Version: 0.3.19
  Summary: A client for interacting with endpoints of the FutureHouse service.
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
  Classifier: Operating System :: OS Independent
@@ -8,10 +8,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python
- Requires-Python: <3.13,>=3.11
+ Requires-Python: <3.14,>=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: cloudpickle
- Requires-Dist: dm-tree<0.1.9
  Requires-Dist: fhaviary
  Requires-Dist: httpx
  Requires-Dist: ldp>=0.22.0
futurehouse_client-0.3.19.dist-info/RECORD
@@ -0,0 +1,18 @@
+ futurehouse_client/__init__.py,sha256=BztM_ntbgmIEjzvnBWcvPhvLjM8xGDFCK0Upf3-nIn8,488
+ futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
+ futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
+ futurehouse_client/clients/rest_client.py,sha256=3wfVz6d2KuRQUr_nms7P25yVR6aTjsRrSkqmVs55soA,54552
+ futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
+ futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
+ futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+ futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+ futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
+ futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+ futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+ futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+ futurehouse_client-0.3.19.dist-info/METADATA,sha256=FbtQGStv4salVccxR5wtpdlGbufSqxoiCtM44qDOHJs,12731
+ futurehouse_client-0.3.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ futurehouse_client-0.3.19.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+ futurehouse_client-0.3.19.dist-info/RECORD,,
futurehouse_client-0.3.19.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.7.1)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
futurehouse_client-0.3.18.dev195.dist-info/RECORD
@@ -1,17 +0,0 @@
- futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
- futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
- futurehouse_client/clients/job_client.py,sha256=uNkqQbeZw7wbA0qDWcIOwOykrosza-jev58paJZ_mbA,11150
- futurehouse_client/clients/rest_client.py,sha256=6HQF3YXDnSdGxAoXpB_wU6Vhcqhp5OB5SNuGQJ6Hseo,43454
- futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
- futurehouse_client/models/app.py,sha256=w_1e4F0IiC-BKeOLqYkABYo4U-Nka1S-F64S_eHB2KM,26421
- futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
- futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
- futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- futurehouse_client/utils/auth.py,sha256=0V161S9jW4vbTCoJJrOtNzWXQkAVyzdGM3yefGgJ578,2808
- futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
- futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
- futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
- futurehouse_client-0.3.18.dev195.dist-info/METADATA,sha256=yM1NbN2au3MmkfIkkuT85eYahKYTmnBuaWCQ1OvQ97A,12767
- futurehouse_client-0.3.18.dev195.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- futurehouse_client-0.3.18.dev195.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
- futurehouse_client-0.3.18.dev195.dist-info/RECORD,,