futurehouse-client 0.3.19.dev111__tar.gz → 0.3.19.dev133__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/PKG-INFO +1 -1
  2. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/rest_client.py +179 -214
  3. futurehouse_client-0.3.19.dev133/futurehouse_client/utils/__init__.py +0 -0
  4. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/PKG-INFO +1 -1
  5. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/SOURCES.txt +1 -0
  6. futurehouse_client-0.3.19.dev133/tests/test_rest.py +684 -0
  7. futurehouse_client-0.3.19.dev111/tests/test_rest.py +0 -235
  8. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/LICENSE +0 -0
  9. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/README.md +0 -0
  10. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/docs/__init__.py +0 -0
  11. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/docs/client_notebook.ipynb +0 -0
  12. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/__init__.py +0 -0
  13. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/__init__.py +0 -0
  14. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/job_client.py +0 -0
  15. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/__init__.py +0 -0
  16. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/app.py +0 -0
  17. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/client.py +0 -0
  18. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/rest.py +0 -0
  19. /futurehouse_client-0.3.19.dev111/futurehouse_client/utils/__init__.py → /futurehouse_client-0.3.19.dev133/futurehouse_client/py.typed +0 -0
  20. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/auth.py +0 -0
  21. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/general.py +0 -0
  22. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/module_utils.py +0 -0
  23. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/monitoring.py +0 -0
  24. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/dependency_links.txt +0 -0
  25. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/requires.txt +0 -0
  26. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/top_level.txt +0 -0
  27. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/pyproject.toml +0 -0
  28. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/setup.cfg +0 -0
  29. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/tests/test_client.py +0 -0
  30. {futurehouse_client-0.3.19.dev111 → futurehouse_client-0.3.19.dev133}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev111
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
futurehouse_client/clients/rest_client.py
@@ -444,37 +444,36 @@ class RestClient:
         self, task_id: str | None = None, history: bool = False, verbose: bool = False
     ) -> "TaskResponse":
         """Get details for a specific task."""
-        try:
-            task_id = task_id or self.trajectory_id
-            url = f"/v0.1/trajectories/{task_id}"
-            full_url = f"{self.base_url}{url}"
-
-            with (
-                external_trace(
-                    url=full_url,
-                    method="GET",
-                    library="httpx",
-                    custom_params={
-                        "operation": "get_job",
-                        "job_id": task_id,
-                    },
-                ),
-                self.client.stream("GET", url, params={"history": history}) as response,
-            ):
-                response.raise_for_status()
-                json_data = "".join(response.iter_text(chunk_size=1024))
-                data = json.loads(json_data)
-                if "id" not in data:
-                    data["id"] = task_id
-                verbose_response = TaskResponseVerbose(**data)
+        task_id = task_id or self.trajectory_id
+        url = f"/v0.1/trajectories/{task_id}"
+        full_url = f"{self.base_url}{url}"

-            if verbose:
-                return verbose_response
-            return JobNames.get_response_object_from_job(verbose_response.job_name)(
-                **data
-            )
-        except Exception as e:
-            raise TaskFetchError(f"Error getting task: {e!s}") from e
+        with (
+            external_trace(
+                url=full_url,
+                method="GET",
+                library="httpx",
+                custom_params={
+                    "operation": "get_job",
+                    "job_id": task_id,
+                },
+            ),
+            self.client.stream("GET", url, params={"history": history}) as response,
+        ):
+            if response.status_code in {401, 403}:
+                raise PermissionError(
+                    f"Error getting task: Permission denied for task {task_id}"
+                )
+            response.raise_for_status()
+            json_data = "".join(response.iter_text(chunk_size=1024))
+            data = json.loads(json_data)
+            if "id" not in data:
+                data["id"] = task_id
+            verbose_response = TaskResponseVerbose(**data)
+
+        if verbose:
+            return verbose_response
+        return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)

     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -485,39 +484,36 @@ class RestClient:
         self, task_id: str | None = None, history: bool = False, verbose: bool = False
     ) -> "TaskResponse":
         """Get details for a specific task asynchronously."""
-        try:
-            task_id = task_id or self.trajectory_id
-            url = f"/v0.1/trajectories/{task_id}"
-            full_url = f"{self.base_url}{url}"
+        task_id = task_id or self.trajectory_id
+        url = f"/v0.1/trajectories/{task_id}"
+        full_url = f"{self.base_url}{url}"
+
+        with external_trace(
+            url=full_url,
+            method="GET",
+            library="httpx",
+            custom_params={
+                "operation": "get_job",
+                "job_id": task_id,
+            },
+        ):
+            async with self.async_client.stream(
+                "GET", url, params={"history": history}
+            ) as response:
+                if response.status_code in {401, 403}:
+                    raise PermissionError(
+                        f"Error getting task: Permission denied for task {task_id}"
+                    )
+                response.raise_for_status()
+                json_data = "".join([chunk async for chunk in response.aiter_text()])
+                data = json.loads(json_data)
+                if "id" not in data:
+                    data["id"] = task_id
+                verbose_response = TaskResponseVerbose(**data)

-            with external_trace(
-                url=full_url,
-                method="GET",
-                library="httpx",
-                custom_params={
-                    "operation": "get_job",
-                    "job_id": task_id,
-                },
-            ):
-                async with self.async_client.stream(
-                    "GET", url, params={"history": history}
-                ) as response:
-                    response.raise_for_status()
-                    json_data = "".join([
-                        chunk async for chunk in response.aiter_text()
-                    ])
-                    data = json.loads(json_data)
-                    if "id" not in data:
-                        data["id"] = task_id
-                    verbose_response = TaskResponseVerbose(**data)
-
-            if verbose:
-                return verbose_response
-            return JobNames.get_response_object_from_job(verbose_response.job_name)(
-                **data
-            )
-        except Exception as e:
-            raise TaskFetchError(f"Error getting task: {e!s}") from e
+        if verbose:
+            return verbose_response
+        return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)

     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
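
Both `get_task` variants above drop the blanket `except Exception ... raise TaskFetchError` wrapper in favor of an explicit 401/403 check, so authorization failures now surface as `PermissionError` instead of being folded into `TaskFetchError`, while other HTTP errors propagate from `raise_for_status()`. A minimal sketch of how calling code might handle the new failure mode; the constructor arguments and task id are placeholders, not documented API:

```python
from futurehouse_client.clients.rest_client import RestClient

client = RestClient(api_key="fh-...")  # hypothetical credentials

try:
    task = client.get_task("some-trajectory-id")  # placeholder id
except PermissionError:
    # New behavior in dev133: 401/403 responses raise PermissionError
    # directly rather than a generic TaskFetchError.
    print("Not authorized to view this task")
```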
@@ -535,15 +531,16 @@ class RestClient:
             self.stage,
         )

-        try:
-            response = self.client.post(
-                "/v0.1/crows", json=task_data.model_dump(mode="json")
+        response = self.client.post(
+            "/v0.1/crows", json=task_data.model_dump(mode="json")
+        )
+        if response.status_code in {401, 403}:
+            raise PermissionError(
+                f"Error creating task: Permission denied for task {task_data.name}"
             )
-            response.raise_for_status()
-            trajectory_id = response.json()["trajectory_id"]
-            self.trajectory_id = trajectory_id
-        except Exception as e:
-            raise TaskFetchError(f"Error creating task: {e!s}") from e
+        response.raise_for_status()
+        trajectory_id = response.json()["trajectory_id"]
+        self.trajectory_id = trajectory_id
         return trajectory_id

     @retry(
@@ -561,16 +558,16 @@ class RestClient:
             task_data.name.name,
             self.stage,
         )
-
-        try:
-            response = await self.async_client.post(
-                "/v0.1/crows", json=task_data.model_dump(mode="json")
+        response = await self.async_client.post(
+            "/v0.1/crows", json=task_data.model_dump(mode="json")
+        )
+        if response.status_code in {401, 403}:
+            raise PermissionError(
+                f"Error creating task: Permission denied for task {task_data.name}"
             )
-            response.raise_for_status()
-            trajectory_id = response.json()["trajectory_id"]
-            self.trajectory_id = trajectory_id
-        except Exception as e:
-            raise TaskFetchError(f"Error creating task: {e!s}") from e
+        response.raise_for_status()
+        trajectory_id = response.json()["trajectory_id"]
+        self.trajectory_id = trajectory_id
         return trajectory_id

     async def arun_tasks_until_done(
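
Task creation gets the same treatment in both hunks above: 401/403 responses raise a `PermissionError` that names the task, and the `TaskFetchError` wrapper is removed. A hedged async usage sketch; the `TaskRequest` import path and field names are assumed from the package's public models, as is the `JobNames.CROW` member:

```python
import asyncio

from futurehouse_client import JobNames  # assumed top-level export
from futurehouse_client.clients.rest_client import RestClient
from futurehouse_client.models.app import TaskRequest  # assumed import path

async def main() -> None:
    client = RestClient(api_key="fh-...")  # hypothetical credentials
    task_data = TaskRequest(name=JobNames.CROW, query="...")  # assumed fields
    try:
        trajectory_id = await client.acreate_task(task_data)
        print(f"created trajectory {trajectory_id}")
    except PermissionError as e:
        # The new error message includes task_data.name.
        print(e)

asyncio.run(main())
```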
@@ -1056,24 +1053,11 @@ class RestClient:
         status_url = None

         try:
-            # Upload all chunks except the last one in parallel
-            if total_chunks > 1:
-                self._upload_chunks_parallel(
-                    job_name,
-                    file_path,
-                    file_name,
-                    upload_id,
-                    total_chunks - 1,
-                    total_chunks,
-                )
-
-            # Upload the last chunk separately (handles assembly)
-            status_url = self._upload_final_chunk(
+            status_url = self._upload_chunks_parallel(
                 job_name,
                 file_path,
                 file_name,
                 upload_id,
-                total_chunks - 1,
                 total_chunks,
             )

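The call site collapses two calls into one: `_upload_chunks_parallel` is now the single source of the status URL that `_upload_final_chunk` used to return. For orientation, a hypothetical standalone sketch of the chunk bookkeeping this path relies on; the `CHUNK_SIZE` value here is assumed, the real one is a `RestClient` class attribute:

```python
import math

CHUNK_SIZE = 8 * 1024 * 1024  # assumed chunk size in bytes

def chunk_plan(file_size: int) -> tuple[int, int]:
    """Return (total_chunks, final_chunk_index) for a file of file_size bytes."""
    total_chunks = max(1, math.ceil(file_size / CHUNK_SIZE))
    return total_chunks, total_chunks - 1

# A 20 MiB file splits into chunks 0 and 1, uploaded in parallel batches,
# plus final chunk 2, whose response carries the status URL.
print(chunk_plan(20 * 1024 * 1024))  # (3, 2)
```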
@@ -1089,149 +1073,74 @@ class RestClient:
         file_path: Path,
         file_name: str,
         upload_id: str,
-        num_regular_chunks: int,
         total_chunks: int,
-    ) -> None:
-        """Upload chunks in parallel batches.
+    ) -> str | None:
+        """Upload all chunks in parallel batches, including the final chunk.

         Args:
             job_name: The key of the crow to upload to.
             file_path: The path to the file to upload.
             file_name: The name to use for the file.
             upload_id: The upload ID to use.
-            num_regular_chunks: Number of regular chunks (excluding final chunk).
             total_chunks: Total number of chunks.

-        Raises:
-            FileUploadError: If there's an error uploading any chunk.
-        """
-        if num_regular_chunks <= 0:
-            return
-
-        # Process chunks in batches
-        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
-            batch_end = min(
-                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
-            )
-
-            # Upload chunks in this batch concurrently
-            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
-                futures = {
-                    executor.submit(
-                        self._upload_single_chunk,
-                        job_name,
-                        file_path,
-                        file_name,
-                        upload_id,
-                        chunk_index,
-                        total_chunks,
-                    ): chunk_index
-                    for chunk_index in range(batch_start, batch_end)
-                }
-
-                for future in as_completed(futures):
-                    chunk_index = futures[future]
-                    try:
-                        future.result()
-                        logger.debug(
-                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                        )
-                    except Exception as e:
-                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
-                        raise FileUploadError(
-                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
-                        ) from e
-
-    def _upload_single_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> None:
-        """Upload a single chunk.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of this chunk.
-            total_chunks: Total number of chunks.
+        Returns:
+            The status URL from the final chunk response, or None if no chunks.

         Raises:
-            Exception: If there's an error uploading the chunk.
+            FileUploadError: If there's an error uploading any chunk.
         """
-        with open(file_path, "rb") as f:
-            # Read the chunk from the file
-            f.seek(chunk_index * self.CHUNK_SIZE)
-            chunk_data = f.read(self.CHUNK_SIZE)
+        if total_chunks <= 0:
+            return None

-            # Prepare and send the chunk
-            with tempfile.NamedTemporaryFile() as temp_file:
-                temp_file.write(chunk_data)
-                temp_file.flush()
+        if total_chunks > 1:
+            num_regular_chunks = total_chunks - 1
+            for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                batch_end = min(
+                    batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                )

-                # Create form data
-                with open(temp_file.name, "rb") as chunk_file_obj:
-                    files = {
-                        "chunk": (
+                # Upload chunks in this batch concurrently
+                with ThreadPoolExecutor(
+                    max_workers=self.MAX_CONCURRENT_CHUNKS
+                ) as executor:
+                    futures = {
+                        executor.submit(
+                            self._upload_single_chunk,
+                            job_name,
+                            file_path,
                             file_name,
-                            chunk_file_obj,
-                            "application/octet-stream",
-                        )
-                    }
-                    data = {
-                        "file_name": file_name,
-                        "chunk_index": chunk_index,
-                        "total_chunks": total_chunks,
-                        "upload_id": upload_id,
+                            upload_id,
+                            chunk_index,
+                            total_chunks,
+                        ): chunk_index
+                        for chunk_index in range(batch_start, batch_end)
                     }

-                    # Send the chunk
-                    response = self.multipart_client.post(
-                        f"/v0.1/crows/{job_name}/upload-chunk",
-                        files=files,
-                        data=data,
-                    )
-                    response.raise_for_status()
-
-    def _upload_final_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> str | None:
-        """Upload the final chunk with retry logic for missing chunks.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of the final chunk.
-            total_chunks: Total number of chunks.
-
-        Returns:
-            The status URL from the response.
-
-        Raises:
-            FileUploadError: If there's an error uploading the final chunk.
-        """
+                    for future in as_completed(futures):
+                        chunk_index = futures[future]
+                        try:
+                            future.result()
+                            logger.debug(
+                                f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                            )
+                        except Exception as e:
+                            logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                            raise FileUploadError(
+                                f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                            ) from e
+
+        # Upload the final chunk with retry logic
+        final_chunk_index = total_chunks - 1
         retries = 0
         max_retries = 3
-        retry_delay = 2.0  # seconds
+        retry_delay = 2.0

         while retries < max_retries:
             try:
                 with open(file_path, "rb") as f:
                     # Read the final chunk from the file
-                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    f.seek(final_chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)

                     # Prepare and send the chunk
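
The rewritten `_upload_chunks_parallel` folds the old two-method split into one: regular chunks go out in bounded parallel batches, and only then is the final chunk sent, since its arrival triggers server-side assembly. A self-contained sketch of that control flow, with `upload_one` and `upload_final` standing in for the private helpers:

```python
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed

def upload_all(
    total_chunks: int,
    max_concurrent: int,
    upload_one: Callable[[int], None],
    upload_final: Callable[[int], str | None],
) -> str | None:
    if total_chunks <= 0:
        return None
    # Chunks 0 .. total_chunks - 2 upload in batches of max_concurrent.
    num_regular = total_chunks - 1
    for start in range(0, num_regular, max_concurrent):
        end = min(start + max_concurrent, num_regular)
        with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
            futures = {pool.submit(upload_one, i): i for i in range(start, end)}
            for fut in as_completed(futures):
                fut.result()  # re-raise the first chunk failure
    # The final chunk goes last; its response carries the status URL.
    return upload_final(total_chunks - 1)
```

Keeping the final chunk out of the thread pool preserves the old guarantee that assembly is only requested after every other chunk has been accepted.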
@@ -1250,7 +1159,7 @@ class RestClient:
                     }
                     data = {
                         "file_name": file_name,
-                        "chunk_index": chunk_index,
+                        "chunk_index": final_chunk_index,
                         "total_chunks": total_chunks,
                         "upload_id": upload_id,
                     }
@@ -1277,7 +1186,7 @@ class RestClient:
                     status_url = response_data.get("status_url")

                     logger.debug(
-                        f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
                     )
                     return status_url

@@ -1296,6 +1205,62 @@ class RestClient:
             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
         )

+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+            # Prepare and send the chunk
+            with tempfile.NamedTemporaryFile() as temp_file:
+                temp_file.write(chunk_data)
+                temp_file.flush()
+
+                # Create form data
+                with open(temp_file.name, "rb") as chunk_file_obj:
+                    files = {
+                        "chunk": (
+                            file_name,
+                            chunk_file_obj,
+                            "application/octet-stream",
+                        )
+                    }
+                    data = {
+                        "file_name": file_name,
+                        "chunk_index": chunk_index,
+                        "total_chunks": total_chunks,
+                        "upload_id": upload_id,
+                    }
+
+                    # Send the chunk
+                    response = self.multipart_client.post(
+                        f"/v0.1/crows/{job_name}/upload-chunk",
+                        files=files,
+                        data=data,
+                    )
+                    response.raise_for_status()
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
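
`_upload_single_chunk` itself is unchanged, just relocated below the method that now owns the final-chunk logic. Each call boils down to a multipart POST; a hypothetical standalone equivalent using a plain `httpx.Client` (base URL and auth assumed to be configured like `multipart_client`):

```python
import httpx

def upload_chunk(
    client: httpx.Client,
    job_name: str,
    file_name: str,
    upload_id: str,
    chunk_index: int,
    total_chunks: int,
    chunk_data: bytes,
) -> None:
    # httpx accepts raw bytes for a file part, so the temp-file round trip
    # in the client's helper is not strictly required in this sketch.
    files = {"chunk": (file_name, chunk_data, "application/octet-stream")}
    data = {
        "file_name": file_name,
        "chunk_index": str(chunk_index),
        "total_chunks": str(total_chunks),
        "upload_id": upload_id,
    }
    response = client.post(
        f"/v0.1/crows/{job_name}/upload-chunk", files=files, data=data
    )
    response.raise_for_status()
```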
futurehouse_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev111
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
futurehouse_client.egg-info/SOURCES.txt
@@ -5,6 +5,7 @@ uv.lock
 docs/__init__.py
 docs/client_notebook.ipynb
 futurehouse_client/__init__.py
+futurehouse_client/py.typed
 futurehouse_client.egg-info/PKG-INFO
 futurehouse_client.egg-info/SOURCES.txt
 futurehouse_client.egg-info/dependency_links.txt
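
The new `futurehouse_client/py.typed` marker (item 19 in the file list, which the differ pairs with the equally empty old `utils/__init__.py`; a fresh `utils/__init__.py` is added as item 3) makes the distribution PEP 561 compliant, so type checkers read the package's inline annotations instead of typing its modules as `Any`. A small illustration of the consumer-side effect; the user code is hypothetical, while `base_url` is an attribute used throughout the diff above:

```python
# consumer.py -- with py.typed shipped, mypy checks this function against
# RestClient's own annotations rather than treating the import as untyped.
from futurehouse_client.clients.rest_client import RestClient

def trajectory_url(client: RestClient, task_id: str) -> str:
    return f"{client.base_url}/v0.1/trajectories/{task_id}"
```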