futurehouse-client 0.3.19.dev129__tar.gz → 0.3.19.dev133__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (29)
  1. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/PKG-INFO +1 -1
  2. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/rest_client.py +103 -135
  3. futurehouse_client-0.3.19.dev133/futurehouse_client/utils/__init__.py +0 -0
  4. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/PKG-INFO +1 -1
  5. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/SOURCES.txt +1 -0
  6. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/tests/test_rest.py +41 -37
  7. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/LICENSE +0 -0
  8. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/README.md +0 -0
  9. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/docs/__init__.py +0 -0
  10. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/docs/client_notebook.ipynb +0 -0
  11. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/__init__.py +0 -0
  12. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/__init__.py +0 -0
  13. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/job_client.py +0 -0
  14. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/__init__.py +0 -0
  15. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/app.py +0 -0
  16. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/client.py +0 -0
  17. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/rest.py +0 -0
  18. /futurehouse_client-0.3.19.dev129/futurehouse_client/utils/__init__.py → /futurehouse_client-0.3.19.dev133/futurehouse_client/py.typed +0 -0
  19. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/auth.py +0 -0
  20. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/general.py +0 -0
  21. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/module_utils.py +0 -0
  22. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/monitoring.py +0 -0
  23. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/dependency_links.txt +0 -0
  24. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/requires.txt +0 -0
  25. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/top_level.txt +0 -0
  26. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/pyproject.toml +0 -0
  27. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/setup.cfg +0 -0
  28. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/tests/test_client.py +0 -0
  29. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/uv.lock +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev129
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -1053,24 +1053,11 @@ class RestClient:
         status_url = None
 
         try:
-            # Upload all chunks except the last one in parallel
-            if total_chunks > 1:
-                self._upload_chunks_parallel(
-                    job_name,
-                    file_path,
-                    file_name,
-                    upload_id,
-                    total_chunks - 1,
-                    total_chunks,
-                )
-
-            # Upload the last chunk separately (handles assembly)
-            status_url = self._upload_final_chunk(
+            status_url = self._upload_chunks_parallel(
                 job_name,
                 file_path,
                 file_name,
                 upload_id,
-                total_chunks - 1,
                 total_chunks,
             )
 
@@ -1086,149 +1073,74 @@ class RestClient:
         file_path: Path,
         file_name: str,
         upload_id: str,
-        num_regular_chunks: int,
         total_chunks: int,
-    ) -> None:
-        """Upload chunks in parallel batches.
+    ) -> str | None:
+        """Upload all chunks in parallel batches, including the final chunk.
 
         Args:
             job_name: The key of the crow to upload to.
             file_path: The path to the file to upload.
             file_name: The name to use for the file.
             upload_id: The upload ID to use.
-            num_regular_chunks: Number of regular chunks (excluding final chunk).
             total_chunks: Total number of chunks.
 
-        Raises:
-            FileUploadError: If there's an error uploading any chunk.
-        """
-        if num_regular_chunks <= 0:
-            return
-
-        # Process chunks in batches
-        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
-            batch_end = min(
-                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
-            )
-
-            # Upload chunks in this batch concurrently
-            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
-                futures = {
-                    executor.submit(
-                        self._upload_single_chunk,
-                        job_name,
-                        file_path,
-                        file_name,
-                        upload_id,
-                        chunk_index,
-                        total_chunks,
-                    ): chunk_index
-                    for chunk_index in range(batch_start, batch_end)
-                }
-
-                for future in as_completed(futures):
-                    chunk_index = futures[future]
-                    try:
-                        future.result()
-                        logger.debug(
-                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                        )
-                    except Exception as e:
-                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
-                        raise FileUploadError(
-                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
-                        ) from e
-
-    def _upload_single_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> None:
-        """Upload a single chunk.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of this chunk.
-            total_chunks: Total number of chunks.
+        Returns:
+            The status URL from the final chunk response, or None if no chunks.
 
         Raises:
-            Exception: If there's an error uploading the chunk.
+            FileUploadError: If there's an error uploading any chunk.
         """
-        with open(file_path, "rb") as f:
-            # Read the chunk from the file
-            f.seek(chunk_index * self.CHUNK_SIZE)
-            chunk_data = f.read(self.CHUNK_SIZE)
+        if total_chunks <= 0:
+            return None
 
-            # Prepare and send the chunk
-            with tempfile.NamedTemporaryFile() as temp_file:
-                temp_file.write(chunk_data)
-                temp_file.flush()
+        if total_chunks > 1:
+            num_regular_chunks = total_chunks - 1
+            for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                batch_end = min(
+                    batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                )
 
-                # Create form data
-                with open(temp_file.name, "rb") as chunk_file_obj:
-                    files = {
-                        "chunk": (
+                # Upload chunks in this batch concurrently
+                with ThreadPoolExecutor(
+                    max_workers=self.MAX_CONCURRENT_CHUNKS
+                ) as executor:
+                    futures = {
+                        executor.submit(
+                            self._upload_single_chunk,
+                            job_name,
+                            file_path,
                             file_name,
-                            chunk_file_obj,
-                            "application/octet-stream",
-                        )
-                    }
-                    data = {
-                        "file_name": file_name,
-                        "chunk_index": chunk_index,
-                        "total_chunks": total_chunks,
-                        "upload_id": upload_id,
+                            upload_id,
+                            chunk_index,
+                            total_chunks,
+                        ): chunk_index
+                        for chunk_index in range(batch_start, batch_end)
                     }
 
-                    # Send the chunk
-                    response = self.multipart_client.post(
-                        f"/v0.1/crows/{job_name}/upload-chunk",
-                        files=files,
-                        data=data,
-                    )
-                    response.raise_for_status()
-
-    def _upload_final_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> str | None:
-        """Upload the final chunk with retry logic for missing chunks.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of the final chunk.
-            total_chunks: Total number of chunks.
-
-        Returns:
-            The status URL from the response.
-
-        Raises:
-            FileUploadError: If there's an error uploading the final chunk.
-        """
+                    for future in as_completed(futures):
+                        chunk_index = futures[future]
+                        try:
+                            future.result()
+                            logger.debug(
+                                f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                            )
+                        except Exception as e:
+                            logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                            raise FileUploadError(
+                                f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                            ) from e
+
+        # Upload the final chunk with retry logic
+        final_chunk_index = total_chunks - 1
         retries = 0
         max_retries = 3
-        retry_delay = 2.0  # seconds
+        retry_delay = 2.0
 
         while retries < max_retries:
             try:
                 with open(file_path, "rb") as f:
                     # Read the final chunk from the file
-                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    f.seek(final_chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)
 
                     # Prepare and send the chunk
@@ -1247,7 +1159,7 @@ class RestClient:
                             }
                             data = {
                                 "file_name": file_name,
-                                "chunk_index": chunk_index,
+                                "chunk_index": final_chunk_index,
                                 "total_chunks": total_chunks,
                                 "upload_id": upload_id,
                             }
@@ -1274,7 +1186,7 @@ class RestClient:
                             status_url = response_data.get("status_url")
 
                             logger.debug(
-                                f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                                f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
                             )
                             return status_url
 
@@ -1293,6 +1205,62 @@ class RestClient:
             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
         )
 
+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+            # Prepare and send the chunk
+            with tempfile.NamedTemporaryFile() as temp_file:
+                temp_file.write(chunk_data)
+                temp_file.flush()
+
+                # Create form data
+                with open(temp_file.name, "rb") as chunk_file_obj:
+                    files = {
+                        "chunk": (
+                            file_name,
+                            chunk_file_obj,
+                            "application/octet-stream",
+                        )
+                    }
+                    data = {
+                        "file_name": file_name,
+                        "chunk_index": chunk_index,
+                        "total_chunks": total_chunks,
+                        "upload_id": upload_id,
+                    }
+
+                    # Send the chunk
+                    response = self.multipart_client.post(
+                        f"/v0.1/crows/{job_name}/upload-chunk",
+                        files=files,
+                        data=data,
+                    )
+                    response.raise_for_status()
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
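In short, this refactor folds the old _upload_final_chunk into _upload_chunks_parallel: regular chunks go up in bounded parallel batches, then the final chunk (which triggers server-side assembly) is sent last with retry logic, and its status URL is returned. A minimal standalone sketch of the same pattern; the upload_chunk callable here is a hypothetical stand-in for the client's HTTP call, not part of this package:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    MAX_CONCURRENT_CHUNKS = 4

    def upload_all_chunks(upload_chunk, total_chunks: int) -> str | None:
        """Upload chunks 0..n-2 in bounded parallel batches, then the final chunk."""
        if total_chunks <= 0:
            return None
        num_regular = total_chunks - 1
        for start in range(0, num_regular, MAX_CONCURRENT_CHUNKS):
            end = min(start + MAX_CONCURRENT_CHUNKS, num_regular)
            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_CHUNKS) as pool:
                futures = {pool.submit(upload_chunk, i): i for i in range(start, end)}
                for future in as_completed(futures):
                    future.result()  # re-raises any chunk failure
        # The final chunk goes last: the server assembles the file once it
        # arrives, and its response carries the status URL.
        return upload_chunk(total_chunks - 1)

    # Example: upload_all_chunks(lambda i: f"status-{i}", 5) returns "status-4".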
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev129
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -5,6 +5,7 @@ uv.lock
 docs/__init__.py
 docs/client_notebook.ipynb
 futurehouse_client/__init__.py
+futurehouse_client/py.typed
 futurehouse_client.egg-info/PKG-INFO
 futurehouse_client.egg-info/SOURCES.txt
 futurehouse_client.egg-info/dependency_links.txt
@@ -264,9 +264,6 @@ class TestParallelChunking:
         client._upload_single_chunk = types.MethodType(
             RestClient._upload_single_chunk, client
         )
-        client._upload_final_chunk = types.MethodType(
-            RestClient._upload_final_chunk, client
-        )
         client._upload_single_file = types.MethodType(
             RestClient._upload_single_file, client
         )
@@ -372,19 +369,34 @@ class TestParallelChunking:
         num_regular_chunks = 5  # Smaller number for easier testing
         total_chunks = 6
 
-        # Use patch to mock the _upload_single_chunk method
+        # Mock file content for final chunk
+        chunk_content = b"A" * 1000
+
+        # Mock final chunk response
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.raise_for_status.return_value = None
+        mock_response.json.return_value = {"status_url": "http://test.com/status"}
+        mock_client.multipart_client.post.return_value = mock_response
+
+        # Use patch to mock the _upload_single_chunk method and file operations
         with patch.object(mock_client, "_upload_single_chunk") as mock_upload_chunk:
-            # Call the method - it should use ThreadPoolExecutor internally
-            mock_client._upload_chunks_parallel(
-                job_name,
-                file_path,
-                file_name,
-                upload_id,
-                num_regular_chunks,
-                total_chunks,
-            )
-
-            # Verify all chunks were processed by checking the call count
+            with patch("builtins.open", mock_open(read_data=chunk_content)):
+                with patch("tempfile.NamedTemporaryFile") as mock_temp_file:
+                    mock_temp_file.return_value.__enter__.return_value.name = (
+                        "temp_chunk"
+                    )
+
+                    # Call the method - it should use ThreadPoolExecutor internally
+                    mock_client._upload_chunks_parallel(
+                        job_name,
+                        file_path,
+                        file_name,
+                        upload_id,
+                        total_chunks,
+                    )
+
+            # Verify all regular chunks were processed by checking the call count
             assert mock_upload_chunk.call_count == num_regular_chunks
 
         # Verify the calls were made with correct parameters
@@ -437,14 +449,13 @@ class TestParallelChunking:
         assert data["total_chunks"] == total_chunks
         assert data["upload_id"] == upload_id
 
-    def test_upload_final_chunk_with_retry_on_conflict(self, mock_client):
+    def test_upload_chunks_parallel_retry_on_conflict(self, mock_client):
         """Test final chunk upload with retry logic for missing chunks (409 conflict)."""
         job_name = "test-job"
         file_path = Path("test_file.txt")
         file_name = "test_file.txt"
         upload_id = "test-upload-id"
-        chunk_index = 2
-        total_chunks = 3
+        total_chunks = 1
 
         # Mock file content
         chunk_content = b"A" * 1000
@@ -473,12 +484,11 @@ class TestParallelChunking:
                     "temp_chunk"
                 )
 
-                status_url = mock_client._upload_final_chunk(
+                status_url = mock_client._upload_chunks_parallel(
                     job_name,
                     file_path,
                     file_name,
                     upload_id,
-                    chunk_index,
                     total_chunks,
                 )
 
@@ -487,29 +497,24 @@ class TestParallelChunking:
             assert status_url == "http://test.com/status"
             mock_sleep.assert_called_once()  # Verify sleep was called for retry
 
-    def test_upload_final_chunk_max_retries_exceeded(self, mock_client):
+    def test_upload_chunks_parallel_final_chunk_max_retries_exceeded(self, mock_client):
         """Test final chunk upload fails after max retries."""
         job_name = "test-job"
         file_path = Path("test_file.txt")
         file_name = "test_file.txt"
         upload_id = "test-upload-id"
-        chunk_index = 2
-        total_chunks = 3
+        total_chunks = 1
 
         # Mock file content
         chunk_content = b"A" * 1000
 
-        # Mock response that always returns 409 (conflict) and raises an exception on raise_for_status
-        mock_response = MagicMock()
-        mock_response.status_code = 409
-        # Make raise_for_status raise an exception after the retries are exhausted
-        from httpx import HTTPStatusError, Request, codes
+        # Create a side effect that simulates an exception on every attempt
+        def side_effect(*args, **kwargs):
+            raise Exception("Simulated upload failure")  # noqa: TRY002
 
-        mock_request = MagicMock(spec=Request)
-        mock_response.raise_for_status.side_effect = HTTPStatusError(
-            "409 Conflict", request=mock_request, response=mock_response
-        )
-        mock_client.multipart_client.post.return_value = mock_response
+        mock_client.multipart_client.post.side_effect = side_effect
+
+        from httpx import codes
 
         with patch("builtins.open", mock_open(read_data=chunk_content)):
             with patch("tempfile.NamedTemporaryFile") as mock_temp_file:
@@ -523,17 +528,16 @@ class TestParallelChunking:
                 with pytest.raises(
                     FileUploadError, match="Error uploading final chunk"
                 ):
-                    mock_client._upload_final_chunk(
+                    mock_client._upload_chunks_parallel(
                        job_name,
                        file_path,
                        file_name,
                        upload_id,
-                        chunk_index,
                        total_chunks,
                     )
 
-                # Verify that retries were attempted (should be 3 attempts total)
-                assert mock_client.multipart_client.post.call_count == 3
+            # Verify that retries were attempted (should be 3 attempts total)
+            assert mock_client.multipart_client.post.call_count == 3
 
     def test_upload_directory_recursive(self, mock_client):
         """Test uploading a directory with nested files."""