futurehouse-client 0.3.16__py3-none-any.whl → 0.3.17.dev56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ class JobNames(StrEnum):
29
29
  FALCON = "job-futurehouse-paperqa2-deep"
30
30
  OWL = "job-futurehouse-hasanyone"
31
31
  DUMMY = "job-futurehouse-dummy-env"
32
+ PHOENIX = "job-futurehouse-phoenix"
32
33
 
33
34
  @classmethod
34
35
  def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -6,6 +6,8 @@ import inspect
6
6
  import json
7
7
  import logging
8
8
  import os
9
+ import tempfile
10
+ import uuid
9
11
  from collections.abc import Mapping
10
12
  from datetime import datetime
11
13
  from pathlib import Path
@@ -200,11 +202,16 @@ class TaskResponseVerbose(TaskResponse):
200
202
  shared_with: list[SimpleOrganization] | None = None
201
203
 
202
204
 
205
class FileUploadError(RestClientError):
    """Signals that uploading a file (or a directory of files) failed."""
207
+
208
+
203
209
  class RestClient:
204
210
  REQUEST_TIMEOUT: ClassVar[float] = 30.0 # sec
205
211
  MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
206
212
  RETRY_MULTIPLIER: ClassVar[int] = 1
207
213
  MAX_RETRY_WAIT: ClassVar[int] = 10
214
+ CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024 # 16MB chunks
208
215
 
209
216
  def __init__(
210
217
  self,
@@ -644,6 +651,243 @@ class RestClient:
644
651
  raise JobCreationError(f"Error generating docker image: {e!s}") from e
645
652
  return build_context
646
653
 
654
@retry(
    stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
    wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
    retry=retry_if_connection_error,
)
def upload_file(
    self,
    job_name: str,
    file_path: str | os.PathLike,
    folder_name: str | None = None,
) -> str:
    """Send a local file, or a whole directory tree, to a futurehouse job bucket.

    Args:
        job_name: Name of the futurehouse job that receives the upload.
        file_path: Local file or directory to send.
        folder_name: Folder to upload into. When omitted (or empty), a random
            UUID4 string is generated and used instead.

    Returns:
        The upload ID (the folder name, or the generated UUID) the files were
        stored under.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist locally.
        FileUploadError: If sending any file fails.
    """
    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"File or directory not found: {file_path}")

    # A caller-supplied folder wins; otherwise mint a fresh identifier.
    upload_id = folder_name if folder_name else str(uuid.uuid4())

    # Directories are walked recursively; anything else goes up as one file.
    send = (
        self._upload_directory if file_path.is_dir() else self._upload_single_file
    )
    send(job_name, file_path, upload_id)

    logger.info(f"Successfully uploaded {file_path} to {upload_id}")
    return upload_id
692
+
693
def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
    """Upload every non-ignored file found beneath a directory.

    Args:
        job_name: Name of the futurehouse job to upload to.
        dir_path: Directory whose contents are uploaded recursively.
        upload_id: Upload ID (folder) that every file is stored under.

    Raises:
        FileUploadError: If walking the directory or uploading any file fails.
    """

    def _is_ignored(candidate: Path) -> bool:
        # A path is skipped when any of its components is an ignored name.
        return any(
            ignore in candidate.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
        )

    # Nothing is sent when the directory itself sits on an ignored path.
    if _is_ignored(dir_path):
        return

    try:
        for path in dir_path.rglob("*"):
            if not path.is_file() or _is_ignored(path):
                continue
            # Name each file by its path relative to the uploaded directory.
            # as_posix() keeps "/" separators on every platform, whereas
            # str() would emit "\\" on Windows and flatten the remote layout.
            rel_path = path.relative_to(dir_path)
            self._upload_single_file(
                job_name,
                path,
                upload_id,
                file_name=rel_path.as_posix(),
            )
    except Exception as e:
        raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
724
+
725
def _upload_single_file(
    self,
    job_name: str,
    file_path: Path,
    upload_id: str,
    file_name: str | None = None,
) -> None:
    """Upload one file as a sequence of ``CHUNK_SIZE``-byte multipart requests.

    Args:
        job_name: Name of the futurehouse job to upload to.
        file_path: Local file to read.
        upload_id: Upload ID (folder) the file is stored under.
        file_name: Name to report for the file. Defaults to ``file_path.name``.

    Raises:
        FileUploadError: If reading the file or sending any chunk fails.
    """
    file_name = file_name or file_path.name
    file_size = file_path.stat().st_size
    # Ceiling division: number of CHUNK_SIZE pieces needed to cover the file.
    # NOTE(review): a zero-byte file yields 0 chunks, so no request is sent
    # for it -- confirm whether the service expects an empty chunk instead.
    total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE

    logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")

    try:
        with open(file_path, "rb") as f:
            for chunk_index in range(total_chunks):
                # Sequential reads already advance the file position, so no
                # explicit seek is required between chunks.
                chunk_data = f.read(self.CHUNK_SIZE)

                # Hand the bytes straight to the multipart encoder. Spooling
                # them through a NamedTemporaryFile and reopening it by name
                # fails on Windows and costs an extra disk round trip.
                files = {
                    "chunk": (
                        file_name,
                        chunk_data,
                        "application/octet-stream",
                    )
                }
                data = {
                    "file_name": file_name,
                    "chunk_index": chunk_index,
                    "total_chunks": total_chunks,
                    "upload_id": upload_id,
                }

                response = self.multipart_client.post(
                    f"/v0.1/crows/{job_name}/upload-chunk",
                    files=files,
                    data=data,
                )
                response.raise_for_status()

                logger.debug(
                    f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
                )

        logger.info(f"Successfully uploaded {file_name}")

    except Exception as e:
        logger.exception(f"Error uploading file {file_path}")
        raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
796
+
797
@retry(
    stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
    wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
    retry=retry_if_connection_error,
)
def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
    """Fetch the listing of an upload folder for a given job.

    Args:
        job_name: Name of the futurehouse job.
        folder_name: Folder name (upload_id) whose contents are listed.

    Returns:
        The decoded JSON body of the list-files response, annotated as a
        mapping from string keys to lists of strings.

    Raises:
        RestClientError: If the request fails or its response cannot be decoded.
    """
    try:
        response = self.client.get(
            f"/v0.1/crows/{job_name}/list-files",
            params={"upload_id": folder_name},
        )
        response.raise_for_status()
        listing = response.json()
    except HTTPStatusError as e:
        # The server answered with an error status: surface code and body.
        logger.exception(
            f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
        )
        raise RestClientError(
            f"Error listing files: {e.response.status_code} - {e.response.text}"
        ) from e
    except Exception as e:
        # Anything else (transport failure, undecodable body, ...).
        logger.exception(
            f"Error listing files for job {job_name}, folder {folder_name}"
        )
        raise RestClientError(f"Error listing files: {e!s}") from e
    return listing
833
+
834
@retry(
    stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
    wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
    retry=retry_if_connection_error,
)
def download_file(
    self,
    job_name: str,
    folder_name: str,
    file_path: str,
    destination_path: str | os.PathLike,
) -> None:
    """Stream a file from a job's upload folder to a local path.

    The parent directory of ``destination_path`` is created when missing.
    That step runs before the error handling below, so a failure to create
    it propagates unchanged rather than as ``RestClientError``.

    Args:
        job_name: Name of the futurehouse job.
        folder_name: Folder name (upload_id) the file belongs to.
        file_path: Relative path of the file to download
            (e.g., 'data/my_file.csv' or 'my_image.png').
        destination_path: Local path the file is written to.

    Raises:
        RestClientError: If the request fails or the file cannot be written.
    """
    destination_path = Path(destination_path)
    # Ensure the destination directory exists
    destination_path.parent.mkdir(parents=True, exist_ok=True)

    # Set once the destination has been opened for writing. Cleanup must only
    # remove a file this call created or truncated, never a pre-existing file
    # left untouched because the request failed before any byte was written.
    wrote_to_destination = False

    try:
        url = f"/v0.1/crows/{job_name}/download-file"
        params = {"upload_id": folder_name, "file_path": file_path}

        with self.client.stream("GET", url, params=params) as response:
            try:
                response.raise_for_status()  # Check for HTTP errors before streaming
            except HTTPStatusError:
                # A streamed body is not loaded automatically. Read it while
                # the stream is still open so the handler below can use
                # e.response.text instead of failing on an unread response.
                response.read()
                raise
            with open(destination_path, "wb") as f:
                wrote_to_destination = True
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
        logger.info(f"File {file_path} downloaded to {destination_path}")
    except HTTPStatusError as e:
        logger.exception(
            f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
        )
        # Clean up partially downloaded file if an error occurs
        if wrote_to_destination and destination_path.exists():
            destination_path.unlink()
        raise RestClientError(
            f"Error downloading file: {e.response.status_code} - {e.response.text}"
        ) from e
    except Exception as e:
        logger.exception(
            f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
        )
        if wrote_to_destination and destination_path.exists():
            destination_path.unlink()  # Clean up partial file
        raise RestClientError(f"Error downloading file: {e!s}") from e
890
+
647
891
 
648
892
  def get_installed_packages() -> dict[str, str]:
649
893
  """Returns a dictionary of installed packages and their versions."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: futurehouse-client
3
- Version: 0.3.16
3
+ Version: 0.3.17.dev56
4
4
  Summary: A client for interacting with endpoints of the FutureHouse service.
5
5
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
6
6
  Classifier: Operating System :: OS Independent
@@ -1,7 +1,7 @@
1
1
  futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
2
2
  futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
3
- futurehouse_client/clients/job_client.py,sha256=yBFKDNcFnuZDNgoK2d5037rbuzQ7TlSK6MmklEKV8EA,11056
4
- futurehouse_client/clients/rest_client.py,sha256=Dc29QRNZMO4uxaXNGKyx18Tn-vLaJ6P5fCbM_0u-Z3I,26379
3
+ futurehouse_client/clients/job_client.py,sha256=Fi3YvN4k82AuXCe8vlwxhkK8CXS164NQrs7paj9qIek,11096
4
+ futurehouse_client/clients/rest_client.py,sha256=OBJeRSQezd2BSJGHKQZ4Cg1uhThtOKwgBOSEDI4n0go,36181
5
5
  futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
6
6
  futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
7
7
  futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
@@ -9,7 +9,7 @@ futurehouse_client/models/rest.py,sha256=W-wNFTN7HALYFFphw-RQYRMm6_TSa1cl4T-mZ1m
9
9
  futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
11
11
  futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
12
- futurehouse_client-0.3.16.dist-info/METADATA,sha256=uCvzXKeI6i8PRvike8YKVa7-IJQAwL8G2ILjjOf6xIo,8175
13
- futurehouse_client-0.3.16.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
14
- futurehouse_client-0.3.16.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
15
- futurehouse_client-0.3.16.dist-info/RECORD,,
12
+ futurehouse_client-0.3.17.dev56.dist-info/METADATA,sha256=KZWJ9eKHsx4-VjrG_O-dB__TqrIK-jeU5kcNHfKPoaI,8181
13
+ futurehouse_client-0.3.17.dev56.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
14
+ futurehouse_client-0.3.17.dev56.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
15
+ futurehouse_client-0.3.17.dev56.dist-info/RECORD,,