futurehouse-client 0.3.15.dev71__tar.gz → 0.3.17.dev56__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (25)
  1. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/PKG-INFO +1 -1
  2. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/client_notebook.ipynb +6 -6
  3. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/job_client.py +1 -0
  4. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/rest_client.py +262 -17
  5. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/PKG-INFO +1 -1
  6. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/LICENSE +0 -0
  7. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/README.md +0 -0
  8. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/__init__.py +0 -0
  9. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/__init__.py +0 -0
  10. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/__init__.py +0 -0
  11. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/__init__.py +0 -0
  12. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/app.py +0 -0
  13. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/client.py +0 -0
  14. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/rest.py +0 -0
  15. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/__init__.py +0 -0
  16. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/module_utils.py +0 -0
  17. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/monitoring.py +0 -0
  18. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/SOURCES.txt +0 -0
  19. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/dependency_links.txt +0 -0
  20. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/requires.txt +0 -0
  21. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/top_level.txt +0 -0
  22. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/pyproject.toml +0 -0
  23. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/setup.cfg +0 -0
  24. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/tests/test_rest.py +0 -0
  25. {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: futurehouse-client
3
- Version: 0.3.15.dev71
3
+ Version: 0.3.17.dev56
4
4
  Summary: A client for interacting with endpoints of the FutureHouse service.
5
5
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
6
6
  Classifier: Operating System :: OS Independent
@@ -27,12 +27,12 @@
27
27
  "source": [
28
28
  "import time\n",
29
29
  "\n",
30
- "from futurehouse_client import Client, JobNames\n",
30
+ "from futurehouse_client import FutureHouseClient, JobNames\n",
31
31
  "from futurehouse_client.models import (\n",
32
32
  " AuthType,\n",
33
- " JobRequest,\n",
34
33
  " RuntimeConfig,\n",
35
34
  " Stage,\n",
35
+ " TaskRequest,\n",
36
36
  ")\n",
37
37
  "from ldp.agent import AgentConfig"
38
38
  ]
@@ -53,7 +53,7 @@
53
53
  "metadata": {},
54
54
  "outputs": [],
55
55
  "source": [
56
- "client = Client(\n",
56
+ "client = FutureHouseClient(\n",
57
57
  " stage=Stage.PROD,\n",
58
58
  " auth_type=AuthType.API_KEY,\n",
59
59
  " api_key=\"your-api-key\",\n",
@@ -80,7 +80,7 @@
80
80
  "metadata": {},
81
81
  "outputs": [],
82
82
  "source": [
83
- "job_data = JobRequest(\n",
83
+ "job_data = TaskRequest(\n",
84
84
  " name=JobNames.from_string(\"crow\"),\n",
85
85
  " query=\"What is the molecule known to have the greatest solubility in water?\",\n",
86
86
  ")\n",
@@ -114,7 +114,7 @@
114
114
  " \"temperature\": 0.0,\n",
115
115
  " },\n",
116
116
  ")\n",
117
- "job_data = JobRequest(\n",
117
+ "job_data = TaskRequest(\n",
118
118
  " name=JobNames.CROW,\n",
119
119
  " query=\"How many moons does earth have?\",\n",
120
120
  " runtime_config=RuntimeConfig(agent=agent, max_steps=10),\n",
@@ -145,7 +145,7 @@
145
145
  "metadata": {},
146
146
  "outputs": [],
147
147
  "source": [
148
- "job_data = JobRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
148
+ "job_data = TaskRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
149
149
  "\n",
150
150
  "job_id = client.create_job(job_data)\n",
151
151
  "while client.get_job().status != \"success\":\n",
@@ -29,6 +29,7 @@ class JobNames(StrEnum):
29
29
  FALCON = "job-futurehouse-paperqa2-deep"
30
30
  OWL = "job-futurehouse-hasanyone"
31
31
  DUMMY = "job-futurehouse-dummy-env"
32
+ PHOENIX = "job-futurehouse-phoenix"
32
33
 
33
34
  @classmethod
34
35
  def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -6,6 +6,8 @@ import inspect
6
6
  import json
7
7
  import logging
8
8
  import os
9
+ import tempfile
10
+ import uuid
9
11
  from collections.abc import Mapping
10
12
  from datetime import datetime
11
13
  from pathlib import Path
@@ -118,7 +120,7 @@ class TaskResponse(BaseModel):
118
120
 
119
121
  status: str
120
122
  query: str
121
- user: str
123
+ user: str | None = None
122
124
  created_at: datetime
123
125
  job_name: str
124
126
  public: bool
@@ -200,11 +202,16 @@ class TaskResponseVerbose(TaskResponse):
200
202
  shared_with: list[SimpleOrganization] | None = None
201
203
 
202
204
 
205
+ class FileUploadError(RestClientError):
206
+ """Raised when there's an error uploading a file."""
207
+
208
+
203
209
  class RestClient:
204
210
  REQUEST_TIMEOUT: ClassVar[float] = 30.0 # sec
205
211
  MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
206
212
  RETRY_MULTIPLIER: ClassVar[int] = 1
207
213
  MAX_RETRY_WAIT: ClassVar[int] = 10
214
+ CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024 # 16MB chunks
208
215
 
209
216
  def __init__(
210
217
  self,
@@ -388,29 +395,30 @@ class RestClient:
388
395
  url = f"/v0.1/trajectories/{task_id}"
389
396
  full_url = f"{self.base_url}{url}"
390
397
 
391
- with external_trace(
392
- url=full_url,
393
- method="GET",
394
- library="httpx",
395
- custom_params={
396
- "operation": "get_job",
397
- "job_id": task_id,
398
- },
398
+ with (
399
+ external_trace(
400
+ url=full_url,
401
+ method="GET",
402
+ library="httpx",
403
+ custom_params={
404
+ "operation": "get_job",
405
+ "job_id": task_id,
406
+ },
407
+ ),
408
+ self.client.stream("GET", url, params={"history": history}) as response,
399
409
  ):
400
- response = self.client.get(
401
- url,
402
- params={"history": history},
403
- )
404
- response.raise_for_status()
405
- verbose_response = TaskResponseVerbose(**response.json())
410
+ json_data = "".join(response.iter_text(chunk_size=1024))
411
+ data = json.loads(json_data)
412
+ verbose_response = TaskResponseVerbose(**data)
413
+
406
414
  if verbose:
407
415
  return verbose_response
408
416
  if any(
409
417
  JobNames.from_string(job_name) in verbose_response.job_name
410
418
  for job_name in ["crow", "falcon", "owl", "dummy"]
411
419
  ):
412
- return PQATaskResponse(**response.json())
413
- return TaskResponse(**response.json())
420
+ return PQATaskResponse(**data)
421
+ return TaskResponse(**data)
414
422
  except ValueError as e:
415
423
  raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
416
424
  except Exception as e:
@@ -643,6 +651,243 @@ class RestClient:
643
651
  raise JobCreationError(f"Error generating docker image: {e!s}") from e
644
652
  return build_context
645
653
 
654
+ @retry(
655
+ stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
656
+ wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
657
+ retry=retry_if_connection_error,
658
+ )
659
+ def upload_file(
660
+ self,
661
+ job_name: str,
662
+ file_path: str | os.PathLike,
663
+ folder_name: str | None = None,
664
+ ) -> str:
665
+ """Upload a file or directory to a futurehouse job bucket.
666
+
667
+ Args:
668
+ job_name: The name of the futurehouse job to upload to.
669
+ file_path: The local path to the file or directory to upload.
670
+ folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
671
+
672
+ Returns:
673
+ The upload ID used for the upload.
674
+
675
+ Raises:
676
+ FileUploadError: If there's an error uploading the file.
677
+ """
678
+ file_path = Path(file_path)
679
+ if not file_path.exists():
680
+ raise FileNotFoundError(f"File or directory not found: {file_path}")
681
+
682
+ upload_id = folder_name or str(uuid.uuid4())
683
+
684
+ if file_path.is_dir():
685
+ # Process directory recursively
686
+ self._upload_directory(job_name, file_path, upload_id)
687
+ else:
688
+ # Process single file
689
+ self._upload_single_file(job_name, file_path, upload_id)
690
+ logger.info(f"Successfully uploaded {file_path} to {upload_id}")
691
+ return upload_id
692
+
693
+ def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
694
+ """Upload all files in a directory recursively.
695
+
696
+ Args:
697
+ job_name: The key of the crow to upload to.
698
+ dir_path: The path to the directory to upload.
699
+ upload_id: The upload ID to use.
700
+
701
+ Raises:
702
+ FileUploadError: If there's an error uploading any file.
703
+ """
704
+ # Skip common directories that shouldn't be uploaded
705
+ if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
706
+ return
707
+
708
+ try:
709
+ # Upload all files in the directory recursively
710
+ for path in dir_path.rglob("*"):
711
+ if path.is_file() and not any(
712
+ ignore in path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
713
+ ):
714
+ # Use path relative to the original directory as file name
715
+ rel_path = path.relative_to(dir_path)
716
+ self._upload_single_file(
717
+ job_name,
718
+ path,
719
+ upload_id,
720
+ file_name=str(rel_path),
721
+ )
722
+ except Exception as e:
723
+ raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
724
+
725
+ def _upload_single_file(
726
+ self,
727
+ job_name: str,
728
+ file_path: Path,
729
+ upload_id: str,
730
+ file_name: str | None = None,
731
+ ) -> None:
732
+ """Upload a single file in chunks.
733
+
734
+ Args:
735
+ job_name: The key of the crow to upload to.
736
+ file_path: The path to the file to upload.
737
+ upload_id: The upload ID to use.
738
+ file_name: Optional name to use for the file. If not provided, the file's name will be used.
739
+
740
+ Raises:
741
+ FileUploadError: If there's an error uploading the file.
742
+ """
743
+ file_name = file_name or file_path.name
744
+ file_size = file_path.stat().st_size
745
+ total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
746
+
747
+ logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
748
+
749
+ try:
750
+ with open(file_path, "rb") as f:
751
+ for chunk_index in range(total_chunks):
752
+ # Read the chunk from the file
753
+ f.seek(chunk_index * self.CHUNK_SIZE)
754
+ chunk_data = f.read(self.CHUNK_SIZE)
755
+
756
+ # Prepare and send the chunk
757
+ with tempfile.NamedTemporaryFile() as temp_file:
758
+ temp_file.write(chunk_data)
759
+ temp_file.flush()
760
+
761
+ # Create form data
762
+ with open(temp_file.name, "rb") as chunk_file_obj:
763
+ files = {
764
+ "chunk": (
765
+ file_name,
766
+ chunk_file_obj,
767
+ "application/octet-stream",
768
+ )
769
+ }
770
+ data = {
771
+ "file_name": file_name,
772
+ "chunk_index": chunk_index,
773
+ "total_chunks": total_chunks,
774
+ "upload_id": upload_id,
775
+ }
776
+
777
+ # Send the chunk
778
+ response = self.multipart_client.post(
779
+ f"/v0.1/crows/{job_name}/upload-chunk",
780
+ files=files,
781
+ data=data,
782
+ )
783
+ response.raise_for_status()
784
+
785
+ # Call progress callback if provided
786
+
787
+ logger.debug(
788
+ f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
789
+ )
790
+
791
+ logger.info(f"Successfully uploaded {file_name}")
792
+
793
+ except Exception as e:
794
+ logger.exception(f"Error uploading file {file_path}")
795
+ raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
796
+
797
+ @retry(
798
+ stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
799
+ wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
800
+ retry=retry_if_connection_error,
801
+ )
802
+ def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
803
+ """List files and directories in a GCS location for a given job_name and upload_id.
804
+
805
+ Args:
806
+ job_name: The name of the futurehouse job.
807
+ folder_name: The specific folder name (upload_id) to list files from.
808
+
809
+ Returns:
810
+ A list of files in the GCS folder.
811
+
812
+ Raises:
813
+ RestClientError: If there is an error listing the files.
814
+ """
815
+ try:
816
+ url = f"/v0.1/crows/{job_name}/list-files"
817
+ params = {"upload_id": folder_name}
818
+ response = self.client.get(url, params=params)
819
+ response.raise_for_status()
820
+ return response.json()
821
+ except HTTPStatusError as e:
822
+ logger.exception(
823
+ f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
824
+ )
825
+ raise RestClientError(
826
+ f"Error listing files: {e.response.status_code} - {e.response.text}"
827
+ ) from e
828
+ except Exception as e:
829
+ logger.exception(
830
+ f"Error listing files for job {job_name}, folder {folder_name}"
831
+ )
832
+ raise RestClientError(f"Error listing files: {e!s}") from e
833
+
834
+ @retry(
835
+ stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
836
+ wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
837
+ retry=retry_if_connection_error,
838
+ )
839
+ def download_file(
840
+ self,
841
+ job_name: str,
842
+ folder_name: str,
843
+ file_path: str,
844
+ destination_path: str | os.PathLike,
845
+ ) -> None:
846
+ """Download a file from GCS to a local path.
847
+
848
+ Args:
849
+ job_name: The name of the futurehouse job.
850
+ folder_name: The specific folder name (upload_id) the file belongs to.
851
+ file_path: The relative path of the file to download
852
+ (e.g., 'data/my_file.csv' or 'my_image.png').
853
+ destination_path: The local path where the file should be saved.
854
+
855
+ Raises:
856
+ RestClientError: If there is an error downloading the file.
857
+ FileNotFoundError: If the destination directory does not exist.
858
+ """
859
+ destination_path = Path(destination_path)
860
+ # Ensure the destination directory exists
861
+ destination_path.parent.mkdir(parents=True, exist_ok=True)
862
+
863
+ try:
864
+ url = f"/v0.1/crows/{job_name}/download-file"
865
+ params = {"upload_id": folder_name, "file_path": file_path}
866
+
867
+ with self.client.stream("GET", url, params=params) as response:
868
+ response.raise_for_status() # Check for HTTP errors before streaming
869
+ with open(destination_path, "wb") as f:
870
+ for chunk in response.iter_bytes(chunk_size=8192):
871
+ f.write(chunk)
872
+ logger.info(f"File {file_path} downloaded to {destination_path}")
873
+ except HTTPStatusError as e:
874
+ logger.exception(
875
+ f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
876
+ )
877
+ # Clean up partially downloaded file if an error occurs
878
+ if destination_path.exists():
879
+ destination_path.unlink()
880
+ raise RestClientError(
881
+ f"Error downloading file: {e.response.status_code} - {e.response.text}"
882
+ ) from e
883
+ except Exception as e:
884
+ logger.exception(
885
+ f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
886
+ )
887
+ if destination_path.exists():
888
+ destination_path.unlink() # Clean up partial file
889
+ raise RestClientError(f"Error downloading file: {e!s}") from e
890
+
646
891
 
647
892
  def get_installed_packages() -> dict[str, str]:
648
893
  """Returns a dictionary of installed packages and their versions."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: futurehouse-client
3
- Version: 0.3.15.dev71
3
+ Version: 0.3.17.dev56
4
4
  Summary: A client for interacting with endpoints of the FutureHouse service.
5
5
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
6
6
  Classifier: Operating System :: OS Independent