futurehouse-client 0.3.15.dev71__tar.gz → 0.3.17.dev56__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/PKG-INFO +1 -1
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/client_notebook.ipynb +6 -6
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/job_client.py +1 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/rest_client.py +262 -17
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/PKG-INFO +1 -1
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/LICENSE +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/README.md +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/__init__.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/__init__.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/clients/__init__.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/__init__.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/app.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/client.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/rest.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/__init__.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/module_utils.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/utils/monitoring.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/SOURCES.txt +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/dependency_links.txt +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/requires.txt +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client.egg-info/top_level.txt +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/pyproject.toml +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/setup.cfg +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/tests/test_rest.py +0 -0
- {futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: futurehouse-client
|
3
|
-
Version: 0.3.15.dev71
|
3
|
+
Version: 0.3.17.dev56
|
4
4
|
Summary: A client for interacting with endpoints of the FutureHouse service.
|
5
5
|
Author-email: FutureHouse technical staff <hello@futurehouse.org>
|
6
6
|
Classifier: Operating System :: OS Independent
|
{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/docs/client_notebook.ipynb
RENAMED
@@ -27,12 +27,12 @@
|
|
27
27
|
"source": [
|
28
28
|
"import time\n",
|
29
29
|
"\n",
|
30
|
-
"from futurehouse_client import
|
30
|
+
"from futurehouse_client import FutureHouseClient, JobNames\n",
|
31
31
|
"from futurehouse_client.models import (\n",
|
32
32
|
" AuthType,\n",
|
33
|
-
" JobRequest,\n",
|
34
33
|
" RuntimeConfig,\n",
|
35
34
|
" Stage,\n",
|
35
|
+
" TaskRequest,\n",
|
36
36
|
")\n",
|
37
37
|
"from ldp.agent import AgentConfig"
|
38
38
|
]
|
@@ -53,7 +53,7 @@
|
|
53
53
|
"metadata": {},
|
54
54
|
"outputs": [],
|
55
55
|
"source": [
|
56
|
-
"client =
|
56
|
+
"client = FutureHouseClient(\n",
|
57
57
|
" stage=Stage.PROD,\n",
|
58
58
|
" auth_type=AuthType.API_KEY,\n",
|
59
59
|
" api_key=\"your-api-key\",\n",
|
@@ -80,7 +80,7 @@
|
|
80
80
|
"metadata": {},
|
81
81
|
"outputs": [],
|
82
82
|
"source": [
|
83
|
-
"job_data = JobRequest(\n",
|
83
|
+
"job_data = TaskRequest(\n",
|
84
84
|
" name=JobNames.from_string(\"crow\"),\n",
|
85
85
|
" query=\"What is the molecule known to have the greatest solubility in water?\",\n",
|
86
86
|
")\n",
|
@@ -114,7 +114,7 @@
|
|
114
114
|
" \"temperature\": 0.0,\n",
|
115
115
|
" },\n",
|
116
116
|
")\n",
|
117
|
-
"job_data = JobRequest(\n",
|
117
|
+
"job_data = TaskRequest(\n",
|
118
118
|
" name=JobNames.CROW,\n",
|
119
119
|
" query=\"How many moons does earth have?\",\n",
|
120
120
|
" runtime_config=RuntimeConfig(agent=agent, max_steps=10),\n",
|
@@ -145,7 +145,7 @@
|
|
145
145
|
"metadata": {},
|
146
146
|
"outputs": [],
|
147
147
|
"source": [
|
148
|
-
"job_data = JobRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
|
148
|
+
"job_data = TaskRequest(name=JobNames.CROW, query=\"How many species of birds are there?\")\n",
|
149
149
|
"\n",
|
150
150
|
"job_id = client.create_job(job_data)\n",
|
151
151
|
"while client.get_job().status != \"success\":\n",
|
@@ -29,6 +29,7 @@ class JobNames(StrEnum):
|
|
29
29
|
FALCON = "job-futurehouse-paperqa2-deep"
|
30
30
|
OWL = "job-futurehouse-hasanyone"
|
31
31
|
DUMMY = "job-futurehouse-dummy-env"
|
32
|
+
PHOENIX = "job-futurehouse-phoenix"
|
32
33
|
|
33
34
|
@classmethod
|
34
35
|
def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
|
@@ -6,6 +6,8 @@ import inspect
|
|
6
6
|
import json
|
7
7
|
import logging
|
8
8
|
import os
|
9
|
+
import tempfile
|
10
|
+
import uuid
|
9
11
|
from collections.abc import Mapping
|
10
12
|
from datetime import datetime
|
11
13
|
from pathlib import Path
|
@@ -118,7 +120,7 @@ class TaskResponse(BaseModel):
|
|
118
120
|
|
119
121
|
status: str
|
120
122
|
query: str
|
121
|
-
user: str
|
123
|
+
user: str | None = None
|
122
124
|
created_at: datetime
|
123
125
|
job_name: str
|
124
126
|
public: bool
|
@@ -200,11 +202,16 @@ class TaskResponseVerbose(TaskResponse):
|
|
200
202
|
shared_with: list[SimpleOrganization] | None = None
|
201
203
|
|
202
204
|
|
205
|
+
class FileUploadError(RestClientError):
|
206
|
+
"""Raised when there's an error uploading a file."""
|
207
|
+
|
208
|
+
|
203
209
|
class RestClient:
|
204
210
|
REQUEST_TIMEOUT: ClassVar[float] = 30.0 # sec
|
205
211
|
MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
|
206
212
|
RETRY_MULTIPLIER: ClassVar[int] = 1
|
207
213
|
MAX_RETRY_WAIT: ClassVar[int] = 10
|
214
|
+
CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024 # 16MB chunks
|
208
215
|
|
209
216
|
def __init__(
|
210
217
|
self,
|
@@ -388,29 +395,30 @@ class RestClient:
|
|
388
395
|
url = f"/v0.1/trajectories/{task_id}"
|
389
396
|
full_url = f"{self.base_url}{url}"
|
390
397
|
|
391
|
-
with
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
398
|
+
with (
|
399
|
+
external_trace(
|
400
|
+
url=full_url,
|
401
|
+
method="GET",
|
402
|
+
library="httpx",
|
403
|
+
custom_params={
|
404
|
+
"operation": "get_job",
|
405
|
+
"job_id": task_id,
|
406
|
+
},
|
407
|
+
),
|
408
|
+
self.client.stream("GET", url, params={"history": history}) as response,
|
399
409
|
):
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
response.raise_for_status()
|
405
|
-
verbose_response = TaskResponseVerbose(**response.json())
|
410
|
+
json_data = "".join(response.iter_text(chunk_size=1024))
|
411
|
+
data = json.loads(json_data)
|
412
|
+
verbose_response = TaskResponseVerbose(**data)
|
413
|
+
|
406
414
|
if verbose:
|
407
415
|
return verbose_response
|
408
416
|
if any(
|
409
417
|
JobNames.from_string(job_name) in verbose_response.job_name
|
410
418
|
for job_name in ["crow", "falcon", "owl", "dummy"]
|
411
419
|
):
|
412
|
-
return PQATaskResponse(**response.json())
|
413
|
-
return TaskResponse(**response.json())
|
420
|
+
return PQATaskResponse(**data)
|
421
|
+
return TaskResponse(**data)
|
414
422
|
except ValueError as e:
|
415
423
|
raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
|
416
424
|
except Exception as e:
|
@@ -643,6 +651,243 @@ class RestClient:
|
|
643
651
|
raise JobCreationError(f"Error generating docker image: {e!s}") from e
|
644
652
|
return build_context
|
645
653
|
|
654
|
+
@retry(
|
655
|
+
stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
|
656
|
+
wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
|
657
|
+
retry=retry_if_connection_error,
|
658
|
+
)
|
659
|
+
def upload_file(
|
660
|
+
self,
|
661
|
+
job_name: str,
|
662
|
+
file_path: str | os.PathLike,
|
663
|
+
folder_name: str | None = None,
|
664
|
+
) -> str:
|
665
|
+
"""Upload a file or directory to a futurehouse job bucket.
|
666
|
+
|
667
|
+
Args:
|
668
|
+
job_name: The name of the futurehouse job to upload to.
|
669
|
+
file_path: The local path to the file or directory to upload.
|
670
|
+
folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
|
671
|
+
|
672
|
+
Returns:
|
673
|
+
The upload ID used for the upload.
|
674
|
+
|
675
|
+
Raises:
|
676
|
+
FileUploadError: If there's an error uploading the file.
|
677
|
+
"""
|
678
|
+
file_path = Path(file_path)
|
679
|
+
if not file_path.exists():
|
680
|
+
raise FileNotFoundError(f"File or directory not found: {file_path}")
|
681
|
+
|
682
|
+
upload_id = folder_name or str(uuid.uuid4())
|
683
|
+
|
684
|
+
if file_path.is_dir():
|
685
|
+
# Process directory recursively
|
686
|
+
self._upload_directory(job_name, file_path, upload_id)
|
687
|
+
else:
|
688
|
+
# Process single file
|
689
|
+
self._upload_single_file(job_name, file_path, upload_id)
|
690
|
+
logger.info(f"Successfully uploaded {file_path} to {upload_id}")
|
691
|
+
return upload_id
|
692
|
+
|
693
|
+
def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
|
694
|
+
"""Upload all files in a directory recursively.
|
695
|
+
|
696
|
+
Args:
|
697
|
+
job_name: The key of the crow to upload to.
|
698
|
+
dir_path: The path to the directory to upload.
|
699
|
+
upload_id: The upload ID to use.
|
700
|
+
|
701
|
+
Raises:
|
702
|
+
FileUploadError: If there's an error uploading any file.
|
703
|
+
"""
|
704
|
+
# Skip common directories that shouldn't be uploaded
|
705
|
+
if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
|
706
|
+
return
|
707
|
+
|
708
|
+
try:
|
709
|
+
# Upload all files in the directory recursively
|
710
|
+
for path in dir_path.rglob("*"):
|
711
|
+
if path.is_file() and not any(
|
712
|
+
ignore in path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
|
713
|
+
):
|
714
|
+
# Use path relative to the original directory as file name
|
715
|
+
rel_path = path.relative_to(dir_path)
|
716
|
+
self._upload_single_file(
|
717
|
+
job_name,
|
718
|
+
path,
|
719
|
+
upload_id,
|
720
|
+
file_name=str(rel_path),
|
721
|
+
)
|
722
|
+
except Exception as e:
|
723
|
+
raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
|
724
|
+
|
725
|
+
def _upload_single_file(
|
726
|
+
self,
|
727
|
+
job_name: str,
|
728
|
+
file_path: Path,
|
729
|
+
upload_id: str,
|
730
|
+
file_name: str | None = None,
|
731
|
+
) -> None:
|
732
|
+
"""Upload a single file in chunks.
|
733
|
+
|
734
|
+
Args:
|
735
|
+
job_name: The key of the crow to upload to.
|
736
|
+
file_path: The path to the file to upload.
|
737
|
+
upload_id: The upload ID to use.
|
738
|
+
file_name: Optional name to use for the file. If not provided, the file's name will be used.
|
739
|
+
|
740
|
+
Raises:
|
741
|
+
FileUploadError: If there's an error uploading the file.
|
742
|
+
"""
|
743
|
+
file_name = file_name or file_path.name
|
744
|
+
file_size = file_path.stat().st_size
|
745
|
+
total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
|
746
|
+
|
747
|
+
logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
|
748
|
+
|
749
|
+
try:
|
750
|
+
with open(file_path, "rb") as f:
|
751
|
+
for chunk_index in range(total_chunks):
|
752
|
+
# Read the chunk from the file
|
753
|
+
f.seek(chunk_index * self.CHUNK_SIZE)
|
754
|
+
chunk_data = f.read(self.CHUNK_SIZE)
|
755
|
+
|
756
|
+
# Prepare and send the chunk
|
757
|
+
with tempfile.NamedTemporaryFile() as temp_file:
|
758
|
+
temp_file.write(chunk_data)
|
759
|
+
temp_file.flush()
|
760
|
+
|
761
|
+
# Create form data
|
762
|
+
with open(temp_file.name, "rb") as chunk_file_obj:
|
763
|
+
files = {
|
764
|
+
"chunk": (
|
765
|
+
file_name,
|
766
|
+
chunk_file_obj,
|
767
|
+
"application/octet-stream",
|
768
|
+
)
|
769
|
+
}
|
770
|
+
data = {
|
771
|
+
"file_name": file_name,
|
772
|
+
"chunk_index": chunk_index,
|
773
|
+
"total_chunks": total_chunks,
|
774
|
+
"upload_id": upload_id,
|
775
|
+
}
|
776
|
+
|
777
|
+
# Send the chunk
|
778
|
+
response = self.multipart_client.post(
|
779
|
+
f"/v0.1/crows/{job_name}/upload-chunk",
|
780
|
+
files=files,
|
781
|
+
data=data,
|
782
|
+
)
|
783
|
+
response.raise_for_status()
|
784
|
+
|
785
|
+
# Call progress callback if provided
|
786
|
+
|
787
|
+
logger.debug(
|
788
|
+
f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
|
789
|
+
)
|
790
|
+
|
791
|
+
logger.info(f"Successfully uploaded {file_name}")
|
792
|
+
|
793
|
+
except Exception as e:
|
794
|
+
logger.exception(f"Error uploading file {file_path}")
|
795
|
+
raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
|
796
|
+
|
797
|
+
@retry(
|
798
|
+
stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
|
799
|
+
wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
|
800
|
+
retry=retry_if_connection_error,
|
801
|
+
)
|
802
|
+
def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
|
803
|
+
"""List files and directories in a GCS location for a given job_name and upload_id.
|
804
|
+
|
805
|
+
Args:
|
806
|
+
job_name: The name of the futurehouse job.
|
807
|
+
folder_name: The specific folder name (upload_id) to list files from.
|
808
|
+
|
809
|
+
Returns:
|
810
|
+
A list of files in the GCS folder.
|
811
|
+
|
812
|
+
Raises:
|
813
|
+
RestClientError: If there is an error listing the files.
|
814
|
+
"""
|
815
|
+
try:
|
816
|
+
url = f"/v0.1/crows/{job_name}/list-files"
|
817
|
+
params = {"upload_id": folder_name}
|
818
|
+
response = self.client.get(url, params=params)
|
819
|
+
response.raise_for_status()
|
820
|
+
return response.json()
|
821
|
+
except HTTPStatusError as e:
|
822
|
+
logger.exception(
|
823
|
+
f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
|
824
|
+
)
|
825
|
+
raise RestClientError(
|
826
|
+
f"Error listing files: {e.response.status_code} - {e.response.text}"
|
827
|
+
) from e
|
828
|
+
except Exception as e:
|
829
|
+
logger.exception(
|
830
|
+
f"Error listing files for job {job_name}, folder {folder_name}"
|
831
|
+
)
|
832
|
+
raise RestClientError(f"Error listing files: {e!s}") from e
|
833
|
+
|
834
|
+
@retry(
|
835
|
+
stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
|
836
|
+
wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
|
837
|
+
retry=retry_if_connection_error,
|
838
|
+
)
|
839
|
+
def download_file(
|
840
|
+
self,
|
841
|
+
job_name: str,
|
842
|
+
folder_name: str,
|
843
|
+
file_path: str,
|
844
|
+
destination_path: str | os.PathLike,
|
845
|
+
) -> None:
|
846
|
+
"""Download a file from GCS to a local path.
|
847
|
+
|
848
|
+
Args:
|
849
|
+
job_name: The name of the futurehouse job.
|
850
|
+
folder_name: The specific folder name (upload_id) the file belongs to.
|
851
|
+
file_path: The relative path of the file to download
|
852
|
+
(e.g., 'data/my_file.csv' or 'my_image.png').
|
853
|
+
destination_path: The local path where the file should be saved.
|
854
|
+
|
855
|
+
Raises:
|
856
|
+
RestClientError: If there is an error downloading the file.
|
857
|
+
FileNotFoundError: If the destination directory does not exist.
|
858
|
+
"""
|
859
|
+
destination_path = Path(destination_path)
|
860
|
+
# Ensure the destination directory exists
|
861
|
+
destination_path.parent.mkdir(parents=True, exist_ok=True)
|
862
|
+
|
863
|
+
try:
|
864
|
+
url = f"/v0.1/crows/{job_name}/download-file"
|
865
|
+
params = {"upload_id": folder_name, "file_path": file_path}
|
866
|
+
|
867
|
+
with self.client.stream("GET", url, params=params) as response:
|
868
|
+
response.raise_for_status() # Check for HTTP errors before streaming
|
869
|
+
with open(destination_path, "wb") as f:
|
870
|
+
for chunk in response.iter_bytes(chunk_size=8192):
|
871
|
+
f.write(chunk)
|
872
|
+
logger.info(f"File {file_path} downloaded to {destination_path}")
|
873
|
+
except HTTPStatusError as e:
|
874
|
+
logger.exception(
|
875
|
+
f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
|
876
|
+
)
|
877
|
+
# Clean up partially downloaded file if an error occurs
|
878
|
+
if destination_path.exists():
|
879
|
+
destination_path.unlink()
|
880
|
+
raise RestClientError(
|
881
|
+
f"Error downloading file: {e.response.status_code} - {e.response.text}"
|
882
|
+
) from e
|
883
|
+
except Exception as e:
|
884
|
+
logger.exception(
|
885
|
+
f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
|
886
|
+
)
|
887
|
+
if destination_path.exists():
|
888
|
+
destination_path.unlink() # Clean up partial file
|
889
|
+
raise RestClientError(f"Error downloading file: {e!s}") from e
|
890
|
+
|
646
891
|
|
647
892
|
def get_installed_packages() -> dict[str, str]:
|
648
893
|
"""Returns a dictionary of installed packages and their versions."""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: futurehouse-client
|
3
|
-
Version: 0.3.15.dev71
|
3
|
+
Version: 0.3.17.dev56
|
4
4
|
Summary: A client for interacting with endpoints of the FutureHouse service.
|
5
5
|
Author-email: FutureHouse technical staff <hello@futurehouse.org>
|
6
6
|
Classifier: Operating System :: OS Independent
|
File without changes
|
File without changes
|
File without changes
|
{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{futurehouse_client-0.3.15.dev71 → futurehouse_client-0.3.17.dev56}/futurehouse_client/models/app.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|