futurehouse-client 0.4.1.dev95__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/clients/data_storage_methods.py +725 -139
- futurehouse_client/clients/job_client.py +50 -0
- futurehouse_client/clients/rest_client.py +126 -56
- futurehouse_client/models/__init__.py +2 -1
- futurehouse_client/models/app.py +4 -7
- futurehouse_client/models/data_storage_methods.py +31 -10
- futurehouse_client/models/rest.py +48 -7
- futurehouse_client/utils/general.py +35 -6
- futurehouse_client/utils/world_model_tools.py +23 -3
- futurehouse_client/version.py +16 -3
- {futurehouse_client-0.4.1.dev95.dist-info → futurehouse_client-0.4.2.dist-info}/METADATA +2 -1
- futurehouse_client-0.4.2.dist-info/RECORD +23 -0
- futurehouse_client-0.4.1.dev95.dist-info/RECORD +0 -23
- {futurehouse_client-0.4.1.dev95.dist-info → futurehouse_client-0.4.2.dist-info}/WHEEL +0 -0
- {futurehouse_client-0.4.1.dev95.dist-info → futurehouse_client-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {futurehouse_client-0.4.1.dev95.dist-info → futurehouse_client-0.4.2.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@ import aiohttp
 import requests as requests_lib
 from google.resumable_media import requests as resumable_requests
 from httpx import AsyncClient, Client, HTTPStatusError, codes
+from lmi.utils import gather_with_concurrency
 from requests.adapters import HTTPAdapter
 from tenacity import (
     before_sleep_log,
@@ -26,12 +27,17 @@ from tqdm import tqdm
 from urllib3.util.retry import Retry

 from futurehouse_client.models.data_storage_methods import (
+    CreateDatasetPayload,
     DataStorageLocationPayload,
     DataStorageRequestPayload,
     DataStorageResponse,
     DirectoryManifest,
     ManifestEntry,
 )
+from futurehouse_client.models.rest import (
+    DataStorageSearchPayload,
+    SearchCriterion,
+)
 from futurehouse_client.utils.general import retry_if_connection_error

 # this is only required if they're using a yaml manifest
@@ -54,6 +60,7 @@ INITIATE_HEADERS = {
     "x-goog-resumable": "start",
     "Content-Length": "0",
 }
+DOWNLOAD_CONCURRENCY = 3


 def _should_ignore_file(
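The new DOWNLOAD_CONCURRENCY = 3 constant caps how many signed-URL downloads run at once when a data entry has multiple storage locations; afetch_data_from_storage (further down in this diff) passes it to lmi.utils.gather_with_concurrency together with a list of download coroutines. A minimal sketch of what that helper is assumed to do, essentially a semaphore-bounded asyncio.gather:

import asyncio
from collections.abc import Awaitable, Iterable
from typing import TypeVar

T = TypeVar("T")

async def gather_with_concurrency(n: int, coros: Iterable[Awaitable[T]]) -> list[T]:
    # Assumed semantics: run all awaitables, but never more than `n` at the same time.
    semaphore = asyncio.Semaphore(n)

    async def bounded(coro: Awaitable[T]) -> T:
        async with semaphore:
            return await coro

    return await asyncio.gather(*(bounded(c) for c in coros))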
@@ -438,6 +445,10 @@ class DataStorageCreationError(DataStorageError):
     """Raised when there's an error creating a data storage entry."""


+class DataStorageRetrievalError(DataStorageError):
+    """Raised when there's an error retrieving a data storage entry."""
+
+
 class ProgressWrapper:
     """Common progress wrapper for file uploads."""

@@ -462,32 +473,30 @@ class ProgressWrapper:
         return self.file_obj.tell()


-class DataStorageMethods:
+class DataStorageMethods:  # pylint: disable=too-many-public-methods
     """Data storage methods for RestClient.

     This class contains methods for interacting with the data storage API endpoints.
     """

     # needed for mypy `NoReturn`
-    def _handle_http_errors(self, e: HTTPStatusError) -> NoReturn:
+    def _handle_http_errors(self, e: HTTPStatusError, operation: str) -> NoReturn:
         """Handle common HTTP errors for data storage operations."""
         if e.response.status_code == codes.FORBIDDEN:
-            raise
-            "
+            raise DataStorageError(
+                f"Error {operation} data storage entry, not authorized"
             ) from e
         if e.response.status_code == codes.UNPROCESSABLE_ENTITY:
-            raise
-
-
-        raise DataStorageCreationError(
-            f"Error creating data storage entry: {e.response.status_code} - {e.response.text}"
+            raise DataStorageError(f"Invalid request payload: {e.response.text}") from e
+        raise DataStorageError(
+            f"Error {operation} data storage entry: {e.response.status_code} - {e.response.text}"
         ) from e

     def _validate_file_path(self, file_path: str | Path) -> Path:
         """Validate file path exists and return Path object."""
         file_path = Path(file_path)
         if not file_path.exists():
-            raise
+            raise DataStorageError(f"File or directory not found: {file_path}")
         return file_path

     def _build_zip_path(self, name: str, path: str | None) -> str:
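With _handle_http_errors now taking an operation argument and the new DataStorageRetrievalError class, callers can tell retrieval failures apart from other storage errors. A hedged usage sketch (assuming the usual FutureHouseClient entry point; the API key and ID below are placeholders):

from uuid import UUID

from futurehouse_client import FutureHouseClient  # assumed public entry point
from futurehouse_client.clients.data_storage_methods import (
    DataStorageError,
    DataStorageRetrievalError,
)

client = FutureHouseClient(api_key="your-api-key")  # placeholder credentials
entry_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder ID

try:
    data = client.fetch_data_from_storage(data_storage_id=entry_id)
except DataStorageRetrievalError as e:
    # Raised for missing IDs, missing signed URLs, or unsupported storage types.
    print(f"Could not retrieve entry: {e}")
except DataStorageError as e:
    # _handle_http_errors now names the failing operation in the message.
    print(f"Data storage API error: {e}")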
@@ -529,19 +538,24 @@ class DataStorageMethods:
             return extracted_items[0]
         return extract_dir

-    async def _adownload_from_gcs(
+    async def _adownload_from_gcs(
+        self, signed_url: str, file_name: str | None = None
+    ) -> Path:
         """Download file from GCS using signed URL and handle unzipping if needed.

         Args:
             signed_url: The signed URL to download from
+            file_name: The name of the file to download

         Returns:
             Path to the downloaded file (or unzipped directory if it was a zip)
         """
+        file_name = file_name or "downloaded_file"
+
         try:
             with tempfile.TemporaryDirectory() as temp_dir_str:
                 temp_dir = Path(temp_dir_str)
-                temp_file = temp_dir /
+                temp_file = temp_dir / file_name

                 async with self.async_client.stream("GET", signed_url) as response:
                     response.raise_for_status()
@@ -549,11 +563,11 @@
                     content_disposition = response.headers.get(
                         "content-disposition", ""
                     )
-                    filename =
+                    filename = file_name
                     if "filename=" in content_disposition:
                         filename = content_disposition.split("filename=")[-1].strip('"')

-                    if filename !=
+                    if filename != file_name:
                         temp_file = temp_dir / filename

                     async with aiofiles.open(temp_file, "wb") as f:
@@ -583,21 +597,23 @@
             return final_file

         except Exception as e:
-            raise
+            raise DataStorageError(f"Failed to download from GCS: {e}") from e

-    def _download_from_gcs(self, signed_url: str) -> Path:
+    def _download_from_gcs(self, signed_url: str, file_name: str | None = None) -> Path:
         """Download file from GCS using signed URL and handle unzipping if needed (sync version).

         Args:
             signed_url: The signed URL to download from
-
+            file_name: The name of the file to download
         Returns:
             Path to the downloaded file (or unzipped directory if it was a zip)
         """
+        file_name = file_name or "downloaded_file"
+
         try:
             with tempfile.TemporaryDirectory() as temp_dir_str:
                 temp_dir = Path(temp_dir_str)
-                temp_file = temp_dir /
+                temp_file = temp_dir / file_name

                 with requests_lib.get(signed_url, stream=True, timeout=30) as response:
                     response.raise_for_status()
@@ -605,11 +621,11 @@
                     content_disposition = response.headers.get(
                         "content-disposition", ""
                     )
-                    filename =
+                    filename = file_name
                     if "filename=" in content_disposition:
                         filename = content_disposition.split("filename=")[-1].strip('"')

-                    if filename !=
+                    if filename != file_name:
                         temp_file = temp_dir / filename

                     with open(temp_file, "wb") as f:
@@ -639,7 +655,7 @@
             return final_file

         except Exception as e:
-            raise
+            raise DataStorageError(f"Failed to download from GCS: {e}") from e

     # =====================================

@@ -676,7 +692,7 @@ class DataStorageMethods:
     ) -> DataStorageResponse:
         """Create data storage entry via API (sync version)."""
         response = self.client.post(
-            "/v0.1/data-storage",
+            "/v0.1/data-storage/data-entries",
             json=payload.model_dump(mode="json", exclude_none=True),
         )
         response.raise_for_status()
@@ -687,7 +703,7 @@ class DataStorageMethods:
     ) -> DataStorageResponse:
         """Create data storage entry via API (async version)."""
         response = await self.async_client.post(
-            "/v0.1/data-storage",
+            "/v0.1/data-storage/data-entries",
             json=payload.model_dump(mode="json", exclude_none=True),
         )
         response.raise_for_status()
|
|
761
777
|
path: str | None = None,
|
762
778
|
ignore_patterns: list[str] | None = None,
|
763
779
|
ignore_filename: str = ".gitignore",
|
780
|
+
project_id: UUID | None = None,
|
764
781
|
) -> DataStorageResponse:
|
765
782
|
"""Upload a directory as a single zip file collection.
|
766
783
|
|
@@ -771,6 +788,7 @@ class DataStorageMethods:
|
|
771
788
|
path: Optional GCS path for the zip file
|
772
789
|
ignore_patterns: List of patterns to ignore when zipping
|
773
790
|
ignore_filename: Name of ignore file to read from directory
|
791
|
+
project_id: ID of the project this data storage entry belongs to
|
774
792
|
|
775
793
|
Returns:
|
776
794
|
DataStorageResponse for the uploaded zip file
|
@@ -790,6 +808,7 @@ class DataStorageMethods:
|
|
790
808
|
description=description,
|
791
809
|
path=zip_gcs_path,
|
792
810
|
is_collection=True,
|
811
|
+
project_id=project_id,
|
793
812
|
)
|
794
813
|
|
795
814
|
logger.debug(
|
@@ -797,24 +816,30 @@ class DataStorageMethods:
|
|
797
816
|
)
|
798
817
|
data_storage_response = self._create_data_storage_entry(payload)
|
799
818
|
|
800
|
-
|
801
|
-
|
819
|
+
for storage_location in data_storage_response.storage_locations:
|
820
|
+
if not storage_location.storage_config.signed_url:
|
821
|
+
raise DataStorageCreationError(
|
822
|
+
"No signed URL returned for zip upload"
|
823
|
+
)
|
802
824
|
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
825
|
+
with tqdm(
|
826
|
+
total=zip_size,
|
827
|
+
unit="B",
|
828
|
+
unit_scale=True,
|
829
|
+
unit_divisor=1024,
|
830
|
+
desc=f"Uploading {dir_path.name} (zipped)",
|
831
|
+
miniters=1,
|
832
|
+
mininterval=0.1,
|
833
|
+
) as pbar:
|
834
|
+
_upload_file_with_progress(
|
835
|
+
storage_location.storage_config.signed_url,
|
836
|
+
temp_zip_path,
|
837
|
+
pbar,
|
838
|
+
zip_size,
|
839
|
+
)
|
815
840
|
|
816
841
|
status_response = self.client.patch(
|
817
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
842
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
818
843
|
json={"status": "active"},
|
819
844
|
)
|
820
845
|
status_response.raise_for_status()
|
@@ -832,6 +857,7 @@ class DataStorageMethods:
|
|
832
857
|
path: str | None = None,
|
833
858
|
ignore_patterns: list[str] | None = None,
|
834
859
|
ignore_filename: str = ".gitignore",
|
860
|
+
project_id: UUID | None = None,
|
835
861
|
) -> DataStorageResponse:
|
836
862
|
"""Asynchronously upload a directory as a single zip file.
|
837
863
|
|
@@ -842,6 +868,7 @@ class DataStorageMethods:
|
|
842
868
|
path: Optional GCS path for the zip file
|
843
869
|
ignore_patterns: List of patterns to ignore when zipping
|
844
870
|
ignore_filename: Name of ignore file to read from directory
|
871
|
+
project_id: ID of the project this data storage entry belongs to
|
845
872
|
|
846
873
|
Returns:
|
847
874
|
DataStorageResponse for the uploaded zip file
|
@@ -861,28 +888,35 @@ class DataStorageMethods:
|
|
861
888
|
description=description,
|
862
889
|
path=zip_gcs_path,
|
863
890
|
is_collection=True,
|
891
|
+
project_id=project_id,
|
864
892
|
)
|
865
893
|
|
866
894
|
data_storage_response = await self._acreate_data_storage_entry(payload)
|
867
895
|
|
868
|
-
|
869
|
-
|
896
|
+
for storage_location in data_storage_response.storage_locations:
|
897
|
+
if not storage_location.storage_config.signed_url:
|
898
|
+
raise DataStorageCreationError(
|
899
|
+
"No signed URL returned for zip upload"
|
900
|
+
)
|
870
901
|
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
902
|
+
with tqdm(
|
903
|
+
total=zip_size,
|
904
|
+
unit="B",
|
905
|
+
unit_scale=True,
|
906
|
+
unit_divisor=1024,
|
907
|
+
desc=f"Uploading {dir_path.name} (zipped)",
|
908
|
+
miniters=1,
|
909
|
+
mininterval=0.1,
|
910
|
+
) as pbar:
|
911
|
+
await _aupload_file_with_progress(
|
912
|
+
storage_location.storage_config.signed_url,
|
913
|
+
temp_zip_path,
|
914
|
+
pbar,
|
915
|
+
zip_size,
|
916
|
+
)
|
883
917
|
|
884
918
|
status_response = await self.async_client.patch(
|
885
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
919
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
886
920
|
json={"status": "active"},
|
887
921
|
)
|
888
922
|
status_response.raise_for_status()
|
@@ -898,6 +932,7 @@ class DataStorageMethods:
|
|
898
932
|
file_path: Path,
|
899
933
|
description: str | None,
|
900
934
|
path: str | None = None,
|
935
|
+
project_id: UUID | None = None,
|
901
936
|
) -> DataStorageResponse:
|
902
937
|
"""Upload a single file."""
|
903
938
|
file_size = file_path.stat().st_size
|
@@ -918,6 +953,7 @@ class DataStorageMethods:
|
|
918
953
|
content=text_content,
|
919
954
|
path=path,
|
920
955
|
is_collection=False,
|
956
|
+
project_id=project_id,
|
921
957
|
)
|
922
958
|
|
923
959
|
logger.debug("Sending file as text content")
|
@@ -934,6 +970,7 @@ class DataStorageMethods:
|
|
934
970
|
description=description,
|
935
971
|
path=path,
|
936
972
|
is_collection=False,
|
973
|
+
project_id=project_id,
|
937
974
|
)
|
938
975
|
|
939
976
|
logger.debug(
|
@@ -942,30 +979,34 @@ class DataStorageMethods:
|
|
942
979
|
|
943
980
|
data_storage_response = self._create_data_storage_entry(payload)
|
944
981
|
|
945
|
-
|
946
|
-
|
982
|
+
for storage_location in data_storage_response.storage_locations:
|
983
|
+
if not storage_location.storage_config.signed_url:
|
984
|
+
raise DataStorageCreationError("No signed URL returned from server")
|
947
985
|
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
986
|
+
with tqdm(
|
987
|
+
total=file_size,
|
988
|
+
unit="B",
|
989
|
+
unit_scale=True,
|
990
|
+
unit_divisor=1024,
|
991
|
+
desc=f"Uploading {file_path.name}",
|
992
|
+
miniters=1,
|
993
|
+
mininterval=0.1,
|
994
|
+
) as pbar:
|
995
|
+
try:
|
996
|
+
_upload_file_with_progress(
|
997
|
+
storage_location.storage_config.signed_url,
|
998
|
+
file_path,
|
999
|
+
pbar,
|
1000
|
+
file_size,
|
1001
|
+
)
|
1002
|
+
logger.debug("File upload to signed URL completed successfully")
|
1003
|
+
except Exception as e:
|
1004
|
+
logger.error(f"Failed to upload file to signed URL: {e}")
|
1005
|
+
raise
|
965
1006
|
|
966
1007
|
logger.debug("Updating data storage status to active")
|
967
1008
|
status_response = self.client.patch(
|
968
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
1009
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
969
1010
|
json={"status": "active"},
|
970
1011
|
)
|
971
1012
|
status_response.raise_for_status()
|
@@ -980,6 +1021,7 @@ class DataStorageMethods:
|
|
980
1021
|
description: str | None,
|
981
1022
|
path: str | None = None,
|
982
1023
|
dataset_id: UUID | None = None,
|
1024
|
+
project_id: UUID | None = None,
|
983
1025
|
) -> DataStorageResponse:
|
984
1026
|
"""Asynchronously upload a single file."""
|
985
1027
|
file_size, text_payload = self._prepare_single_file_upload(
|
@@ -1000,28 +1042,33 @@ class DataStorageMethods:
|
|
1000
1042
|
path=path,
|
1001
1043
|
is_collection=False,
|
1002
1044
|
dataset_id=dataset_id,
|
1045
|
+
project_id=project_id,
|
1003
1046
|
)
|
1004
1047
|
|
1005
1048
|
data_storage_response = await self._acreate_data_storage_entry(payload)
|
1006
1049
|
|
1007
|
-
|
1008
|
-
|
1050
|
+
for location in data_storage_response.storage_locations:
|
1051
|
+
if not location.storage_config.signed_url:
|
1052
|
+
raise DataStorageCreationError(
|
1053
|
+
f"No signed URL returned from server for location: {location.id}"
|
1054
|
+
)
|
1009
1055
|
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1056
|
+
with tqdm(
|
1057
|
+
total=file_size,
|
1058
|
+
unit="B",
|
1059
|
+
unit_scale=True,
|
1060
|
+
unit_divisor=1024,
|
1061
|
+
desc=f"Uploading {file_path.name}",
|
1062
|
+
miniters=1,
|
1063
|
+
mininterval=0.1,
|
1064
|
+
leave=False,
|
1065
|
+
) as pbar:
|
1066
|
+
await _aupload_file_with_progress(
|
1067
|
+
location.storage_config.signed_url, file_path, pbar, file_size
|
1068
|
+
)
|
1022
1069
|
|
1023
1070
|
status_response = await self.async_client.patch(
|
1024
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
1071
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
1025
1072
|
json={"status": "active"},
|
1026
1073
|
)
|
1027
1074
|
status_response.raise_for_status()
|
@@ -1036,6 +1083,7 @@ class DataStorageMethods:
|
|
1036
1083
|
path: str | None,
|
1037
1084
|
parent_id: UUID | None,
|
1038
1085
|
dataset_id: UUID | None = None,
|
1086
|
+
project_id: UUID | None = None,
|
1039
1087
|
) -> DataStorageResponse:
|
1040
1088
|
"""Upload a single file with a parent ID (sync version)."""
|
1041
1089
|
file_size, text_payload = self._prepare_single_file_upload(
|
@@ -1046,6 +1094,7 @@ class DataStorageMethods:
|
|
1046
1094
|
logger.debug("Sending file as text content with parent_id")
|
1047
1095
|
text_payload.parent_id = parent_id
|
1048
1096
|
text_payload.dataset_id = dataset_id
|
1097
|
+
text_payload.project_id = project_id
|
1049
1098
|
return self._create_data_storage_entry(text_payload)
|
1050
1099
|
|
1051
1100
|
logger.debug(
|
@@ -1058,28 +1107,30 @@ class DataStorageMethods:
|
|
1058
1107
|
is_collection=False,
|
1059
1108
|
parent_id=parent_id,
|
1060
1109
|
dataset_id=dataset_id,
|
1110
|
+
project_id=project_id,
|
1061
1111
|
)
|
1062
1112
|
data_storage_response = self._create_data_storage_entry(payload)
|
1063
1113
|
|
1064
|
-
|
1065
|
-
|
1114
|
+
for location in data_storage_response.storage_locations:
|
1115
|
+
if not location.storage_config.signed_url:
|
1116
|
+
raise DataStorageCreationError("No signed URL returned from server")
|
1066
1117
|
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1118
|
+
with tqdm(
|
1119
|
+
total=file_size,
|
1120
|
+
unit="B",
|
1121
|
+
unit_scale=True,
|
1122
|
+
unit_divisor=1024,
|
1123
|
+
desc=f"Uploading {file_path.name}",
|
1124
|
+
miniters=1,
|
1125
|
+
mininterval=0.1,
|
1126
|
+
leave=False,
|
1127
|
+
) as pbar:
|
1128
|
+
_upload_file_with_progress(
|
1129
|
+
location.storage_config.signed_url, file_path, pbar, file_size
|
1130
|
+
)
|
1080
1131
|
|
1081
1132
|
status_response = self.client.patch(
|
1082
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
1133
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
1083
1134
|
json={"status": "active"},
|
1084
1135
|
)
|
1085
1136
|
status_response.raise_for_status()
|
@@ -1092,6 +1143,7 @@ class DataStorageMethods:
|
|
1092
1143
|
dir_manifest: DirectoryManifest,
|
1093
1144
|
current_parent_id: UUID,
|
1094
1145
|
dataset_id: UUID | None = None,
|
1146
|
+
project_id: UUID | None = None,
|
1095
1147
|
) -> DataStorageResponse | None:
|
1096
1148
|
"""Process a single file item for upload."""
|
1097
1149
|
try:
|
@@ -1109,6 +1161,7 @@ class DataStorageMethods:
|
|
1109
1161
|
path=None,
|
1110
1162
|
parent_id=current_parent_id,
|
1111
1163
|
dataset_id=dataset_id,
|
1164
|
+
project_id=project_id,
|
1112
1165
|
)
|
1113
1166
|
except Exception as e:
|
1114
1167
|
logger.error(f"Failed to upload file {item}: {e}")
|
@@ -1126,6 +1179,7 @@ class DataStorageMethods:
|
|
1126
1179
|
base_dir: Path | None = None,
|
1127
1180
|
dir_manifest: DirectoryManifest | None = None,
|
1128
1181
|
dataset_id: UUID | None = None,
|
1182
|
+
project_id: UUID | None = None,
|
1129
1183
|
) -> list[DataStorageResponse]:
|
1130
1184
|
"""Upload a directory with single dataset and individual file storage entries."""
|
1131
1185
|
responses = []
|
@@ -1141,6 +1195,7 @@ class DataStorageMethods:
|
|
1141
1195
|
parent_id=None,
|
1142
1196
|
dataset_id=None,
|
1143
1197
|
is_collection=False,
|
1198
|
+
project_id=project_id,
|
1144
1199
|
)
|
1145
1200
|
|
1146
1201
|
dir_response = self._create_data_storage_entry(payload)
|
@@ -1182,6 +1237,7 @@ class DataStorageMethods:
|
|
1182
1237
|
parent_id=current_parent_id,
|
1183
1238
|
dataset_id=current_dataset_id,
|
1184
1239
|
is_collection=False,
|
1240
|
+
project_id=project_id,
|
1185
1241
|
)
|
1186
1242
|
subdir_response = self._create_data_storage_entry(subdir_payload)
|
1187
1243
|
responses.append(subdir_response)
|
@@ -1197,6 +1253,7 @@ class DataStorageMethods:
|
|
1197
1253
|
base_dir=base_dir,
|
1198
1254
|
dir_manifest=subdir_manifest,
|
1199
1255
|
dataset_id=current_dataset_id,
|
1256
|
+
project_id=project_id,
|
1200
1257
|
)
|
1201
1258
|
responses.extend(subdir_responses)
|
1202
1259
|
elif item.is_file():
|
@@ -1247,6 +1304,7 @@ class DataStorageMethods:
|
|
1247
1304
|
path: str | None,
|
1248
1305
|
parent_id: UUID | None,
|
1249
1306
|
dataset_id: UUID | None = None,
|
1307
|
+
project_id: UUID | None = None,
|
1250
1308
|
) -> DataStorageResponse:
|
1251
1309
|
"""Asynchronously upload a single file with a parent ID."""
|
1252
1310
|
file_size, text_payload = self._prepare_single_file_upload(
|
@@ -1257,6 +1315,7 @@ class DataStorageMethods:
|
|
1257
1315
|
logger.debug("Sending file as text content with parent_id")
|
1258
1316
|
text_payload.parent_id = parent_id
|
1259
1317
|
text_payload.dataset_id = dataset_id
|
1318
|
+
text_payload.project_id = project_id
|
1260
1319
|
return await self._acreate_data_storage_entry(text_payload)
|
1261
1320
|
|
1262
1321
|
logger.debug(
|
@@ -1269,10 +1328,13 @@ class DataStorageMethods:
|
|
1269
1328
|
is_collection=False,
|
1270
1329
|
parent_id=parent_id,
|
1271
1330
|
dataset_id=dataset_id,
|
1331
|
+
project_id=project_id,
|
1272
1332
|
)
|
1273
1333
|
data_storage_response = await self._acreate_data_storage_entry(payload)
|
1274
1334
|
|
1275
|
-
|
1335
|
+
storage_location = data_storage_response.storage_locations[0]
|
1336
|
+
|
1337
|
+
if not storage_location.storage_config.signed_url:
|
1276
1338
|
raise DataStorageCreationError("No signed URL returned from server")
|
1277
1339
|
|
1278
1340
|
with tqdm(
|
@@ -1285,11 +1347,11 @@ class DataStorageMethods:
|
|
1285
1347
|
mininterval=0.1,
|
1286
1348
|
) as pbar:
|
1287
1349
|
await _aupload_file_with_progress(
|
1288
|
-
|
1350
|
+
storage_location.storage_config.signed_url, file_path, pbar, file_size
|
1289
1351
|
)
|
1290
1352
|
|
1291
1353
|
status_response = await self.async_client.patch(
|
1292
|
-
f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
|
1354
|
+
f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
|
1293
1355
|
json={"status": "active"},
|
1294
1356
|
)
|
1295
1357
|
status_response.raise_for_status()
|
@@ -1302,6 +1364,7 @@ class DataStorageMethods:
|
|
1302
1364
|
dir_manifest: DirectoryManifest,
|
1303
1365
|
current_parent_id: UUID,
|
1304
1366
|
dataset_id: UUID | None = None,
|
1367
|
+
project_id: UUID | None = None,
|
1305
1368
|
) -> DataStorageResponse | None:
|
1306
1369
|
"""Asynchronously process a single file item for upload."""
|
1307
1370
|
try:
|
@@ -1319,6 +1382,7 @@ class DataStorageMethods:
|
|
1319
1382
|
path=None,
|
1320
1383
|
parent_id=current_parent_id,
|
1321
1384
|
dataset_id=dataset_id,
|
1385
|
+
project_id=project_id,
|
1322
1386
|
)
|
1323
1387
|
except Exception as e:
|
1324
1388
|
logger.error(f"Failed to upload file {item}: {e}")
|
@@ -1336,6 +1400,7 @@ class DataStorageMethods:
|
|
1336
1400
|
base_dir: Path | None = None,
|
1337
1401
|
dir_manifest: DirectoryManifest | None = None,
|
1338
1402
|
dataset_id: UUID | None = None,
|
1403
|
+
project_id: UUID | None = None,
|
1339
1404
|
) -> list[DataStorageResponse]:
|
1340
1405
|
"""Upload a directory with single dataset and individual file storage entries (async)."""
|
1341
1406
|
responses = []
|
@@ -1352,6 +1417,7 @@ class DataStorageMethods:
|
|
1352
1417
|
parent_id=None,
|
1353
1418
|
dataset_id=None,
|
1354
1419
|
is_collection=False,
|
1420
|
+
project_id=project_id,
|
1355
1421
|
)
|
1356
1422
|
|
1357
1423
|
dir_response = await self._acreate_data_storage_entry(payload)
|
@@ -1392,6 +1458,7 @@ class DataStorageMethods:
|
|
1392
1458
|
parent_id=current_parent_id,
|
1393
1459
|
dataset_id=current_dataset_id,
|
1394
1460
|
is_collection=False,
|
1461
|
+
project_id=project_id,
|
1395
1462
|
)
|
1396
1463
|
subdir_response = await self._acreate_data_storage_entry(subdir_payload)
|
1397
1464
|
responses.append(subdir_response)
|
@@ -1407,6 +1474,7 @@ class DataStorageMethods:
|
|
1407
1474
|
base_dir=base_dir,
|
1408
1475
|
dir_manifest=subdir_manifest,
|
1409
1476
|
dataset_id=current_dataset_id,
|
1477
|
+
project_id=project_id,
|
1410
1478
|
)
|
1411
1479
|
responses.extend(subdir_responses)
|
1412
1480
|
elif item.is_file():
|
@@ -1443,6 +1511,7 @@ class DataStorageMethods:
|
|
1443
1511
|
content: str,
|
1444
1512
|
description: str | None = None,
|
1445
1513
|
path: str | None = None,
|
1514
|
+
project_id: UUID | None = None,
|
1446
1515
|
) -> DataStorageResponse:
|
1447
1516
|
"""Store content as a string in the data storage system.
|
1448
1517
|
|
@@ -1451,6 +1520,7 @@ class DataStorageMethods:
|
|
1451
1520
|
content: Content to store as a string
|
1452
1521
|
description: Optional description of the data storage entry
|
1453
1522
|
path: Optional path for the data storage entry
|
1523
|
+
project_id: ID of the project this data storage entry belongs to
|
1454
1524
|
|
1455
1525
|
Returns:
|
1456
1526
|
DataStorageResponse containing the created data storage entry and storage locations
|
@@ -1464,10 +1534,11 @@ class DataStorageMethods:
|
|
1464
1534
|
content=content,
|
1465
1535
|
description=description,
|
1466
1536
|
path=path,
|
1537
|
+
project_id=project_id,
|
1467
1538
|
)
|
1468
1539
|
return self._create_data_storage_entry(payload)
|
1469
1540
|
except HTTPStatusError as e:
|
1470
|
-
self._handle_http_errors(e)
|
1541
|
+
self._handle_http_errors(e, "creating")
|
1471
1542
|
except Exception as e:
|
1472
1543
|
raise DataStorageCreationError(
|
1473
1544
|
f"An unexpected error occurred: {e!r}"
|
@@ -1486,6 +1557,7 @@ class DataStorageMethods:
|
|
1486
1557
|
description: str | None = None,
|
1487
1558
|
path: str | None = None,
|
1488
1559
|
dataset_id: UUID | None = None,
|
1560
|
+
project_id: UUID | None = None,
|
1489
1561
|
) -> DataStorageResponse:
|
1490
1562
|
"""Asynchronously store content as a string in the data storage system.
|
1491
1563
|
|
@@ -1495,6 +1567,7 @@ class DataStorageMethods:
|
|
1495
1567
|
description: Optional description of the data storage entry
|
1496
1568
|
path: Optional path for the data storage entry
|
1497
1569
|
dataset_id: Optional dataset ID to add entry to, or None to create new dataset
|
1570
|
+
project_id: ID of the project this data storage entry belongs to
|
1498
1571
|
|
1499
1572
|
Returns:
|
1500
1573
|
DataStorageResponse containing the created data storage entry and storage locations
|
@@ -1509,10 +1582,11 @@ class DataStorageMethods:
|
|
1509
1582
|
description=description,
|
1510
1583
|
path=path,
|
1511
1584
|
dataset_id=dataset_id,
|
1585
|
+
project_id=project_id,
|
1512
1586
|
)
|
1513
1587
|
return await self._acreate_data_storage_entry(payload)
|
1514
1588
|
except HTTPStatusError as e:
|
1515
|
-
self._handle_http_errors(e)
|
1589
|
+
self._handle_http_errors(e, "creating")
|
1516
1590
|
except Exception as e:
|
1517
1591
|
raise DataStorageCreationError(
|
1518
1592
|
f"An unexpected error occurred: {e!r}"
|
@@ -1534,6 +1608,7 @@ class DataStorageMethods:
|
|
1534
1608
|
manifest_filename: str | None = None,
|
1535
1609
|
ignore_patterns: list[str] | None = None,
|
1536
1610
|
ignore_filename: str = ".gitignore",
|
1611
|
+
project_id: UUID | None = None,
|
1537
1612
|
) -> DataStorageResponse:
|
1538
1613
|
"""Store file or directory content in the data storage system.
|
1539
1614
|
|
@@ -1552,6 +1627,7 @@ class DataStorageMethods:
|
|
1552
1627
|
manifest_filename: Name of manifest file
|
1553
1628
|
ignore_patterns: List of patterns to ignore when zipping directories
|
1554
1629
|
ignore_filename: Name of ignore file to read from directory (default: .gitignore)
|
1630
|
+
project_id: ID of the project this data storage entry belongs to
|
1555
1631
|
|
1556
1632
|
Returns:
|
1557
1633
|
DataStorageResponse containing the final data storage entry
|
@@ -1564,7 +1640,13 @@ class DataStorageMethods:
|
|
1564
1640
|
try:
|
1565
1641
|
if file_path.is_dir() and as_collection:
|
1566
1642
|
return self._upload_data_directory(
|
1567
|
-
name,
|
1643
|
+
name,
|
1644
|
+
file_path,
|
1645
|
+
description,
|
1646
|
+
path,
|
1647
|
+
ignore_patterns,
|
1648
|
+
ignore_filename,
|
1649
|
+
project_id,
|
1568
1650
|
)
|
1569
1651
|
if file_path.is_dir() and not as_collection:
|
1570
1652
|
responses = self._upload_directory_hierarchically(
|
@@ -1574,16 +1656,19 @@ class DataStorageMethods:
|
|
1574
1656
|
manifest_filename=manifest_filename,
|
1575
1657
|
ignore_patterns=ignore_patterns,
|
1576
1658
|
ignore_filename=ignore_filename,
|
1659
|
+
project_id=project_id,
|
1577
1660
|
)
|
1578
1661
|
if not responses:
|
1579
1662
|
raise DataStorageCreationError(
|
1580
1663
|
"No data storage entries were created"
|
1581
1664
|
)
|
1582
1665
|
return responses[0]
|
1583
|
-
return self._upload_data_single_file(
|
1666
|
+
return self._upload_data_single_file(
|
1667
|
+
name, file_path, description, path, project_id
|
1668
|
+
)
|
1584
1669
|
|
1585
1670
|
except HTTPStatusError as e:
|
1586
|
-
self._handle_http_errors(e)
|
1671
|
+
self._handle_http_errors(e, "creating")
|
1587
1672
|
except Exception as e:
|
1588
1673
|
raise DataStorageCreationError(
|
1589
1674
|
f"An unexpected error occurred during file upload: {e!r}"
|
@@ -1606,6 +1691,7 @@ class DataStorageMethods:
|
|
1606
1691
|
ignore_patterns: list[str] | None = None,
|
1607
1692
|
ignore_filename: str = ".gitignore",
|
1608
1693
|
dataset_id: UUID | None = None,
|
1694
|
+
project_id: UUID | None = None,
|
1609
1695
|
) -> DataStorageResponse:
|
1610
1696
|
"""Asynchronously store file or directory content in the data storage system.
|
1611
1697
|
|
@@ -1620,6 +1706,7 @@ class DataStorageMethods:
|
|
1620
1706
|
ignore_patterns: List of patterns to ignore when zipping.
|
1621
1707
|
ignore_filename: Name of ignore file to read (default: .gitignore).
|
1622
1708
|
dataset_id: Optional dataset ID to add entry to, or None to create new dataset.
|
1709
|
+
project_id: ID of the project this data storage entry belongs to
|
1623
1710
|
|
1624
1711
|
Returns:
|
1625
1712
|
The `DataStorageResponse` for the created entry. For hierarchical uploads,
|
@@ -1637,6 +1724,7 @@ class DataStorageMethods:
|
|
1637
1724
|
path,
|
1638
1725
|
ignore_patterns,
|
1639
1726
|
ignore_filename,
|
1727
|
+
project_id,
|
1640
1728
|
)
|
1641
1729
|
responses = await self._aupload_directory_hierarchically(
|
1642
1730
|
name=name,
|
@@ -1646,6 +1734,7 @@ class DataStorageMethods:
|
|
1646
1734
|
ignore_patterns=ignore_patterns,
|
1647
1735
|
ignore_filename=ignore_filename,
|
1648
1736
|
dataset_id=dataset_id,
|
1737
|
+
project_id=project_id,
|
1649
1738
|
)
|
1650
1739
|
if not responses:
|
1651
1740
|
raise DataStorageCreationError(
|
@@ -1653,11 +1742,11 @@ class DataStorageMethods:
|
|
1653
1742
|
)
|
1654
1743
|
return responses[0]
|
1655
1744
|
return await self._aupload_data_single_file(
|
1656
|
-
name, file_path, description, path, dataset_id
|
1745
|
+
name, file_path, description, path, dataset_id, project_id
|
1657
1746
|
)
|
1658
1747
|
|
1659
1748
|
except HTTPStatusError as e:
|
1660
|
-
self._handle_http_errors(e)
|
1749
|
+
self._handle_http_errors(e, "creating")
|
1661
1750
|
except Exception as e:
|
1662
1751
|
raise DataStorageCreationError(
|
1663
1752
|
f"An unexpected error occurred during async file upload: {e!r}"
|
@@ -1674,7 +1763,9 @@ class DataStorageMethods:
|
|
1674
1763
|
name: str,
|
1675
1764
|
existing_location: DataStorageLocationPayload,
|
1676
1765
|
description: str | None = None,
|
1766
|
+
as_collection: bool = False,
|
1677
1767
|
path: str | None = None,
|
1768
|
+
project_id: UUID | None = None,
|
1678
1769
|
) -> DataStorageResponse:
|
1679
1770
|
"""Store content as a string in the data storage system.
|
1680
1771
|
|
@@ -1682,7 +1773,11 @@ class DataStorageMethods:
|
|
1682
1773
|
name: Name of the data storage entry
|
1683
1774
|
existing_location: Describes the existing data source location to register
|
1684
1775
|
description: Optional description of the data storage entry
|
1776
|
+
as_collection: If uploading a directory, `True` creates a single storage entry for
|
1777
|
+
the whole directory and multiple storage locations for each file, `False` assumes
|
1778
|
+
you are uploading a single file.
|
1685
1779
|
path: Optional path for the data storage entry
|
1780
|
+
project_id: ID of the project this data storage entry belongs to
|
1686
1781
|
|
1687
1782
|
Returns:
|
1688
1783
|
DataStorageResponse containing the created data storage entry and storage locations
|
@@ -1696,14 +1791,17 @@ class DataStorageMethods:
|
|
1696
1791
|
description=description,
|
1697
1792
|
path=path,
|
1698
1793
|
existing_location=existing_location,
|
1794
|
+
project_id=project_id,
|
1795
|
+
is_collection=as_collection,
|
1699
1796
|
)
|
1700
1797
|
response = self.client.post(
|
1701
|
-
"/v0.1/data-storage",
|
1798
|
+
"/v0.1/data-storage/data-entries",
|
1799
|
+
json=payload.model_dump(exclude_none=True),
|
1702
1800
|
)
|
1703
1801
|
response.raise_for_status()
|
1704
1802
|
return DataStorageResponse.model_validate(response.json())
|
1705
1803
|
except HTTPStatusError as e:
|
1706
|
-
self._handle_http_errors(e)
|
1804
|
+
self._handle_http_errors(e, "creating")
|
1707
1805
|
except Exception as e:
|
1708
1806
|
raise DataStorageCreationError(
|
1709
1807
|
f"An unexpected error occurred: {e!r}"
|
@@ -1719,8 +1817,10 @@ class DataStorageMethods:
|
|
1719
1817
|
self,
|
1720
1818
|
name: str,
|
1721
1819
|
existing_location: DataStorageLocationPayload,
|
1820
|
+
as_collection: bool = False,
|
1722
1821
|
description: str | None = None,
|
1723
1822
|
path: str | None = None,
|
1823
|
+
project_id: UUID | None = None,
|
1724
1824
|
) -> DataStorageResponse:
|
1725
1825
|
"""Store content as a string in the data storage system.
|
1726
1826
|
|
@@ -1728,7 +1828,11 @@ class DataStorageMethods:
|
|
1728
1828
|
name: Name of the data storage entry
|
1729
1829
|
existing_location: Describes the existing data source location to register
|
1730
1830
|
description: Optional description of the data storage entry
|
1831
|
+
as_collection: If uploading a directory, `True` creates a single storage entry for
|
1832
|
+
the whole directory and multiple storage locations for each file, `False` assumes
|
1833
|
+
you are uploading a single file.
|
1731
1834
|
path: Optional path for the data storage entry
|
1835
|
+
project_id: ID of the project this data storage entry belongs to
|
1732
1836
|
|
1733
1837
|
Returns:
|
1734
1838
|
DataStorageResponse containing the created data storage entry and storage locations
|
@@ -1742,19 +1846,290 @@ class DataStorageMethods:
|
|
1742
1846
|
description=description,
|
1743
1847
|
path=path,
|
1744
1848
|
existing_location=existing_location,
|
1849
|
+
project_id=project_id,
|
1850
|
+
is_collection=as_collection,
|
1745
1851
|
)
|
1746
1852
|
response = await self.async_client.post(
|
1747
|
-
"/v0.1/data-storage",
|
1853
|
+
"/v0.1/data-storage/data-entries",
|
1854
|
+
json=payload.model_dump(exclude_none=True),
|
1748
1855
|
)
|
1749
1856
|
response.raise_for_status()
|
1750
1857
|
return DataStorageResponse.model_validate(response.json())
|
1751
1858
|
except HTTPStatusError as e:
|
1752
|
-
self._handle_http_errors(e)
|
1859
|
+
self._handle_http_errors(e, "creating")
|
1753
1860
|
except Exception as e:
|
1754
1861
|
raise DataStorageCreationError(
|
1755
1862
|
f"An unexpected error occurred: {e!r}"
|
1756
1863
|
) from e
|
1757
1864
|
|
1865
|
+
@retry(
|
1866
|
+
stop=stop_after_attempt(3),
|
1867
|
+
wait=wait_exponential(multiplier=1, max=10),
|
1868
|
+
retry=retry_if_connection_error,
|
1869
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
1870
|
+
)
|
1871
|
+
def search_data_storage(
|
1872
|
+
self,
|
1873
|
+
criteria: list[SearchCriterion] | None = None,
|
1874
|
+
size: int = 10,
|
1875
|
+
) -> list[dict]:
|
1876
|
+
"""Search data storage objects using structured criteria.
|
1877
|
+
|
1878
|
+
Args:
|
1879
|
+
criteria: List of search criteria (SearchCriterion objects with field, operator, value)
|
1880
|
+
size: Number of results to return (1-100)
|
1881
|
+
|
1882
|
+
Returns:
|
1883
|
+
List of search results with scores and data storage information
|
1884
|
+
|
1885
|
+
Raises:
|
1886
|
+
DataStorageCreationError: If there's an error searching data storage entries
|
1887
|
+
|
1888
|
+
Example:
|
1889
|
+
from futurehouse_client.models.rest import SearchCriterion, SearchOperator
|
1890
|
+
criteria = [
|
1891
|
+
SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="document"),
|
1892
|
+
SearchCriterion(field="project_id", operator=SearchOperator.EQUALS, value="my-project-id"),
|
1893
|
+
SearchCriterion(field="status", operator=SearchOperator.EQUALS, value="active"),
|
1894
|
+
]
|
1895
|
+
results = client.search_data_storage(criteria=criteria, size=20)
|
1896
|
+
"""
|
1897
|
+
try:
|
1898
|
+
payload = DataStorageSearchPayload(
|
1899
|
+
criteria=criteria or [],
|
1900
|
+
size=max(1, min(100, size)), # Clamp between 1-100
|
1901
|
+
)
|
1902
|
+
|
1903
|
+
response = self.client.post(
|
1904
|
+
"/v0.1/data-storage/search",
|
1905
|
+
json=payload.model_dump(mode="json"),
|
1906
|
+
)
|
1907
|
+
response.raise_for_status()
|
1908
|
+
return response.json()
|
1909
|
+
|
1910
|
+
except HTTPStatusError as e:
|
1911
|
+
if e.response.status_code == codes.SERVICE_UNAVAILABLE:
|
1912
|
+
raise DataStorageCreationError(
|
1913
|
+
"Search functionality is currently unavailable"
|
1914
|
+
) from e
|
1915
|
+
self._handle_http_errors(e, "searching")
|
1916
|
+
except Exception as e:
|
1917
|
+
raise DataStorageCreationError(
|
1918
|
+
f"An unexpected error occurred during search: {e!r}"
|
1919
|
+
) from e
|
1920
|
+
|
1921
|
+
@retry(
|
1922
|
+
stop=stop_after_attempt(3),
|
1923
|
+
wait=wait_exponential(multiplier=1, max=10),
|
1924
|
+
retry=retry_if_connection_error,
|
1925
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
1926
|
+
)
|
1927
|
+
async def asearch_data_storage(
|
1928
|
+
self,
|
1929
|
+
criteria: list[SearchCriterion] | None = None,
|
1930
|
+
size: int = 10,
|
1931
|
+
) -> list[dict]:
|
1932
|
+
"""Asynchronously search data storage objects using structured criteria.
|
1933
|
+
|
1934
|
+
Args:
|
1935
|
+
criteria: List of search criteria (SearchCriterion objects with field, operator, value)
|
1936
|
+
size: Number of results to return (1-100)
|
1937
|
+
|
1938
|
+
Returns:
|
1939
|
+
List of search results with scores and data storage information
|
1940
|
+
|
1941
|
+
Raises:
|
1942
|
+
DataStorageCreationError: If there's an error searching data storage entries
|
1943
|
+
|
1944
|
+
Example:
|
1945
|
+
from futurehouse_client.models.rest import SearchCriterion, SearchOperator
|
1946
|
+
criteria = [
|
1947
|
+
SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="document"),
|
1948
|
+
SearchCriterion(field="project_id", operator=SearchOperator.EQUALS, value="my-project-id"),
|
1949
|
+
SearchCriterion(field="status", operator=SearchOperator.EQUALS, value="active"),
|
1950
|
+
]
|
1951
|
+
results = await client.asearch_data_storage(criteria=criteria, size=20)
|
1952
|
+
"""
|
1953
|
+
try:
|
1954
|
+
payload = DataStorageSearchPayload(
|
1955
|
+
criteria=criteria or [],
|
1956
|
+
size=max(1, min(100, size)), # Clamp between 1-100
|
1957
|
+
)
|
1958
|
+
|
1959
|
+
response = await self.async_client.post(
|
1960
|
+
"/v0.1/data-storage/search",
|
1961
|
+
json=payload.model_dump(mode="json"),
|
1962
|
+
)
|
1963
|
+
response.raise_for_status()
|
1964
|
+
return response.json()
|
1965
|
+
|
1966
|
+
except HTTPStatusError as e:
|
1967
|
+
if e.response.status_code == codes.SERVICE_UNAVAILABLE:
|
1968
|
+
raise DataStorageCreationError(
|
1969
|
+
"Search functionality is currently unavailable"
|
1970
|
+
) from e
|
1971
|
+
self._handle_http_errors(e, "searching")
|
1972
|
+
except Exception as e:
|
1973
|
+
raise DataStorageCreationError(
|
1974
|
+
f"An unexpected error occurred during async search: {e!r}"
|
1975
|
+
) from e
|
1976
|
+
|
1977
|
+
@retry(
|
1978
|
+
stop=stop_after_attempt(3),
|
1979
|
+
wait=wait_exponential(multiplier=1, max=10),
|
1980
|
+
retry=retry_if_connection_error,
|
1981
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
1982
|
+
)
|
1983
|
+
def similarity_search_data_storage(
|
1984
|
+
self,
|
1985
|
+
embedding: list[float],
|
1986
|
+
size: int = 10,
|
1987
|
+
min_score: float = 0.7,
|
1988
|
+
dataset_id: UUID | None = None,
|
1989
|
+
tags: list[str] | None = None,
|
1990
|
+
user_id: str | None = None,
|
1991
|
+
project_id: str | None = None,
|
1992
|
+
) -> list[dict]:
|
1993
|
+
"""Search data storage objects using vector similarity.
|
1994
|
+
|
1995
|
+
Args:
|
1996
|
+
embedding: Embedding vector for similarity search
|
1997
|
+
size: Number of results to return (1-100)
|
1998
|
+
min_score: Minimum similarity score (0.0-1.0)
|
1999
|
+
dataset_id: Optional dataset ID filter
|
2000
|
+
tags: Optional list of tags to filter by
|
2001
|
+
user_id: Optional user ID filter (admin only)
|
2002
|
+
project_id: Optional project ID filter
|
2003
|
+
|
2004
|
+
Returns:
|
2005
|
+
List of search results with similarity scores and data storage information
|
2006
|
+
|
2007
|
+
Raises:
|
2008
|
+
DataStorageCreationError: If there's an error performing similarity search
|
2009
|
+
"""
|
2010
|
+
try:
|
2011
|
+
# Validate inputs
|
2012
|
+
if not embedding:
|
2013
|
+
raise DataStorageCreationError("Embedding vector is required")
|
2014
|
+
|
2015
|
+
if not all(isinstance(x, int | float) for x in embedding):
|
2016
|
+
raise DataStorageCreationError("Embedding must be a list of numbers")
|
2017
|
+
|
2018
|
+
size = max(1, min(100, size)) # Clamp between 1-100
|
2019
|
+
min_score = max(0.0, min(1.0, min_score)) # Clamp between 0.0-1.0
|
2020
|
+
|
2021
|
+
# Build request payload
|
2022
|
+
payload = {
|
2023
|
+
"embedding": embedding,
|
2024
|
+
"size": size,
|
2025
|
+
"min_score": min_score,
|
2026
|
+
}
|
2027
|
+
|
2028
|
+
# Add optional filters
|
2029
|
+
if dataset_id is not None:
|
2030
|
+
payload["dataset_id"] = str(dataset_id)
|
2031
|
+
if tags is not None:
|
2032
|
+
payload["tags"] = tags
|
2033
|
+
if user_id is not None:
|
2034
|
+
payload["user_id"] = user_id
|
2035
|
+
if project_id is not None:
|
2036
|
+
payload["project_id"] = project_id
|
2037
|
+
|
2038
|
+
response = self.client.post(
|
2039
|
+
"/v0.1/data-storage/similarity-search", json=payload
|
2040
|
+
)
|
2041
|
+
response.raise_for_status()
|
2042
|
+
return response.json()
|
2043
|
+
|
2044
|
+
except HTTPStatusError as e:
|
2045
|
+
if e.response.status_code == codes.SERVICE_UNAVAILABLE:
|
2046
|
+
raise DataStorageCreationError(
|
2047
|
+
"Similarity search functionality is currently unavailable"
|
2048
|
+
) from e
|
2049
|
+
self._handle_http_errors(e, "performing similarity search")
|
2050
|
+
except Exception as e:
|
2051
|
+
raise DataStorageCreationError(
|
2052
|
+
f"An unexpected error occurred during similarity search: {e!r}"
|
2053
|
+
) from e
|
2054
|
+
|
2055
|
+
@retry(
|
2056
|
+
stop=stop_after_attempt(3),
|
2057
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2058
|
+
retry=retry_if_connection_error,
|
2059
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2060
|
+
)
|
2061
|
+
async def asimilarity_search_data_storage(
|
2062
|
+
self,
|
2063
|
+
embedding: list[float],
|
2064
|
+
size: int = 10,
|
2065
|
+
min_score: float = 0.7,
|
2066
|
+
dataset_id: UUID | None = None,
|
2067
|
+
tags: list[str] | None = None,
|
2068
|
+
user_id: str | None = None,
|
2069
|
+
project_id: str | None = None,
|
2070
|
+
) -> list[dict]:
|
2071
|
+
"""Asynchronously search data storage objects using vector similarity.
|
2072
|
+
|
2073
|
+
Args:
|
2074
|
+
embedding: Embedding vector for similarity search
|
2075
|
+
size: Number of results to return (1-100)
|
2076
|
+
min_score: Minimum similarity score (0.0-1.0)
|
2077
|
+
dataset_id: Optional dataset ID filter
|
2078
|
+
tags: Optional list of tags to filter by
|
2079
|
+
user_id: Optional user ID filter (admin only)
|
2080
|
+
project_id: Optional project ID filter
|
2081
|
+
|
2082
|
+
Returns:
|
2083
|
+
List of search results with similarity scores and data storage information
|
2084
|
+
|
2085
|
+
Raises:
|
2086
|
+
DataStorageCreationError: If there's an error performing similarity search
|
2087
|
+
"""
|
2088
|
+
try:
|
2089
|
+
# Validate inputs
|
2090
|
+
if not embedding:
|
2091
|
+
raise DataStorageCreationError("Embedding vector is required")
|
2092
|
+
|
2093
|
+
if not all(isinstance(x, int | float) for x in embedding):
|
2094
|
+
raise DataStorageCreationError("Embedding must be a list of numbers")
|
2095
|
+
|
2096
|
+
size = max(1, min(100, size)) # Clamp between 1-100
|
2097
|
+
min_score = max(0.0, min(1.0, min_score)) # Clamp between 0.0-1.0
|
2098
|
+
|
2099
|
+
# Build request payload
|
2100
|
+
payload = {
|
2101
|
+
"embedding": embedding,
|
2102
|
+
"size": size,
|
2103
|
+
"min_score": min_score,
|
2104
|
+
}
|
2105
|
+
|
2106
|
+
# Add optional filters
|
2107
|
+
if dataset_id is not None:
|
2108
|
+
payload["dataset_id"] = str(dataset_id)
|
2109
|
+
if tags is not None:
|
2110
|
+
payload["tags"] = tags
|
2111
|
+
if user_id is not None:
|
2112
|
+
payload["user_id"] = user_id
|
2113
|
+
if project_id is not None:
|
2114
|
+
payload["project_id"] = project_id
|
2115
|
+
|
2116
|
+
response = await self.async_client.post(
|
2117
|
+
"/v0.1/data-storage/similarity-search", json=payload
|
2118
|
+
)
|
2119
|
+
response.raise_for_status()
|
2120
|
+
return response.json()
|
2121
|
+
|
2122
|
+
except HTTPStatusError as e:
|
2123
|
+
if e.response.status_code == codes.SERVICE_UNAVAILABLE:
|
2124
|
+
raise DataStorageCreationError(
|
2125
|
+
"Similarity search functionality is currently unavailable"
|
2126
|
+
) from e
|
2127
|
+
self._handle_http_errors(e, "performing similarity search")
|
2128
|
+
except Exception as e:
|
2129
|
+
raise DataStorageCreationError(
|
2130
|
+
f"An unexpected error occurred during async similarity search: {e!r}"
|
2131
|
+
) from e
|
2132
|
+
|
1758
2133
|
# TODO: EVERYTHING BELOW THIS LINE SHOULD BE MOVED TO FH_TOOLS REPO
|
1759
2134
|
# =================================================
|
1760
2135
|
@retry(
|
@@ -1766,7 +2141,7 @@ class DataStorageMethods:
|
|
1766
2141
|
def fetch_data_from_storage(
|
1767
2142
|
self,
|
1768
2143
|
data_storage_id: UUID | None = None,
|
1769
|
-
) -> str | Path | None:
|
2144
|
+
) -> str | Path | list[Path] | None:
|
1770
2145
|
"""Fetch data from the storage system (sync version).
|
1771
2146
|
|
1772
2147
|
Args:
|
@@ -1775,29 +2150,45 @@ class DataStorageMethods:
|
|
1775
2150
|
Returns:
|
1776
2151
|
For PG_TABLE storage: string content
|
1777
2152
|
For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
|
2153
|
+
For multi-location entries: dict of location IDs to dicts with signed URL and file name
|
1778
2154
|
None if not found or error occurred
|
1779
2155
|
"""
|
1780
2156
|
if not data_storage_id:
|
1781
|
-
raise
|
2157
|
+
raise DataStorageRetrievalError(
|
1782
2158
|
"data_storage_id must be provided at this time"
|
1783
2159
|
)
|
1784
2160
|
|
1785
2161
|
try:
|
1786
|
-
response = self.client.get(
|
2162
|
+
response = self.client.get(
|
2163
|
+
f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
|
2164
|
+
)
|
1787
2165
|
response.raise_for_status()
|
1788
2166
|
result = DataStorageResponse.model_validate(response.json())
|
1789
2167
|
|
1790
|
-
|
2168
|
+
if len(result.storage_locations) > 1:
|
2169
|
+
return [
|
2170
|
+
self._download_from_gcs(
|
2171
|
+
location.storage_config.signed_url or "",
|
2172
|
+
(location.storage_config.location or "").split("/")[-1],
|
2173
|
+
)
|
2174
|
+
for location in result.storage_locations
|
2175
|
+
]
|
2176
|
+
|
2177
|
+
# Most scenarios will only have one location
|
2178
|
+
storage_location = result.storage_locations[0]
|
2179
|
+
storage_type = storage_location.storage_config.storage_type
|
1791
2180
|
|
1792
2181
|
if storage_type == "gcs":
|
1793
|
-
if not
|
1794
|
-
raise
|
2182
|
+
if not storage_location.storage_config.signed_url:
|
2183
|
+
raise DataStorageRetrievalError(
|
1795
2184
|
"No signed URL available for GCS download"
|
1796
2185
|
)
|
1797
2186
|
|
1798
|
-
return self._download_from_gcs(
|
2187
|
+
return self._download_from_gcs(
|
2188
|
+
storage_location.storage_config.signed_url
|
2189
|
+
)
|
1799
2190
|
|
1800
|
-
if storage_type
|
2191
|
+
if storage_type in {"raw_content", "pg_table"}:
|
1801
2192
|
content = result.data_storage.content
|
1802
2193
|
if content is None:
|
1803
2194
|
logger.warning(
|
@@ -1806,12 +2197,12 @@ class DataStorageMethods:
|
|
1806
2197
|
return None
|
1807
2198
|
return content
|
1808
2199
|
|
1809
|
-
raise
|
2200
|
+
raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
|
1810
2201
|
|
1811
2202
|
except HTTPStatusError as e:
|
1812
|
-
self._handle_http_errors(e)
|
2203
|
+
self._handle_http_errors(e, "retrieving")
|
1813
2204
|
except Exception as e:
|
1814
|
-
raise
|
2205
|
+
raise DataStorageRetrievalError(
|
1815
2206
|
f"An unexpected error occurred: {e!r}"
|
1816
2207
|
) from e
|
1817
2208
|
|
@@ -1824,7 +2215,7 @@ class DataStorageMethods:
|
|
1824
2215
|
async def afetch_data_from_storage(
|
1825
2216
|
self,
|
1826
2217
|
data_storage_id: UUID | None = None,
|
1827
|
-
) -> str | Path | None:
|
2218
|
+
) -> str | Path | list[Path] | None:
|
1828
2219
|
"""Fetch data from the storage system.
|
1829
2220
|
|
1830
2221
|
Args:
|
@@ -1833,31 +2224,48 @@ class DataStorageMethods:
|
|
1833
2224
|
Returns:
|
1834
2225
|
For PG_TABLE storage: string content
|
1835
2226
|
For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
|
2227
|
+
For multi-location entries: dict of location IDs to dicts with signed URL and file name
|
1836
2228
|
None if not found or error occurred
|
1837
2229
|
"""
|
1838
2230
|
if not data_storage_id:
|
1839
|
-
raise
|
2231
|
+
raise DataStorageRetrievalError(
|
1840
2232
|
"data_storage_id must be provided at this time"
|
1841
2233
|
)
|
1842
2234
|
|
1843
2235
|
try:
|
1844
2236
|
response = await self.async_client.get(
|
1845
|
-
f"/v0.1/data-storage/{data_storage_id}"
|
2237
|
+
f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
|
1846
2238
|
)
|
1847
2239
|
response.raise_for_status()
|
1848
2240
|
result = DataStorageResponse.model_validate(response.json())
|
1849
2241
|
|
1850
|
-
|
2242
|
+
if len(result.storage_locations) > 1:
|
2243
|
+
return await gather_with_concurrency(
|
2244
|
+
DOWNLOAD_CONCURRENCY,
|
2245
|
+
[
|
2246
|
+
self._adownload_from_gcs(
|
2247
|
+
location.storage_config.signed_url or "",
|
2248
|
+
(location.storage_config.location or "").split("/")[-1],
|
2249
|
+
)
|
2250
|
+
for location in result.storage_locations
|
2251
|
+
],
|
2252
|
+
)
|
2253
|
+
|
2254
|
+
# Most scenarios will only have one location
|
2255
|
+
storage_location = result.storage_locations[0]
|
2256
|
+
storage_type = storage_location.storage_config.storage_type
|
1851
2257
|
|
1852
2258
|
if storage_type == "gcs":
|
1853
|
-
if not
|
1854
|
-
raise
|
2259
|
+
if not storage_location.storage_config.signed_url:
|
2260
|
+
raise DataStorageRetrievalError(
|
1855
2261
|
"No signed URL available for GCS download"
|
1856
2262
|
)
|
1857
2263
|
|
1858
|
-
return await self._adownload_from_gcs(
|
2264
|
+
return await self._adownload_from_gcs(
|
2265
|
+
storage_location.storage_config.signed_url
|
2266
|
+
)
|
1859
2267
|
|
1860
|
-
if storage_type
|
2268
|
+
if storage_type in {"raw_content", "pg_table"}:
|
1861
2269
|
content = result.data_storage.content
|
1862
2270
|
if content is None:
|
1863
2271
|
logger.warning(
|
@@ -1866,11 +2274,189 @@ class DataStorageMethods:
|
|
1866
2274
|
return None
|
1867
2275
|
return content
|
1868
2276
|
|
1869
|
-
raise
|
2277
|
+
raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
|
1870
2278
|
|
1871
2279
|
except HTTPStatusError as e:
|
1872
|
-
self._handle_http_errors(e)
|
2280
|
+
self._handle_http_errors(e, "retrieving")
|
2281
|
+
except Exception as e:
|
2282
|
+
raise DataStorageRetrievalError(
|
2283
|
+
f"An unexpected error occurred: {e!r}"
|
2284
|
+
) from e
|
2285
|
+
|
2286
|
+
@retry(
|
2287
|
+
stop=stop_after_attempt(3),
|
2288
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2289
|
+
retry=retry_if_connection_error,
|
2290
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2291
|
+
)
|
2292
|
+
async def acreate_dataset(
|
2293
|
+
self,
|
2294
|
+
name: str,
|
2295
|
+
description: str | None = None,
|
2296
|
+
dataset_id: UUID | None = None,
|
2297
|
+
):
|
2298
|
+
try:
|
2299
|
+
payload = CreateDatasetPayload(
|
2300
|
+
name=name,
|
2301
|
+
description=description,
|
2302
|
+
id=dataset_id,
|
2303
|
+
)
|
2304
|
+
response = await self.async_client.post(
|
2305
|
+
"/v0.1/data-storage/datasets",
|
2306
|
+
json=payload.model_dump(exclude_none=True),
|
2307
|
+
)
|
2308
|
+
response.raise_for_status()
|
2309
|
+
return CreateDatasetPayload.model_validate(response.json())
|
2310
|
+
except HTTPStatusError as e:
|
2311
|
+
self._handle_http_errors(e, "creating")
|
2312
|
+
except Exception as e:
|
2313
|
+
raise DataStorageCreationError(
|
2314
|
+
f"An unexpected error occurred: {e!r}"
|
2315
|
+
) from e
|
2316
|
+
|
2317
|
+
@retry(
|
2318
|
+
stop=stop_after_attempt(3),
|
2319
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2320
|
+
retry=retry_if_connection_error,
|
2321
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2322
|
+
)
|
2323
|
+
def create_dataset(
|
2324
|
+
self,
|
2325
|
+
name: str,
|
2326
|
+
description: str | None = None,
|
2327
|
+
dataset_id: UUID | None = None,
|
2328
|
+
):
|
2329
|
+
try:
|
2330
|
+
payload = CreateDatasetPayload(
|
2331
|
+
name=name,
|
2332
|
+
description=description,
|
2333
|
+
id=dataset_id,
|
2334
|
+
)
|
2335
|
+
response = self.client.post(
|
2336
|
+
"/v0.1/data-storage/datasets",
|
2337
|
+
json=payload.model_dump(exclude_none=True),
|
2338
|
+
)
|
2339
|
+
response.raise_for_status()
|
2340
|
+
return CreateDatasetPayload.model_validate(response.json())
|
2341
|
+
except HTTPStatusError as e:
|
2342
|
+
self._handle_http_errors(e, "creating")
|
1873
2343
|
except Exception as e:
|
1874
2344
|
raise DataStorageCreationError(
|
1875
2345
|
f"An unexpected error occurred: {e!r}"
|
1876
2346
|
) from e
|
2347
|
+
|
2348
|
+
@retry(
|
2349
|
+
stop=stop_after_attempt(3),
|
2350
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2351
|
+
retry=retry_if_connection_error,
|
2352
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2353
|
+
)
|
2354
|
+
async def adelete_dataset(self, dataset_id: UUID):
|
2355
|
+
"""Delete a dataset.
|
2356
|
+
|
2357
|
+
Note: This will delete all data storage entries associated with the dataset.
|
2358
|
+
|
2359
|
+
Args:
|
2360
|
+
dataset_id: ID of the dataset to delete
|
2361
|
+
|
2362
|
+
Raises:
|
2363
|
+
DataStorageError: If there's an error deleting the dataset
|
2364
|
+
"""
|
2365
|
+
try:
|
2366
|
+
await self.async_client.delete(f"/v0.1/data-storage/datasets/{dataset_id}")
|
2367
|
+
except HTTPStatusError as e:
|
2368
|
+
self._handle_http_errors(e, "deleting")
|
2369
|
+
except Exception as e:
|
2370
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|
2371
|
+
|
2372
|
+
@retry(
|
2373
|
+
stop=stop_after_attempt(3),
|
2374
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2375
|
+
retry=retry_if_connection_error,
|
2376
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2377
|
+
)
|
2378
|
+
def delete_dataset(self, dataset_id: UUID):
|
2379
|
+
"""Delete a dataset.
|
2380
|
+
|
2381
|
+
Note: This will delete all data storage entries associated with the dataset.
|
2382
|
+
|
2383
|
+
Args:
|
2384
|
+
dataset_id: ID of the dataset to delete
|
2385
|
+
|
2386
|
+
Raises:
|
2387
|
+
DataStorageError: If there's an error deleting the dataset
|
2388
|
+
"""
|
2389
|
+
try:
|
2390
|
+
self.client.delete(f"/v0.1/data-storage/datasets/{dataset_id}")
|
2391
|
+
except HTTPStatusError as e:
|
2392
|
+
self._handle_http_errors(e, "deleting")
|
2393
|
+
except Exception as e:
|
2394
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|
2395
|
+
|
2396
|
+
@retry(
|
2397
|
+
stop=stop_after_attempt(3),
|
2398
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2399
|
+
retry=retry_if_connection_error,
|
2400
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2401
|
+
)
|
2402
|
+
async def aget_dataset(self, dataset_id: UUID):
|
2403
|
+
try:
|
2404
|
+
response = await self.async_client.get(
|
2405
|
+
f"/v0.1/data-storage/datasets/{dataset_id}"
|
2406
|
+
)
|
2407
|
+
response.raise_for_status()
|
2408
|
+
|
2409
|
+
return response.json()
|
2410
|
+
except HTTPStatusError as e:
|
2411
|
+
self._handle_http_errors(e, "retrieving")
|
2412
|
+
except Exception as e:
|
2413
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|
2414
|
+
|
2415
|
+
@retry(
|
2416
|
+
stop=stop_after_attempt(3),
|
2417
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2418
|
+
retry=retry_if_connection_error,
|
2419
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2420
|
+
)
|
2421
|
+
def get_dataset(self, dataset_id: UUID):
|
2422
|
+
try:
|
2423
|
+
response = self.client.get(f"/v0.1/data-storage/datasets/{dataset_id}")
|
2424
|
+
response.raise_for_status()
|
2425
|
+
|
2426
|
+
return response.json()
|
2427
|
+
except HTTPStatusError as e:
|
2428
|
+
self._handle_http_errors(e, "retrieving")
|
2429
|
+
except Exception as e:
|
2430
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|
2431
|
+
|
2432
|
+
@retry(
|
2433
|
+
stop=stop_after_attempt(3),
|
2434
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2435
|
+
retry=retry_if_connection_error,
|
2436
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2437
|
+
)
|
2438
|
+
async def adelete_data_storage_entry(self, data_storage_entry_id: UUID):
|
2439
|
+
try:
|
2440
|
+
await self.async_client.delete(
|
2441
|
+
f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
|
2442
|
+
)
|
2443
|
+
except HTTPStatusError as e:
|
2444
|
+
self._handle_http_errors(e, "deleting")
|
2445
|
+
except Exception as e:
|
2446
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|
2447
|
+
|
2448
|
+
@retry(
|
2449
|
+
stop=stop_after_attempt(3),
|
2450
|
+
wait=wait_exponential(multiplier=1, max=10),
|
2451
|
+
retry=retry_if_connection_error,
|
2452
|
+
before_sleep=before_sleep_log(logger, logging.WARNING),
|
2453
|
+
)
|
2454
|
+
def delete_data_storage_entry(self, data_storage_entry_id: UUID):
|
2455
|
+
try:
|
2456
|
+
self.client.delete(
|
2457
|
+
f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
|
2458
|
+
)
|
2459
|
+
except HTTPStatusError as e:
|
2460
|
+
self._handle_http_errors(e, "deleting")
|
2461
|
+
except Exception as e:
|
2462
|
+
raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
|