futurehouse-client 0.4.2.dev274__py3-none-any.whl → 0.4.3.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import tempfile
 import zipfile
 from os import PathLike
 from pathlib import Path
-from typing import NoReturn
+from typing import Any, NoReturn
 from uuid import UUID
 
 import aiofiles
@@ -15,6 +15,8 @@ import aiohttp
 import requests as requests_lib
 from google.resumable_media import requests as resumable_requests
 from httpx import AsyncClient, Client, HTTPStatusError, codes
+from lmi.utils import gather_with_concurrency
+from pydantic import HttpUrl
 from requests.adapters import HTTPAdapter
 from tenacity import (
     before_sleep_log,
@@ -26,12 +28,19 @@ from tqdm import tqdm
 from urllib3.util.retry import Retry
 
 from futurehouse_client.models.data_storage_methods import (
+    CreateDatasetPayload,
+    DataContentType,
     DataStorageLocationPayload,
     DataStorageRequestPayload,
     DataStorageResponse,
+    DataStorageType,
     DirectoryManifest,
     ManifestEntry,
 )
+from futurehouse_client.models.rest import (
+    DataStorageSearchPayload,
+    SearchCriterion,
+)
 from futurehouse_client.utils.general import retry_if_connection_error
 
 # this is only required if they're using a yaml manifest
@@ -54,6 +63,7 @@ INITIATE_HEADERS = {
     "x-goog-resumable": "start",
     "Content-Length": "0",
 }
+DOWNLOAD_CONCURRENCY = 3
 
 
 def _should_ignore_file(
@@ -438,6 +448,10 @@ class DataStorageCreationError(DataStorageError):
     """Raised when there's an error creating a data storage entry."""
 
 
+class DataStorageRetrievalError(DataStorageError):
+    """Raised when there's an error retrieving a data storage entry."""
+
+
 class ProgressWrapper:
     """Common progress wrapper for file uploads."""
 
@@ -462,7 +476,7 @@ class ProgressWrapper:
         return self.file_obj.tell()
 
 
-class DataStorageMethods:
+class DataStorageMethods:  # pylint: disable=too-many-public-methods
     """Data storage methods for RestClient.
 
     This class contains methods for interacting with the data storage API endpoints.
@@ -472,14 +486,12 @@ class DataStorageMethods:
     def _handle_http_errors(self, e: HTTPStatusError, operation: str) -> NoReturn:
         """Handle common HTTP errors for data storage operations."""
         if e.response.status_code == codes.FORBIDDEN:
-            raise DataStorageCreationError(
+            raise DataStorageError(
                 f"Error {operation} data storage entry, not authorized"
             ) from e
         if e.response.status_code == codes.UNPROCESSABLE_ENTITY:
-            raise DataStorageCreationError(
-                f"Invalid request payload: {e.response.text}"
-            ) from e
-        raise DataStorageCreationError(
+            raise DataStorageError(f"Invalid request payload: {e.response.text}") from e
+        raise DataStorageError(
             f"Error {operation} data storage entry: {e.response.status_code} - {e.response.text}"
         ) from e
 
@@ -487,7 +499,7 @@ class DataStorageMethods:
         """Validate file path exists and return Path object."""
         file_path = Path(file_path)
         if not file_path.exists():
-            raise DataStorageCreationError(f"File or directory not found: {file_path}")
+            raise DataStorageError(f"File or directory not found: {file_path}")
         return file_path
 
     def _build_zip_path(self, name: str, path: str | None) -> str:
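
Note: the two hunks above retag validation, authorization, and not-found failures from DataStorageCreationError to the base DataStorageError, so one handler can cover create, fetch, and search failures alike. A minimal caller-side sketch, not part of the diff (the client object and UUID are illustrative):

    from uuid import UUID

    try:
        data = client.fetch_data_from_storage(
            data_storage_id=UUID("00000000-0000-0000-0000-000000000000")
        )
    except DataStorageError as exc:
        # DataStorageCreationError and DataStorageRetrievalError both subclass
        # DataStorageError, so this one handler catches failures from any operation.
        print(f"data storage operation failed: {exc}")
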
@@ -529,19 +541,24 @@ class DataStorageMethods:
             return extracted_items[0]
         return extract_dir
 
-    async def _adownload_from_gcs(self, signed_url: str) -> Path:
+    async def _adownload_from_gcs(
+        self, signed_url: str, file_name: str | None = None
+    ) -> Path:
         """Download file from GCS using signed URL and handle unzipping if needed.
 
         Args:
             signed_url: The signed URL to download from
+            file_name: The name of the file to download
 
         Returns:
             Path to the downloaded file (or unzipped directory if it was a zip)
         """
+        file_name = file_name or "downloaded_file"
+
         try:
             with tempfile.TemporaryDirectory() as temp_dir_str:
                 temp_dir = Path(temp_dir_str)
-                temp_file = temp_dir / "downloaded_file"
+                temp_file = temp_dir / file_name
 
                 async with self.async_client.stream("GET", signed_url) as response:
                     response.raise_for_status()
@@ -549,11 +566,11 @@
                     content_disposition = response.headers.get(
                         "content-disposition", ""
                     )
-                    filename = "downloaded_file"
+                    filename = file_name
                     if "filename=" in content_disposition:
                         filename = content_disposition.split("filename=")[-1].strip('"')
 
-                    if filename != "downloaded_file":
+                    if filename != file_name:
                         temp_file = temp_dir / filename
 
                 async with aiofiles.open(temp_file, "wb") as f:
@@ -583,21 +600,23 @@ class DataStorageMethods:
             return final_file
 
         except Exception as e:
-            raise DataStorageCreationError(f"Failed to download from GCS: {e}") from e
+            raise DataStorageError(f"Failed to download from GCS: {e}") from e
 
-    def _download_from_gcs(self, signed_url: str) -> Path:
+    def _download_from_gcs(self, signed_url: str, file_name: str | None = None) -> Path:
         """Download file from GCS using signed URL and handle unzipping if needed (sync version).
 
         Args:
             signed_url: The signed URL to download from
-
+            file_name: The name of the file to download
         Returns:
             Path to the downloaded file (or unzipped directory if it was a zip)
         """
+        file_name = file_name or "downloaded_file"
+
         try:
             with tempfile.TemporaryDirectory() as temp_dir_str:
                 temp_dir = Path(temp_dir_str)
-                temp_file = temp_dir / "downloaded_file"
+                temp_file = temp_dir / file_name
 
                 with requests_lib.get(signed_url, stream=True, timeout=30) as response:
                     response.raise_for_status()
@@ -605,11 +624,11 @@
                     content_disposition = response.headers.get(
                         "content-disposition", ""
                     )
-                    filename = "downloaded_file"
+                    filename = file_name
                     if "filename=" in content_disposition:
                         filename = content_disposition.split("filename=")[-1].strip('"')
 
-                    if filename != "downloaded_file":
+                    if filename != file_name:
                         temp_file = temp_dir / filename
 
                 with open(temp_file, "wb") as f:
@@ -639,9 +658,7 @@ class DataStorageMethods:
             return final_file
 
         except Exception as e:
-            raise DataStorageCreationError(f"Failed to download from GCS: {e}") from e
-
-    # =====================================
+            raise DataStorageError(f"Failed to download from GCS: {e}") from e
 
     def _prepare_single_file_upload(
         self, name: str, file_path: Path, description: str | None, path: str | None
@@ -676,7 +693,7 @@ class DataStorageMethods:
     ) -> DataStorageResponse:
         """Create data storage entry via API (sync version)."""
         response = self.client.post(
-            "/v0.1/data-storage",
+            "/v0.1/data-storage/data-entries",
             json=payload.model_dump(mode="json", exclude_none=True),
         )
         response.raise_for_status()
@@ -687,7 +704,7 @@ class DataStorageMethods:
     ) -> DataStorageResponse:
         """Create data storage entry via API (async version)."""
         response = await self.async_client.post(
-            "/v0.1/data-storage",
+            "/v0.1/data-storage/data-entries",
             json=payload.model_dump(mode="json", exclude_none=True),
         )
         response.raise_for_status()
@@ -800,24 +817,30 @@ class DataStorageMethods:
             )
             data_storage_response = self._create_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
-                raise DataStorageCreationError("No signed URL returned for zip upload")
+            for storage_location in data_storage_response.storage_locations:
+                if not storage_location.storage_config.signed_url:
+                    raise DataStorageCreationError(
+                        "No signed URL returned for zip upload"
+                    )
 
-            with tqdm(
-                total=zip_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-                desc=f"Uploading {dir_path.name} (zipped)",
-                miniters=1,
-                mininterval=0.1,
-            ) as pbar:
-                _upload_file_with_progress(
-                    data_storage_response.signed_url, temp_zip_path, pbar, zip_size
-                )
+                with tqdm(
+                    total=zip_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=f"Uploading {dir_path.name} (zipped)",
+                    miniters=1,
+                    mininterval=0.1,
+                ) as pbar:
+                    _upload_file_with_progress(
+                        storage_location.storage_config.signed_url,
+                        temp_zip_path,
+                        pbar,
+                        zip_size,
+                    )
 
             status_response = self.client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
             )
             status_response.raise_for_status()
@@ -871,24 +894,30 @@ class DataStorageMethods:
 
             data_storage_response = await self._acreate_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
-                raise DataStorageCreationError("No signed URL returned for zip upload")
+            for storage_location in data_storage_response.storage_locations:
+                if not storage_location.storage_config.signed_url:
+                    raise DataStorageCreationError(
+                        "No signed URL returned for zip upload"
+                    )
 
-            with tqdm(
-                total=zip_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-                desc=f"Uploading {dir_path.name} (zipped)",
-                miniters=1,
-                mininterval=0.1,
-            ) as pbar:
-                await _aupload_file_with_progress(
-                    data_storage_response.signed_url, temp_zip_path, pbar, zip_size
-                )
+                with tqdm(
+                    total=zip_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=f"Uploading {dir_path.name} (zipped)",
+                    miniters=1,
+                    mininterval=0.1,
+                ) as pbar:
+                    await _aupload_file_with_progress(
+                        storage_location.storage_config.signed_url,
+                        temp_zip_path,
+                        pbar,
+                        zip_size,
+                    )
 
             status_response = await self.async_client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
             )
             status_response.raise_for_status()
@@ -951,30 +980,34 @@ class DataStorageMethods:
 
             data_storage_response = self._create_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
-                raise DataStorageCreationError("No signed URL returned from server")
+            for storage_location in data_storage_response.storage_locations:
+                if not storage_location.storage_config.signed_url:
+                    raise DataStorageCreationError("No signed URL returned from server")
 
-            with tqdm(
-                total=file_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-                desc=f"Uploading {file_path.name}",
-                miniters=1,
-                mininterval=0.1,
-            ) as pbar:
-                try:
-                    _upload_file_with_progress(
-                        data_storage_response.signed_url, file_path, pbar, file_size
-                    )
-                    logger.debug("File upload to signed URL completed successfully")
-                except Exception as e:
-                    logger.error(f"Failed to upload file to signed URL: {e}")
-                    raise
+                with tqdm(
+                    total=file_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=f"Uploading {file_path.name}",
+                    miniters=1,
+                    mininterval=0.1,
+                ) as pbar:
+                    try:
+                        _upload_file_with_progress(
+                            storage_location.storage_config.signed_url,
+                            file_path,
+                            pbar,
+                            file_size,
+                        )
+                        logger.debug("File upload to signed URL completed successfully")
+                    except Exception as e:
+                        logger.error(f"Failed to upload file to signed URL: {e}")
+                        raise
 
             logger.debug("Updating data storage status to active")
             status_response = self.client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
             )
             status_response.raise_for_status()
@@ -1015,24 +1048,28 @@ class DataStorageMethods:
 
             data_storage_response = await self._acreate_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
-                raise DataStorageCreationError("No signed URL returned from server")
+            for location in data_storage_response.storage_locations:
+                if not location.storage_config.signed_url:
+                    raise DataStorageCreationError(
+                        f"No signed URL returned from server for location: {location.id}"
+                    )
 
-            with tqdm(
-                total=file_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-                desc=f"Uploading {file_path.name}",
-                miniters=1,
-                mininterval=0.1,
-            ) as pbar:
-                await _aupload_file_with_progress(
-                    data_storage_response.signed_url, file_path, pbar, file_size
-                )
+                with tqdm(
+                    total=file_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=f"Uploading {file_path.name}",
+                    miniters=1,
+                    mininterval=0.1,
+                    leave=False,
+                ) as pbar:
+                    await _aupload_file_with_progress(
+                        location.storage_config.signed_url, file_path, pbar, file_size
+                    )
 
             status_response = await self.async_client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
            )
             status_response.raise_for_status()
@@ -1075,25 +1112,26 @@ class DataStorageMethods:
             )
             data_storage_response = self._create_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
-                raise DataStorageCreationError("No signed URL returned from server")
+            for location in data_storage_response.storage_locations:
+                if not location.storage_config.signed_url:
+                    raise DataStorageCreationError("No signed URL returned from server")
 
-            with tqdm(
-                total=file_size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-                desc=f"Uploading {file_path.name}",
-                miniters=1,
-                mininterval=0.1,
-                leave=False,
-            ) as pbar:
-                _upload_file_with_progress(
-                    data_storage_response.signed_url, file_path, pbar, file_size
-                )
+                with tqdm(
+                    total=file_size,
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    desc=f"Uploading {file_path.name}",
+                    miniters=1,
+                    mininterval=0.1,
+                    leave=False,
+                ) as pbar:
+                    _upload_file_with_progress(
+                        location.storage_config.signed_url, file_path, pbar, file_size
+                    )
 
             status_response = self.client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
             )
             status_response.raise_for_status()
@@ -1295,7 +1333,9 @@ class DataStorageMethods:
             )
             data_storage_response = await self._acreate_data_storage_entry(payload)
 
-            if not data_storage_response.signed_url:
+            storage_location = data_storage_response.storage_locations[0]
+
+            if not storage_location.storage_config.signed_url:
                 raise DataStorageCreationError("No signed URL returned from server")
 
             with tqdm(
@@ -1308,11 +1348,11 @@ class DataStorageMethods:
                 mininterval=0.1,
             ) as pbar:
                 await _aupload_file_with_progress(
-                    data_storage_response.signed_url, file_path, pbar, file_size
+                    storage_location.storage_config.signed_url, file_path, pbar, file_size
                 )
 
             status_response = await self.async_client.patch(
-                f"/v0.1/data-storage/{data_storage_response.data_storage.id}",
+                f"/v0.1/data-storage/data-entries/{data_storage_response.data_storage.id}",
                 json={"status": "active"},
             )
             status_response.raise_for_status()
@@ -1553,6 +1593,130 @@ class DataStorageMethods:
                 f"An unexpected error occurred: {e!r}"
             ) from e
 
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def astore_link(
+        self,
+        name: str,
+        url: HttpUrl,
+        description: str,
+        instructions: str,
+        api_key: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        dataset_id: UUID | None = None,
+        project_id: UUID | None = None,
+    ) -> DataStorageResponse:
+        """Asynchronously store a link/URL in the data storage system.
+
+        Args:
+            name: Name of the link entry
+            url: The URL/link to store
+            description: Searchable details of the link
+            instructions: Instructions for how to consume the link or API
+            api_key: Any authentication key needed to access the API. If this is included, you should
+                also include details of how the key should be consumed in the instructions.
+            metadata: Any additional metadata about the link
+            dataset_id: Optional dataset ID to add the entry to, or None to create a new dataset
+            project_id: ID of the project this data storage entry belongs to
+
+        Returns:
+            DataStorageResponse containing the created link storage entry
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the link storage entry
+        """
+        try:
+            link_metadata = metadata.copy() if metadata else {}
+            link_metadata["instructions"] = instructions
+            if api_key:
+                link_metadata["api_key"] = api_key
+
+            existing_location = DataStorageLocationPayload(
+                storage_type=DataStorageType.LINK,
+                content_type=DataContentType.TEXT,
+                location=url,
+                metadata=link_metadata or None,
+            )
+
+            payload = DataStorageRequestPayload(
+                name=name,
+                content=url,
+                description=description,
+                dataset_id=dataset_id,
+                project_id=project_id,
+                existing_location=existing_location,
+            )
+            return await self._acreate_data_storage_entry(payload)
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "creating")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
+    def store_link(
+        self,
+        name: str,
+        url: HttpUrl,
+        description: str,
+        instructions: str,
+        api_key: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        dataset_id: UUID | None = None,
+        project_id: UUID | None = None,
+    ) -> DataStorageResponse:
+        """Store a link/URL in the data storage system.
+
+        Args:
+            name: Name of the link entry
+            url: The URL/link to store
+            description: Searchable details of the link
+            instructions: Instructions for how to consume the link or API
+            api_key: Any authentication key needed to access the API. If this is included, you should
+                also include details of how the key should be consumed in the instructions.
+            metadata: Any additional metadata about the link
+            dataset_id: Optional dataset ID to add the entry to, or None to create a new dataset
+            project_id: ID of the project this data storage entry belongs to
+
+        Returns:
+            DataStorageResponse containing the created link storage entry
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the link storage entry
+        """
+        try:
+            link_metadata = metadata.copy() if metadata else {}
+            link_metadata["instructions"] = instructions
+            if api_key:
+                link_metadata["api_key"] = api_key
+
+            existing_location = DataStorageLocationPayload(
+                storage_type=DataStorageType.LINK,
+                content_type=DataContentType.TEXT,
+                location=url,
+                metadata=link_metadata or None,
+            )
+
+            payload = DataStorageRequestPayload(
+                name=name,
+                content=url,
+                description=description,
+                dataset_id=dataset_id,
+                project_id=project_id,
+                existing_location=existing_location,
+            )
+            return self._create_data_storage_entry(payload)
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "creating")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
     @retry(
         stop=stop_after_attempt(3),
         wait=wait_exponential(multiplier=1, max=10),
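
Note: a hedged usage sketch for the new store_link method shown above, not part of the diff (the client, URL, name, and key are illustrative):

    from pydantic import HttpUrl

    response = client.store_link(
        name="protein-api",
        url=HttpUrl("https://example.com/api"),
        description="Example REST API serving protein records",
        instructions="Send the API key in an Authorization header.",
        api_key="sk-example",  # stored in the location metadata, per the code above
    )
    print(response.data_storage.id)

The async variant is identical apart from `await client.astore_link(...)`.
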
@@ -1724,6 +1888,7 @@ class DataStorageMethods:
         name: str,
         existing_location: DataStorageLocationPayload,
         description: str | None = None,
+        as_collection: bool = False,
         path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
@@ -1733,6 +1898,9 @@ class DataStorageMethods:
             name: Name of the data storage entry
             existing_location: Describes the existing data source location to register
             description: Optional description of the data storage entry
+            as_collection: If registering a directory, `True` creates a single storage entry for
+                the whole directory with a storage location for each file; `False` assumes
+                you are registering a single file.
             path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to
 
@@ -1749,9 +1917,11 @@ class DataStorageMethods:
                 path=path,
                 existing_location=existing_location,
                 project_id=project_id,
+                is_collection=as_collection,
             )
             response = self.client.post(
-                "/v0.1/data-storage", json=payload.model_dump(exclude_none=True)
+                "/v0.1/data-storage/data-entries",
+                json=payload.model_dump(exclude_none=True),
             )
             response.raise_for_status()
             return DataStorageResponse.model_validate(response.json())
@@ -1772,6 +1942,7 @@ class DataStorageMethods:
         self,
         name: str,
         existing_location: DataStorageLocationPayload,
+        as_collection: bool = False,
         description: str | None = None,
         path: str | None = None,
         project_id: UUID | None = None,
@@ -1782,6 +1953,9 @@ class DataStorageMethods:
             name: Name of the data storage entry
             existing_location: Describes the existing data source location to register
             description: Optional description of the data storage entry
+            as_collection: If registering a directory, `True` creates a single storage entry for
+                the whole directory with a storage location for each file; `False` assumes
+                you are registering a single file.
             path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to
 
@@ -1798,9 +1972,11 @@ class DataStorageMethods:
                 path=path,
                 existing_location=existing_location,
                 project_id=project_id,
+                is_collection=as_collection,
             )
             response = await self.async_client.post(
-                "/v0.1/data-storage", json=payload.model_dump(exclude_none=True)
+                "/v0.1/data-storage/data-entries",
+                json=payload.model_dump(exclude_none=True),
             )
             response.raise_for_status()
             return DataStorageResponse.model_validate(response.json())
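
Note: the two hunks above thread the new as_collection flag through to is_collection on DataStorageRequestPayload. A hedged sketch of the payload this produces, not part of the diff (DataStorageType.GCS and the location string are illustrative assumptions; only the LINK and TEXT members are visible in this diff):

    payload = DataStorageRequestPayload(
        name="existing-bucket-data",
        existing_location=DataStorageLocationPayload(
            storage_type=DataStorageType.GCS,  # assumed enum member, for illustration
            content_type=DataContentType.TEXT,
            location="gs://example-bucket/some/prefix/",
        ),
        is_collection=True,  # one entry, one storage location per file
    )
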
@@ -1811,8 +1987,274 @@ class DataStorageMethods:
                 f"An unexpected error occurred: {e!r}"
             ) from e
 
-    # TODO: EVERYTHING BELOW THIS LINE SHOULD BE MOVED TO FH_TOOLS REPO
-    # =================================================
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def search_data_storage(
+        self,
+        criteria: list[SearchCriterion] | None = None,
+        size: int = 10,
+    ) -> list[dict]:
+        """Search data storage objects using structured criteria.
+
+        Args:
+            criteria: List of search criteria (SearchCriterion objects with field, operator, value)
+            size: Number of results to return (1-100)
+
+        Returns:
+            List of search results with scores and data storage information
+
+        Raises:
+            DataStorageCreationError: If there's an error searching data storage entries
+
+        Example:
+            from futurehouse_client.models.rest import SearchCriterion, SearchOperator
+            criteria = [
+                SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="document"),
+                SearchCriterion(field="project_id", operator=SearchOperator.EQUALS, value="my-project-id"),
+                SearchCriterion(field="status", operator=SearchOperator.EQUALS, value="active"),
+            ]
+            results = client.search_data_storage(criteria=criteria, size=20)
+        """
+        try:
+            payload = DataStorageSearchPayload(
+                criteria=criteria or [],
+                size=max(1, min(100, size)),  # Clamp between 1-100
+            )
+
+            response = self.client.post(
+                "/v0.1/data-storage/search",
+                json=payload.model_dump(mode="json"),
+            )
+            response.raise_for_status()
+            return response.json()
+
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.SERVICE_UNAVAILABLE:
+                raise DataStorageCreationError(
+                    "Search functionality is currently unavailable"
+                ) from e
+            self._handle_http_errors(e, "searching")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred during search: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def asearch_data_storage(
+        self,
+        criteria: list[SearchCriterion] | None = None,
+        size: int = 10,
+    ) -> list[dict]:
+        """Asynchronously search data storage objects using structured criteria.
+
+        Args:
+            criteria: List of search criteria (SearchCriterion objects with field, operator, value)
+            size: Number of results to return (1-100)
+
+        Returns:
+            List of search results with scores and data storage information
+
+        Raises:
+            DataStorageCreationError: If there's an error searching data storage entries
+
+        Example:
+            from futurehouse_client.models.rest import SearchCriterion, SearchOperator
+            criteria = [
+                SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="document"),
+                SearchCriterion(field="project_id", operator=SearchOperator.EQUALS, value="my-project-id"),
+                SearchCriterion(field="status", operator=SearchOperator.EQUALS, value="active"),
+            ]
+            results = await client.asearch_data_storage(criteria=criteria, size=20)
+        """
+        try:
+            payload = DataStorageSearchPayload(
+                criteria=criteria or [],
+                size=max(1, min(100, size)),  # Clamp between 1-100
+            )
+
+            response = await self.async_client.post(
+                "/v0.1/data-storage/search",
+                json=payload.model_dump(mode="json"),
+            )
+            response.raise_for_status()
+            return response.json()
+
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.SERVICE_UNAVAILABLE:
+                raise DataStorageCreationError(
+                    "Search functionality is currently unavailable"
+                ) from e
+            self._handle_http_errors(e, "searching")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred during async search: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def similarity_search_data_storage(
+        self,
+        embedding: list[float],
+        size: int = 10,
+        min_score: float = 0.7,
+        dataset_id: UUID | None = None,
+        tags: list[str] | None = None,
+        user_id: str | None = None,
+        project_id: str | None = None,
+    ) -> list[dict]:
+        """Search data storage objects using vector similarity.
+
+        Args:
+            embedding: Embedding vector for similarity search
+            size: Number of results to return (1-100)
+            min_score: Minimum similarity score (0.0-1.0)
+            dataset_id: Optional dataset ID filter
+            tags: Optional list of tags to filter by
+            user_id: Optional user ID filter (admin only)
+            project_id: Optional project ID filter
+
+        Returns:
+            List of search results with similarity scores and data storage information
+
+        Raises:
+            DataStorageCreationError: If there's an error performing similarity search
+        """
+        try:
+            # Validate inputs
+            if not embedding:
+                raise DataStorageCreationError("Embedding vector is required")
+
+            if not all(isinstance(x, int | float) for x in embedding):
+                raise DataStorageCreationError("Embedding must be a list of numbers")
+
+            size = max(1, min(100, size))  # Clamp between 1-100
+            min_score = max(0.0, min(1.0, min_score))  # Clamp between 0.0-1.0
+
+            # Build request payload
+            payload = {
+                "embedding": embedding,
+                "size": size,
+                "min_score": min_score,
+            }
+
+            # Add optional filters
+            if dataset_id is not None:
+                payload["dataset_id"] = str(dataset_id)
+            if tags is not None:
+                payload["tags"] = tags
+            if user_id is not None:
+                payload["user_id"] = user_id
+            if project_id is not None:
+                payload["project_id"] = project_id
+
+            response = self.client.post(
+                "/v0.1/data-storage/similarity-search", json=payload
+            )
+            response.raise_for_status()
+            return response.json()
+
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.SERVICE_UNAVAILABLE:
+                raise DataStorageCreationError(
+                    "Similarity search functionality is currently unavailable"
+                ) from e
+            self._handle_http_errors(e, "performing similarity search")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred during similarity search: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def asimilarity_search_data_storage(
+        self,
+        embedding: list[float],
+        size: int = 10,
+        min_score: float = 0.7,
+        dataset_id: UUID | None = None,
+        tags: list[str] | None = None,
+        user_id: str | None = None,
+        project_id: str | None = None,
+    ) -> list[dict]:
+        """Asynchronously search data storage objects using vector similarity.
+
+        Args:
+            embedding: Embedding vector for similarity search
+            size: Number of results to return (1-100)
+            min_score: Minimum similarity score (0.0-1.0)
+            dataset_id: Optional dataset ID filter
+            tags: Optional list of tags to filter by
+            user_id: Optional user ID filter (admin only)
+            project_id: Optional project ID filter
+
+        Returns:
+            List of search results with similarity scores and data storage information
+
+        Raises:
+            DataStorageCreationError: If there's an error performing similarity search
+        """
+        try:
+            # Validate inputs
+            if not embedding:
+                raise DataStorageCreationError("Embedding vector is required")
+
+            if not all(isinstance(x, int | float) for x in embedding):
+                raise DataStorageCreationError("Embedding must be a list of numbers")
+
+            size = max(1, min(100, size))  # Clamp between 1-100
+            min_score = max(0.0, min(1.0, min_score))  # Clamp between 0.0-1.0
+
+            # Build request payload
+            payload = {
+                "embedding": embedding,
+                "size": size,
+                "min_score": min_score,
+            }
+
+            # Add optional filters
+            if dataset_id is not None:
+                payload["dataset_id"] = str(dataset_id)
+            if tags is not None:
+                payload["tags"] = tags
+            if user_id is not None:
+                payload["user_id"] = user_id
+            if project_id is not None:
+                payload["project_id"] = project_id
+
+            response = await self.async_client.post(
+                "/v0.1/data-storage/similarity-search", json=payload
+            )
+            response.raise_for_status()
+            return response.json()
+
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.SERVICE_UNAVAILABLE:
+                raise DataStorageCreationError(
+                    "Similarity search functionality is currently unavailable"
+                ) from e
+            self._handle_http_errors(e, "performing similarity search")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred during async similarity search: {e!r}"
+            ) from e
+
     @retry(
         stop=stop_after_attempt(3),
         wait=wait_exponential(multiplier=1, max=10),
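
Note: search_data_storage already carries an example in its docstring; for the similarity variant, a hedged sketch, not part of the diff (the embedding helper is hypothetical and the client is assumed configured):

    embedding = embed_text("thermostable polymerases")  # hypothetical embedding helper

    results = client.similarity_search_data_storage(
        embedding=embedding,
        size=5,
        min_score=0.8,
    )
    for hit in results:
        print(hit)  # each hit is a dict with a similarity score and entry info
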
@@ -1822,7 +2264,7 @@ class DataStorageMethods:
     def fetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | None:
+    ) -> str | Path | list[Path] | None:
         """Fetch data from the storage system (sync version).
 
         Args:
@@ -1831,27 +2273,43 @@ class DataStorageMethods:
         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
+            For multi-location entries: list of Paths to the downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
-            raise DataStorageCreationError(
+            raise DataStorageRetrievalError(
                 "data_storage_id must be provided at this time"
             )
 
         try:
-            response = self.client.get(f"/v0.1/data-storage/{data_storage_id}")
+            response = self.client.get(
+                f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
+            )
             response.raise_for_status()
             result = DataStorageResponse.model_validate(response.json())
 
-            storage_type = result.storage_location.storage_config.storage_type
+            if len(result.storage_locations) > 1:
+                return [
+                    self._download_from_gcs(
+                        location.storage_config.signed_url or "",
+                        (location.storage_config.location or "").split("/")[-1],
+                    )
+                    for location in result.storage_locations
+                ]
+
+            # Most scenarios will only have one location
+            storage_location = result.storage_locations[0]
+            storage_type = storage_location.storage_config.storage_type
 
             if storage_type == "gcs":
-                if not result.signed_url:
-                    raise DataStorageCreationError(
+                if not storage_location.storage_config.signed_url:
+                    raise DataStorageRetrievalError(
                         "No signed URL available for GCS download"
                     )
 
-                return self._download_from_gcs(result.signed_url)
+                return self._download_from_gcs(
+                    storage_location.storage_config.signed_url
+                )
 
             if storage_type in {"raw_content", "pg_table"}:
                 content = result.data_storage.content
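
Note: fetch_data_from_storage now returns str, Path, list[Path], or None, so callers should branch on the type. A minimal sketch, not part of the diff (the client and entry_id are illustrative):

    from pathlib import Path

    result = client.fetch_data_from_storage(data_storage_id=entry_id)
    if isinstance(result, list):      # multi-location entry: one Path per downloaded file
        for path in result:
            print("downloaded", path)
    elif isinstance(result, Path):    # single GCS location: downloaded file or unzipped dir
        print("downloaded", result)
    elif isinstance(result, str):     # raw_content / pg_table: inline content
        print(result)
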
@@ -1862,12 +2320,12 @@ class DataStorageMethods:
                     return None
                 return content
 
-            raise DataStorageCreationError(f"Unsupported storage type: {storage_type}")
+            raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
 
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
         except Exception as e:
-            raise DataStorageCreationError(
+            raise DataStorageRetrievalError(
                 f"An unexpected error occurred: {e!r}"
             ) from e
 
@@ -1880,7 +2338,7 @@ class DataStorageMethods:
     async def afetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | None:
+    ) -> str | Path | list[Path] | None:
         """Fetch data from the storage system.
 
         Args:
@@ -1889,29 +2347,46 @@ class DataStorageMethods:
         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
+            For multi-location entries: list of Paths to the downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
-            raise DataStorageCreationError(
+            raise DataStorageRetrievalError(
                 "data_storage_id must be provided at this time"
             )
 
         try:
             response = await self.async_client.get(
-                f"/v0.1/data-storage/{data_storage_id}"
+                f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
             )
             response.raise_for_status()
             result = DataStorageResponse.model_validate(response.json())
 
-            storage_type = result.storage_location.storage_config.storage_type
+            if len(result.storage_locations) > 1:
+                return await gather_with_concurrency(
+                    DOWNLOAD_CONCURRENCY,
+                    [
+                        self._adownload_from_gcs(
+                            location.storage_config.signed_url or "",
+                            (location.storage_config.location or "").split("/")[-1],
+                        )
+                        for location in result.storage_locations
+                    ],
+                )
+
+            # Most scenarios will only have one location
+            storage_location = result.storage_locations[0]
+            storage_type = storage_location.storage_config.storage_type
 
             if storage_type == "gcs":
-                if not result.signed_url:
-                    raise DataStorageCreationError(
+                if not storage_location.storage_config.signed_url:
+                    raise DataStorageRetrievalError(
                         "No signed URL available for GCS download"
                     )
 
-                return await self._adownload_from_gcs(result.signed_url)
+                return await self._adownload_from_gcs(
+                    storage_location.storage_config.signed_url
+                )
 
             if storage_type in {"raw_content", "pg_table"}:
                 content = result.data_storage.content
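
Note: the async variant fans multi-location downloads out through gather_with_concurrency, capped at DOWNLOAD_CONCURRENCY (3) concurrent transfers. A hedged usage sketch, not part of the diff (the client and entry_id are illustrative):

    import asyncio

    async def main() -> None:
        # For multi-location entries this resolves to a list of Paths,
        # downloaded at most three at a time.
        result = await client.afetch_data_from_storage(data_storage_id=entry_id)
        print(result)

    asyncio.run(main())
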
@@ -1922,11 +2397,253 @@ class DataStorageMethods:
                     return None
                 return content
 
-            raise DataStorageCreationError(f"Unsupported storage type: {storage_type}")
+            raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
 
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
+        except Exception as e:
+            raise DataStorageRetrievalError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def acreate_dataset(
+        self,
+        name: str,
+        description: str | None = None,
+        dataset_id: UUID | None = None,
+    ):
+        try:
+            payload = CreateDatasetPayload(
+                name=name,
+                description=description,
+                id=dataset_id,
+            )
+            response = await self.async_client.post(
+                "/v0.1/data-storage/datasets",
+                json=payload.model_dump(exclude_none=True),
+            )
+            response.raise_for_status()
+            return CreateDatasetPayload.model_validate(response.json())
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "creating")
         except Exception as e:
             raise DataStorageCreationError(
                 f"An unexpected error occurred: {e!r}"
             ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def create_dataset(
+        self,
+        name: str,
+        description: str | None = None,
+        dataset_id: UUID | None = None,
+    ):
+        try:
+            payload = CreateDatasetPayload(
+                name=name,
+                description=description,
+                id=dataset_id,
+            )
+            response = self.client.post(
+                "/v0.1/data-storage/datasets",
+                json=payload.model_dump(exclude_none=True),
+            )
+            response.raise_for_status()
+            return CreateDatasetPayload.model_validate(response.json())
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "creating")
+        except Exception as e:
+            raise DataStorageCreationError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def adelete_dataset(self, dataset_id: UUID):
+        """Delete a dataset.
+
+        Note: This will delete all data storage entries associated with the dataset.
+
+        Args:
+            dataset_id: ID of the dataset to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the dataset
+        """
+        try:
+            await self.async_client.delete(f"/v0.1/data-storage/datasets/{dataset_id}")
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "deleting")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def delete_dataset(self, dataset_id: UUID):
+        """Delete a dataset.
+
+        Note: This will delete all data storage entries associated with the dataset.
+
+        Args:
+            dataset_id: ID of the dataset to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the dataset
+        """
+        try:
+            self.client.delete(f"/v0.1/data-storage/datasets/{dataset_id}")
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "deleting")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def aget_dataset(self, dataset_id: UUID):
+        try:
+            response = await self.async_client.get(
+                f"/v0.1/data-storage/datasets/{dataset_id}"
+            )
+            response.raise_for_status()
+
+            return response.json()
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "retrieving")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def get_dataset(self, dataset_id: UUID):
+        try:
+            response = self.client.get(f"/v0.1/data-storage/datasets/{dataset_id}")
+            response.raise_for_status()
+
+            return response.json()
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "retrieving")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def get_data_storage_entry(self, data_storage_id: UUID) -> DataStorageResponse:
+        """Get a data storage entry with all details including storage locations and metadata.
+
+        Args:
+            data_storage_id: ID of the data storage entry to retrieve
+
+        Returns:
+            DataStorageResponse with entry details and storage locations
+
+        Raises:
+            DataStorageRetrievalError: If there's an error retrieving the entry
+        """
+        try:
+            response = self.client.get(
+                f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
+            )
+            response.raise_for_status()
+            return DataStorageResponse.model_validate(response.json())
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "retrieving")
+        except Exception as e:
+            raise DataStorageRetrievalError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def aget_data_storage_entry(
+        self, data_storage_id: UUID
+    ) -> DataStorageResponse:
+        """Get a data storage entry with all details including storage locations and metadata.
+
+        Args:
+            data_storage_id: ID of the data storage entry to retrieve
+
+        Returns:
+            DataStorageResponse with entry details and storage locations
+
+        Raises:
+            DataStorageRetrievalError: If there's an error retrieving the entry
+        """
+        try:
+            response = await self.async_client.get(
+                f"/v0.1/data-storage/data-entries/{data_storage_id}", timeout=100
+            )
+            response.raise_for_status()
+            return DataStorageResponse.model_validate(response.json())
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "retrieving")
+        except Exception as e:
+            raise DataStorageRetrievalError(
+                f"An unexpected error occurred: {e!r}"
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def adelete_data_storage_entry(self, data_storage_entry_id: UUID):
+        try:
+            await self.async_client.delete(
+                f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
+            )
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "deleting")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, max=10),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def delete_data_storage_entry(self, data_storage_entry_id: UUID):
+        try:
+            self.client.delete(
+                f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
+            )
+        except HTTPStatusError as e:
+            self._handle_http_errors(e, "deleting")
+        except Exception as e:
+            raise DataStorageError(f"An unexpected error occurred: {e!r}") from e