futurehouse-client 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/clients/data_storage_methods.py +75 -56
- futurehouse_client/clients/rest_client.py +35 -12
- futurehouse_client/models/app.py +16 -0
- futurehouse_client/models/data_storage_methods.py +10 -3
- futurehouse_client/version.py +2 -2
- {futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/METADATA +1 -1
- {futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/RECORD +10 -10
- {futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/WHEEL +0 -0
- {futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/licenses/LICENSE +0 -0
- {futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/top_level.txt +0 -0
futurehouse_client/clients/data_storage_methods.py
CHANGED
@@ -37,6 +37,7 @@ from futurehouse_client.models.data_storage_methods import (
     DirectoryManifest,
     GetDatasetAndEntriesResponse,
     ManifestEntry,
+    RawFileFetchResponse,
 )
 from futurehouse_client.models.rest import (
     DataStorageSearchPayload,
@@ -504,11 +505,15 @@ class DataStorageMethods:
             raise DataStorageError(f"File or directory not found: {file_path}")
         return file_path

-    def _build_zip_path(
+    def _build_zip_path(
+        self, name: str, path_override: str | Path | None
+    ) -> str | Path:
         """Build GCS path for zip file."""
         zip_filename = name if name.endswith(".zip") else f"{name}.zip"
-        if
-
+        if path_override:
+            if isinstance(path_override, str):
+                return f"{path_override.rstrip('/')}/{zip_filename}"
+            return path_override / zip_filename
         return zip_filename

     # TODO: methods in here need to be moved to fh tools
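As a reading aid (not part of the package), the new override logic can be exercised standalone; this sketch mirrors the body of `_build_zip_path` shown above as a free function:

    from pathlib import Path

    def build_zip_path(name: str, path_override: str | Path | None) -> str | Path:
        # Mirrors _build_zip_path: ensure a .zip suffix, then prefix the override.
        zip_filename = name if name.endswith(".zip") else f"{name}.zip"
        if path_override:
            if isinstance(path_override, str):
                return f"{path_override.rstrip('/')}/{zip_filename}"
            return path_override / zip_filename
        return zip_filename

    assert build_zip_path("model-outputs", "runs/2024/") == "runs/2024/model-outputs.zip"
    assert build_zip_path("archive.zip", Path("runs")) == Path("runs/archive.zip")
    assert build_zip_path("notes", None) == "notes.zip"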
@@ -663,7 +668,11 @@ class DataStorageMethods:
             raise DataStorageError(f"Failed to download from GCS: {e}") from e

     def _prepare_single_file_upload(
-        self,
+        self,
+        name: str,
+        file_path: Path,
+        description: str | None,
+        file_path_override: str | Path | None,
     ) -> tuple[int, DataStorageRequestPayload | None]:
         """Prepare single file for upload, return file size and payload if text content."""
         file_size = file_path.stat().st_size
@@ -681,7 +690,7 @@ class DataStorageMethods:
                 name=name,
                 description=description,
                 content=text_content,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
             )
             logger.warning(
@@ -777,7 +786,7 @@ class DataStorageMethods:
         name: str,
         dir_path: Path,
         description: str | None,
-
+        dir_path_override: str | Path | None = None,
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
@@ -789,7 +798,7 @@ class DataStorageMethods:
             name: Name for the directory collection
             dir_path: Path to directory to zip and upload
             description: Description for the collection
-
+            dir_path_override: Optional GCS path for the zip file
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
@@ -807,11 +816,11 @@ class DataStorageMethods:
                 dir_path, temp_zip_path, ignore_patterns, ignore_filename
             )

-            zip_gcs_path = self._build_zip_path(name,
+            zip_gcs_path = self._build_zip_path(name, dir_path_override)
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=zip_gcs_path,
                 is_collection=True,
                 project_id=project_id,
                 tags=tags,
@@ -860,7 +869,7 @@ class DataStorageMethods:
         name: str,
         dir_path: Path,
         description: str | None,
-
+        dir_path_override: str | Path | None = None,
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
@@ -872,7 +881,7 @@ class DataStorageMethods:
             name: Name for the directory collection
             dir_path: Path to directory to zip and upload
             description: Description for the collection
-
+            dir_path_override: Optional GCS path for the zip file
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
@@ -890,11 +899,11 @@ class DataStorageMethods:
                 dir_path, temp_zip_path, ignore_patterns, ignore_filename
             )

-            zip_gcs_path = self._build_zip_path(name,
+            zip_gcs_path = self._build_zip_path(name, dir_path_override)
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=zip_gcs_path,
                 is_collection=True,
                 project_id=project_id,
                 tags=tags,
@@ -940,7 +949,7 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-
+        file_path_override: str | Path | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Upload a single file."""
@@ -960,7 +969,7 @@ class DataStorageMethods:
                 name=name,
                 description=description,
                 content=text_content,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 project_id=project_id,
             )
@@ -977,7 +986,7 @@ class DataStorageMethods:
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 project_id=project_id,
             )
@@ -1028,13 +1037,13 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-
+        file_path_override: str | Path | None = None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Asynchronously upload a single file."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description,
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1048,7 +1057,7 @@ class DataStorageMethods:
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 dataset_id=dataset_id,
                 project_id=project_id,
@@ -1089,14 +1098,14 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-
+        file_path_override: str | None,
         parent_id: UUID | None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Upload a single file with a parent ID (sync version)."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description,
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1112,7 +1121,7 @@ class DataStorageMethods:
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 parent_id=parent_id,
                 dataset_id=dataset_id,
@@ -1167,7 +1176,7 @@ class DataStorageMethods:
                 name=item.name,
                 file_path=item,
                 description=file_description,
-
+                file_path_override=None,
                 parent_id=current_parent_id,
                 dataset_id=dataset_id,
                 project_id=project_id,
@@ -1310,14 +1319,14 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-
+        file_path_override: str | None,
         parent_id: UUID | None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Asynchronously upload a single file with a parent ID."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description,
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1333,7 +1342,7 @@ class DataStorageMethods:
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 parent_id=parent_id,
                 dataset_id=dataset_id,
@@ -1388,7 +1397,7 @@ class DataStorageMethods:
                 name=item.name,
                 file_path=item,
                 description=file_description,
-
+                file_path_override=None,
                 parent_id=current_parent_id,
                 dataset_id=dataset_id,
                 project_id=project_id,
@@ -1519,7 +1528,7 @@ class DataStorageMethods:
         name: str,
         content: str,
         description: str | None = None,
-
+        file_path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -1528,7 +1537,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             content: Content to store as a string
             description: Optional description of the data storage entry
-
+            file_path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -1544,7 +1553,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1568,7 +1577,7 @@ class DataStorageMethods:
             name=name,
             content=content,
             description=description,
-
+            file_path=file_path,
             project_id=project_id,
         )
         return self._create_data_storage_entry(payload)
@@ -1590,7 +1599,7 @@ class DataStorageMethods:
         name: str,
         content: str,
         description: str | None = None,
-
+        file_path: str | None = None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
@@ -1600,7 +1609,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             content: Content to store as a string
             description: Optional description of the data storage entry
-
+            file_path: Optional path for the data storage entry
             dataset_id: Optional dataset ID to add entry to, or None to create new dataset
             project_id: ID of the project this data storage entry belongs to

@@ -1617,7 +1626,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1641,7 +1650,7 @@ class DataStorageMethods:
             name=name,
             content=content,
             description=description,
-
+            file_path=file_path,
             dataset_id=dataset_id,
             project_id=project_id,
         )
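A recurring pattern in the hunks above is `file_path=file_path_override or file_path`: the payload records the caller-supplied override when present and otherwise falls back to the local path. A minimal standalone sketch of that semantics (not the client's own code):

    from pathlib import Path

    def resolve_storage_path(
        file_path: Path, file_path_override: str | Path | None
    ) -> str | Path:
        # Prefer the override; note that a falsy override (None or an empty
        # string) falls through to the local path because of `or`.
        return file_path_override or file_path

    assert resolve_storage_path(Path("data.csv"), "bucket/run-1/data.csv") == "bucket/run-1/data.csv"
    assert resolve_storage_path(Path("data.csv"), None) == Path("data.csv")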
@@ -1788,7 +1797,7 @@ class DataStorageMethods:
         name: str,
         file_path: str | Path,
         description: str | None = None,
-
+        file_path_override: str | Path | None = None,
         as_collection: bool = False,
         manifest_filename: str | None = None,
         ignore_patterns: list[str] | None = None,
@@ -1808,7 +1817,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             file_path: Path to file or directory to upload
             description: Optional description of the data storage entry
-
+            file_path_override: Optional path for the data storage entry
             as_collection: If true, upload directories as a single zip file collection.
             manifest_filename: Name of manifest file (JSON or YAML) containing:
                 - entries - Map of file/directory names to their manifest entries
@@ -1834,7 +1843,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1861,7 +1870,7 @@ class DataStorageMethods:
                     name,
                     file_path,
                     description,
-
+                    file_path_override,
                     ignore_patterns,
                     ignore_filename,
                     project_id,
@@ -1883,7 +1892,7 @@ class DataStorageMethods:
                 )
                 return responses[0]
             return self._upload_data_single_file(
-                name, file_path, description,
+                name, file_path, description, file_path_override, project_id
             )

         except HTTPStatusError as e:
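The public wrapper that forwards to `_upload_data_single_file` is not named in this diff; the sketch below assumes a method called `upload_data` with the parameter shape shown in the hunks above, expressed as a Protocol so nothing is asserted about the real class:

    from pathlib import Path
    from typing import Any, Protocol

    class Uploader(Protocol):
        # Parameter shape taken from the hunks above; the method name
        # "upload_data" is an assumption, not confirmed by this diff.
        def upload_data(
            self,
            name: str,
            file_path: str | Path,
            description: str | None = None,
            file_path_override: str | Path | None = None,
        ) -> Any: ...

    def push_notes(client: Uploader) -> Any:
        # The override places the object at an explicit storage path instead
        # of deriving it from the local filename.
        return client.upload_data(
            name="experiment-notes",
            file_path=Path("./notes.txt"),
            description="Lab notes for run 42",
            file_path_override="experiments/run-42/notes.txt",
        )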
@@ -1904,7 +1913,7 @@ class DataStorageMethods:
         name: str,
         file_path: str | Path,
         description: str | None = None,
-
+        file_path_override: str | Path | None = None,
         as_collection: bool = False,
         manifest_filename: str | None = None,
         ignore_patterns: list[str] | None = None,
@@ -1918,7 +1927,7 @@ class DataStorageMethods:
             name: Name of the data storage entry.
             file_path: Path to the file or directory to upload.
             description: Optional description for the entry.
-
+            file_path_override: Optional GCS path for the entry.
             as_collection: If uploading a directory, `True` zips it into a single collection,
                 `False` uploads it as a hierarchical structure of individual objects.
             manifest_filename: Optional manifest file (JSON or YAML) for hierarchical uploads containing:
@@ -1945,7 +1954,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1972,7 +1981,7 @@ class DataStorageMethods:
                     name,
                     file_path,
                     description,
-
+                    file_path_override,
                     ignore_patterns,
                     ignore_filename,
                     project_id,
@@ -1993,7 +2002,7 @@ class DataStorageMethods:
                 )
                 return responses[0]
             return await self._aupload_data_single_file(
-                name, file_path, description,
+                name, file_path, description, file_path_override, dataset_id, project_id
             )

         except HTTPStatusError as e:
@@ -2015,7 +2024,6 @@ class DataStorageMethods:
         existing_location: DataStorageLocationPayload,
         description: str | None = None,
         as_collection: bool = False,
-        path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -2032,7 +2040,7 @@ class DataStorageMethods:
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
                 you are uploading a single file.
-
+            file_path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -2048,7 +2056,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -2071,7 +2079,6 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
             existing_location=existing_location,
             project_id=project_id,
             is_collection=as_collection,
@@ -2101,7 +2108,6 @@ class DataStorageMethods:
         existing_location: DataStorageLocationPayload,
         as_collection: bool = False,
         description: str | None = None,
-        path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -2118,7 +2124,7 @@ class DataStorageMethods:
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
                 you are uploading a single file.
-
+            file_path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -2134,7 +2140,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -2157,7 +2163,6 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
             existing_location=existing_location,
             project_id=project_id,
             is_collection=as_collection,
@@ -2464,7 +2469,7 @@ class DataStorageMethods:
     def fetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | list[Path] | None:
+    ) -> RawFileFetchResponse | str | Path | list[Path] | None:
         """Fetch data from the storage system (sync version).

         Args:
@@ -2527,6 +2532,13 @@ class DataStorageMethods:
                     f"No content found for data storage entry {data_storage_id}"
                 )
                 return None
+
+            if result.data_storage.file_path:
+                return RawFileFetchResponse(
+                    filename=Path(result.data_storage.file_path),
+                    file_content=content,
+                )
+
             return content

         raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
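Since `fetch_data_from_storage` can now return a `RawFileFetchResponse` in addition to the previous types, callers should branch on the result; a minimal consumer sketch:

    from pathlib import Path

    from futurehouse_client.models.data_storage_methods import RawFileFetchResponse

    def describe_fetch(result: RawFileFetchResponse | str | Path | list[Path] | None) -> str:
        if result is None:
            return "no content"
        if isinstance(result, RawFileFetchResponse):
            # New in 0.5.3: entries with a stored file_path come back as a
            # filename/content pair instead of a bare string.
            return f"{result.filename}: {len(result.file_content)} chars"
        if isinstance(result, list):
            return f"{len(result)} downloaded paths"
        return str(result)  # plain string content or a single Path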
@@ -2547,7 +2559,7 @@ class DataStorageMethods:
     async def afetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | list[Path] | None:
+    ) -> RawFileFetchResponse | str | Path | list[Path] | None:
         """Fetch data from the storage system.

         Args:
@@ -2613,6 +2625,13 @@ class DataStorageMethods:
                     f"No content found for data storage entry {data_storage_id}"
                 )
                 return None
+
+            if result.data_storage.file_path:
+                return RawFileFetchResponse(
+                    filename=Path(result.data_storage.file_path),
+                    file_content=content,
+                )
+
             return content

         raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
@@ -2798,7 +2817,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                -
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
futurehouse_client/clients/rest_client.py
CHANGED
@@ -46,6 +46,7 @@ from futurehouse_client.clients.data_storage_methods import DataStorageMethods
 from futurehouse_client.models.app import (
     AuthType,
     JobDeploymentConfig,
+    LiteTaskResponse,
     Stage,
     TaskRequest,
     TaskResponse,
@@ -530,8 +531,12 @@ class RestClient(DataStorageMethods):
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
     def get_task(
-        self,
-
+        self,
+        task_id: str | None = None,
+        history: bool = False,
+        verbose: bool = False,
+        lite: bool = False,
+    ) -> "TaskResponse | LiteTaskResponse":
         """Get details for a specific task."""
         task_id = task_id or self.trajectory_id
         url = f"/v0.1/trajectories/{task_id}"
@@ -547,7 +552,9 @@ class RestClient(DataStorageMethods):
                     "job_id": task_id,
                 },
             ),
-            self.client.stream(
+            self.client.stream(
+                "GET", url, params={"history": history, "lite": lite}
+            ) as response,
         ):
             if response.status_code in {401, 403}:
                 raise PermissionError(
@@ -558,6 +565,10 @@ class RestClient(DataStorageMethods):
             data = json.loads(json_data)
             if "id" not in data:
                 data["id"] = task_id
+
+            if lite:
+                return LiteTaskResponse(**data)
+
             verbose_response = TaskResponseVerbose(**data)

             if verbose:
@@ -571,8 +582,12 @@ class RestClient(DataStorageMethods):
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
     async def aget_task(
-        self,
-
+        self,
+        task_id: str | None = None,
+        history: bool = False,
+        verbose: bool = False,
+        lite: bool = False,
+    ) -> "TaskResponse | LiteTaskResponse":
         """Get details for a specific task asynchronously."""
         task_id = task_id or self.trajectory_id
         url = f"/v0.1/trajectories/{task_id}"
@@ -588,7 +603,7 @@ class RestClient(DataStorageMethods):
             },
         ):
             async with self.async_client.stream(
-                "GET", url, params={"history": history}
+                "GET", url, params={"history": history, "lite": lite}
             ) as response:
                 if response.status_code in {401, 403}:
                     raise PermissionError(
@@ -599,6 +614,10 @@ class RestClient(DataStorageMethods):
                 data = json.loads(json_data)
                 if "id" not in data:
                     data["id"] = task_id
+
+                if lite:
+                    return LiteTaskResponse(**data)
+
                 verbose_response = TaskResponseVerbose(**data)

                 if verbose:
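A sketch of the new `lite` flag from the caller's side (client construction is elided; `client` is assumed to be a configured RestClient):

    def poll_status(client, trajectory_id: str) -> str:
        # lite=True asks the server for the trimmed payload and returns a
        # LiteTaskResponse (task_id, query, status) instead of a TaskResponse.
        task = client.get_task(trajectory_id, lite=True)
        return task.status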
@@ -735,7 +754,9 @@ class RestClient(DataStorageMethods):
         progress_bar: bool = False,
         concurrency: int = 10,
         timeout: int = DEFAULT_AGENT_TIMEOUT,
-    ) -> list[
+    ) -> list[
+        LiteTaskResponse | TaskResponse
+    ]:  # return will always be lite because we always call with lite=True
         all_tasks: Collection[TaskRequest | dict[str, Any]] = (
             cast(Collection[TaskRequest | dict[str, Any]], [task_data])
             if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
@@ -749,7 +770,7 @@ class RestClient(DataStorageMethods):
         )

         start_time = time.monotonic()
-        completed_tasks: dict[str, TaskResponse] = {}
+        completed_tasks: dict[str, LiteTaskResponse | TaskResponse] = {}

         if progress_bar:
             progress = tqdm(
@@ -760,7 +781,7 @@ class RestClient(DataStorageMethods):
             task_results = await gather_with_concurrency(
                 concurrency,
                 [
-                    self.aget_task(task_id, verbose=verbose)
+                    self.aget_task(task_id, verbose=verbose, lite=True)
                     for task_id in trajectory_ids
                     if task_id not in completed_tasks
                 ],
@@ -807,7 +828,9 @@ class RestClient(DataStorageMethods):
         verbose: bool = False,
         progress_bar: bool = False,
         timeout: int = DEFAULT_AGENT_TIMEOUT,
-    ) -> list[
+    ) -> list[
+        LiteTaskResponse | TaskResponse
+    ]:  # return will always be lite because we always call with lite=True
         """Run multiple tasks and wait for them to complete.

         Args:
@@ -828,7 +851,7 @@ class RestClient(DataStorageMethods):
         trajectory_ids = [self.create_task(task) for task in all_tasks]

         start_time = time.monotonic()
-        completed_tasks: dict[str, TaskResponse] = {}
+        completed_tasks: dict[str, LiteTaskResponse | TaskResponse] = {}

         if progress_bar:
             progress = sync_tqdm(
@@ -842,7 +865,7 @@ class RestClient(DataStorageMethods):
             if task_id in completed_tasks:
                 continue

-            task = self.get_task(task_id, verbose=verbose)
+            task = self.get_task(task_id, verbose=verbose, lite=True)

             if not ExecutionStatus(task.status).is_terminal_state():
                 all_done = False
futurehouse_client/models/app.py
CHANGED
@@ -749,6 +749,22 @@ class SimpleOrganization(BaseModel):
     display_name: str


+class LiteTaskResponse(BaseModel):
+    task_id: UUID = Field(description="Identifier for a trajectory")
+    query: str = Field(description="Query executed for the trajectory")
+    status: str = Field(description="Current status of the trajectory")
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
+        if not isinstance(data, dict):
+            return data
+        data["query"] = data.get("task", data.get("query"))
+        data["task_id"] = cast(UUID, data.get("id", data.get("task_id")))
+        return data
+
+
 class TaskResponse(BaseModel):
     """Base class for task responses. This holds attributes shared over all futurehouse jobs."""

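The before-validator above aliases the raw trajectory keys (`id`, `task`) onto the model's fields, so a raw payload validates directly; a small sketch against the new model (the status string is illustrative):

    from uuid import uuid4

    from futurehouse_client.models.app import LiteTaskResponse

    # Raw payloads from the trajectories endpoint use "id" and "task".
    raw = {"id": uuid4(), "task": "Summarize recent CRISPR papers", "status": "success"}
    lite = LiteTaskResponse(**raw)
    assert lite.task_id == raw["id"]
    assert lite.query == raw["task"]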
futurehouse_client/models/data_storage_methods.py
CHANGED
@@ -56,7 +56,7 @@ class DataStorageEntry(BaseModel):
         default=None,
         description="ID of the dataset this entry belongs to",
     )
-
+    file_path: str | None = Field(
         default=None,
         description="Path in the storage system where this entry is located, if a file.",
     )
@@ -158,9 +158,9 @@ class DataStorageRequestPayload(BaseModel):
         default=None,
         description="ID of existing dataset to add entry to, or None to create new dataset",
     )
-
+    file_path: PathLike | str | None = Field(
         default=None,
-        description="
+        description="Filepath to store in the GCS bucket.",
     )
     existing_location: DataStorageLocationPayload | None = Field(
         default=None, description="Target storage metadata"
@@ -385,5 +385,12 @@ class DirectoryUploadConfig(BaseModel):
     )


+class RawFileFetchResponse(BaseModel):
+    """Response model for fetching a raw file."""
+
+    filename: Path = Field(description="Name of the file")
+    file_content: str = Field(description="Content of the file")
+
+
 # Forward reference resolution for DirectoryManifest
 DirectoryManifest.model_rebuild()
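A minimal construction of the new `RawFileFetchResponse` model added above:

    from pathlib import Path

    from futurehouse_client.models.data_storage_methods import RawFileFetchResponse

    resp = RawFileFetchResponse(filename=Path("results/summary.txt"), file_content="hello")
    assert resp.filename.suffix == ".txt"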
futurehouse_client/version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.5.
-__version_tuple__ = version_tuple = (0, 5,
+__version__ = version = '0.5.3'
+__version_tuple__ = version_tuple = (0, 5, 3)

 __commit_id__ = commit_id = None
{futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/RECORD
CHANGED
@@ -1,14 +1,14 @@
 futurehouse_client/__init__.py,sha256=q5cpcuPkhTaueXsySsgWpH0F-2EsRxcdJfP91ze6khU,991
 futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/version.py,sha256=
+futurehouse_client/version.py,sha256=EWl7XaGZUG57Di8WiRltpKAkwy1CShJuJ-i6_rAPr-w,704
 futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
-futurehouse_client/clients/data_storage_methods.py,sha256=
+futurehouse_client/clients/data_storage_methods.py,sha256=3YurBAbEduZ8BdoHGdR9nro8a20pQPHAcbI-FRpVApY,121341
 futurehouse_client/clients/job_client.py,sha256=b5gpzulZpxpv9R337r3UKItnMdtd6CGlI1sV3_VQJso,13985
-futurehouse_client/clients/rest_client.py,sha256=
+futurehouse_client/clients/rest_client.py,sha256=LI8-F4cUcv8gnWhI06NB0k6iMnfDZVKv9Uh0FfXiDwo,110915
 futurehouse_client/models/__init__.py,sha256=N1MwDUYonsMN9NdaShsYcJspyL7H756MYj7VWFeD3fk,978
-futurehouse_client/models/app.py,sha256=
+futurehouse_client/models/app.py,sha256=2bFtQY1wl9oZM5Gyx1NrybKC9-fKzk20YRu20h2LiHY,32581
 futurehouse_client/models/client.py,sha256=554vp7Cr-17BTeRZtN5DhCRQesRRtr31ZPkHXUrhyCE,3835
-futurehouse_client/models/data_storage_methods.py,sha256=
+futurehouse_client/models/data_storage_methods.py,sha256=qPM0-cljMUQKIpC93Ua0rbfXFVWZV7nA6Uehc3u9DtM,13763
 futurehouse_client/models/job_event.py,sha256=lMrx-lV7BQkKl419ErWZ6Q1EjurmhBFSns0z6zwGaVo,2766
 futurehouse_client/models/rest.py,sha256=SbeXZSPUCM0lQ_gVUPa64vKzMxuUVgqmJ5YThfDWs8g,4726
 futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -17,8 +17,8 @@ futurehouse_client/utils/general.py,sha256=PIkGLCSA3kUvc6mwR-prEB7YnMdKILOIm6cPo
 futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
 futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
 futurehouse_client/utils/world_model_tools.py,sha256=v2krZGrco0ur2a_pcRMtnQL05SxlIoBXuJ5R1JkQNws,2921
-futurehouse_client-0.5.
-futurehouse_client-0.5.
-futurehouse_client-0.5.
-futurehouse_client-0.5.
-futurehouse_client-0.5.
+futurehouse_client-0.5.3.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
+futurehouse_client-0.5.3.dist-info/METADATA,sha256=fvr2V59GYUzFP4lAaWmIntFOwb-S1pjz_3pwzuYgAQc,27054
+futurehouse_client-0.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.5.3.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.5.3.dist-info/RECORD,,
{futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/WHEEL: file without changes
{futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/licenses/LICENSE: file without changes
{futurehouse_client-0.5.1.dist-info → futurehouse_client-0.5.3.dist-info}/top_level.txt: file without changes