futurehouse-client 0.5.1__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {futurehouse_client-0.5.1/src/futurehouse_client.egg-info → futurehouse_client-0.5.3}/PKG-INFO +1 -1
  2. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/data_storage_methods.py +75 -56
  3. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/rest_client.py +35 -12
  4. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/app.py +16 -0
  5. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/data_storage_methods.py +10 -3
  6. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/version.py +3 -3
  7. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3/src/futurehouse_client.egg-info}/PKG-INFO +1 -1
  8. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data_storage_e2e.py +68 -4
  9. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data_storage_methods.py +2 -2
  10. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_rest.py +3 -0
  11. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/LICENSE +0 -0
  12. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/README.md +0 -0
  13. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/data_storage.md +0 -0
  14. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/docs/__init__.py +0 -0
  15. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/docs/client_notebook.ipynb +0 -0
  16. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/pyproject.toml +0 -0
  17. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/setup.cfg +0 -0
  18. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/__init__.py +0 -0
  19. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/__init__.py +0 -0
  20. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/job_client.py +0 -0
  21. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/__init__.py +0 -0
  22. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/client.py +0 -0
  23. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/job_event.py +0 -0
  24. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/rest.py +0 -0
  25. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/py.typed +0 -0
  26. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/__init__.py +0 -0
  27. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/auth.py +0 -0
  28. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/general.py +0 -0
  29. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/module_utils.py +0 -0
  30. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/monitoring.py +0 -0
  31. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/utils/world_model_tools.py +0 -0
  32. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client.egg-info/SOURCES.txt +0 -0
  33. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client.egg-info/dependency_links.txt +0 -0
  34. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client.egg-info/requires.txt +0 -0
  35. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client.egg-info/top_level.txt +0 -0
  36. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_client.py +0 -0
  37. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data/test_file.txt +0 -0
  38. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data/test_information.txt +0 -0
  39. {futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data/test_manifest.yaml +0 -0

{futurehouse_client-0.5.1/src/futurehouse_client.egg-info → futurehouse_client-0.5.3}/PKG-INFO +1 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.5.1
+Version: 0.5.3
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 License: Apache License

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/data_storage_methods.py +75 -56

@@ -37,6 +37,7 @@ from futurehouse_client.models.data_storage_methods import (
     DirectoryManifest,
     GetDatasetAndEntriesResponse,
     ManifestEntry,
+    RawFileFetchResponse,
 )
 from futurehouse_client.models.rest import (
     DataStorageSearchPayload,
@@ -504,11 +505,15 @@ class DataStorageMethods:
             raise DataStorageError(f"File or directory not found: {file_path}")
         return file_path

-    def _build_zip_path(self, name: str, path: str | None) -> str:
+    def _build_zip_path(
+        self, name: str, path_override: str | Path | None
+    ) -> str | Path:
         """Build GCS path for zip file."""
         zip_filename = name if name.endswith(".zip") else f"{name}.zip"
-        if path:
-            return f"{path.rstrip('/')}/{zip_filename}"
+        if path_override:
+            if isinstance(path_override, str):
+                return f"{path_override.rstrip('/')}/{zip_filename}"
+            return path_override / zip_filename
         return zip_filename

     # TODO: methods in here need to be moved to fh tools
@@ -663,7 +668,11 @@ class DataStorageMethods:
             raise DataStorageError(f"Failed to download from GCS: {e}") from e

     def _prepare_single_file_upload(
-        self, name: str, file_path: Path, description: str | None, path: str | None
+        self,
+        name: str,
+        file_path: Path,
+        description: str | None,
+        file_path_override: str | Path | None,
     ) -> tuple[int, DataStorageRequestPayload | None]:
         """Prepare single file for upload, return file size and payload if text content."""
         file_size = file_path.stat().st_size
@@ -681,7 +690,7 @@ class DataStorageMethods:
                 name=name,
                 description=description,
                 content=text_content,
-                path=path,
+                file_path=file_path_override or file_path,
                 is_collection=False,
             )
             logger.warning(
@@ -777,7 +786,7 @@ class DataStorageMethods:
         name: str,
         dir_path: Path,
         description: str | None,
-        path: str | None = None,
+        dir_path_override: str | Path | None = None,
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
@@ -789,7 +798,7 @@ class DataStorageMethods:
             name: Name for the directory collection
             dir_path: Path to directory to zip and upload
             description: Description for the collection
-            path: Optional GCS path for the zip file
+            dir_path_override: Optional GCS path for the zip file
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
@@ -807,11 +816,11 @@ class DataStorageMethods:
                 dir_path, temp_zip_path, ignore_patterns, ignore_filename
             )

-            zip_gcs_path = self._build_zip_path(name, path)
+            zip_gcs_path = self._build_zip_path(name, dir_path_override)
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-                path=zip_gcs_path,
+                file_path=zip_gcs_path,
                 is_collection=True,
                 project_id=project_id,
                 tags=tags,
@@ -860,7 +869,7 @@ class DataStorageMethods:
         name: str,
         dir_path: Path,
         description: str | None,
-        path: str | None = None,
+        dir_path_override: str | Path | None = None,
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
@@ -872,7 +881,7 @@ class DataStorageMethods:
             name: Name for the directory collection
             dir_path: Path to directory to zip and upload
             description: Description for the collection
-            path: Optional GCS path for the zip file
+            dir_path_override: Optional GCS path for the zip file
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
@@ -890,11 +899,11 @@ class DataStorageMethods:
                 dir_path, temp_zip_path, ignore_patterns, ignore_filename
             )

-            zip_gcs_path = self._build_zip_path(name, path)
+            zip_gcs_path = self._build_zip_path(name, dir_path_override)
             payload = DataStorageRequestPayload(
                 name=name,
                 description=description,
-                path=zip_gcs_path,
+                file_path=zip_gcs_path,
                 is_collection=True,
                 project_id=project_id,
                 tags=tags,
@@ -940,7 +949,7 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-        path: str | None = None,
+        file_path_override: str | Path | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Upload a single file."""
@@ -960,7 +969,7 @@ class DataStorageMethods:
                 name=name,
                 description=description,
                 content=text_content,
-                path=path,
+                file_path=file_path_override or file_path,
                 is_collection=False,
                 project_id=project_id,
             )
@@ -977,7 +986,7 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
+            file_path=file_path_override or file_path,
             is_collection=False,
             project_id=project_id,
         )
@@ -1028,13 +1037,13 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-        path: str | None = None,
+        file_path_override: str | Path | None = None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Asynchronously upload a single file."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description, path
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1048,7 +1057,7 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
+            file_path=file_path_override or file_path,
             is_collection=False,
             dataset_id=dataset_id,
             project_id=project_id,
@@ -1089,14 +1098,14 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-        path: str | None,
+        file_path_override: str | None,
         parent_id: UUID | None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Upload a single file with a parent ID (sync version)."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description, path
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1112,7 +1121,7 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
+            file_path=file_path_override or file_path,
             is_collection=False,
             parent_id=parent_id,
             dataset_id=dataset_id,
@@ -1167,7 +1176,7 @@ class DataStorageMethods:
                 name=item.name,
                 file_path=item,
                 description=file_description,
-                path=None,
+                file_path_override=None,
                 parent_id=current_parent_id,
                 dataset_id=dataset_id,
                 project_id=project_id,
@@ -1310,14 +1319,14 @@ class DataStorageMethods:
         name: str,
         file_path: Path,
         description: str | None,
-        path: str | None,
+        file_path_override: str | None,
         parent_id: UUID | None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Asynchronously upload a single file with a parent ID."""
         file_size, text_payload = self._prepare_single_file_upload(
-            name, file_path, description, path
+            name, file_path, description, file_path_override
         )

         if text_payload:
@@ -1333,7 +1342,7 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
+            file_path=file_path_override or file_path,
             is_collection=False,
             parent_id=parent_id,
             dataset_id=dataset_id,
@@ -1388,7 +1397,7 @@ class DataStorageMethods:
                 name=item.name,
                 file_path=item,
                 description=file_description,
-                path=None,
+                file_path_override=None,
                 parent_id=current_parent_id,
                 dataset_id=dataset_id,
                 project_id=project_id,
@@ -1519,7 +1528,7 @@ class DataStorageMethods:
         name: str,
         content: str,
         description: str | None = None,
-        path: str | None = None,
+        file_path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -1528,7 +1537,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             content: Content to store as a string
             description: Optional description of the data storage entry
-            path: Optional path for the data storage entry
+            file_path: Optional path for the data storage entry
            project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -1544,7 +1553,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1568,7 +1577,7 @@ class DataStorageMethods:
             name=name,
             content=content,
             description=description,
-            path=path,
+            file_path=file_path,
             project_id=project_id,
         )
         return self._create_data_storage_entry(payload)
@@ -1590,7 +1599,7 @@ class DataStorageMethods:
         name: str,
         content: str,
         description: str | None = None,
-        path: str | None = None,
+        file_path: str | None = None,
         dataset_id: UUID | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
@@ -1600,7 +1609,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             content: Content to store as a string
             description: Optional description of the data storage entry
-            path: Optional path for the data storage entry
+            file_path: Optional path for the data storage entry
             dataset_id: Optional dataset ID to add entry to, or None to create new dataset
             project_id: ID of the project this data storage entry belongs to

@@ -1617,7 +1626,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1641,7 +1650,7 @@ class DataStorageMethods:
             name=name,
             content=content,
             description=description,
-            path=path,
+            file_path=file_path,
             dataset_id=dataset_id,
             project_id=project_id,
         )
@@ -1788,7 +1797,7 @@ class DataStorageMethods:
         name: str,
         file_path: str | Path,
         description: str | None = None,
-        path: str | None = None,
+        file_path_override: str | Path | None = None,
         as_collection: bool = False,
         manifest_filename: str | None = None,
         ignore_patterns: list[str] | None = None,
@@ -1808,7 +1817,7 @@ class DataStorageMethods:
             name: Name of the data storage entry
             file_path: Path to file or directory to upload
             description: Optional description of the data storage entry
-            path: Optional path for the data storage entry
+            file_path_override: Optional path for the data storage entry
             as_collection: If true, upload directories as a single zip file collection.
             manifest_filename: Name of manifest file (JSON or YAML) containing:
                 - entries - Map of file/directory names to their manifest entries
@@ -1834,7 +1843,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1861,7 +1870,7 @@ class DataStorageMethods:
                 name,
                 file_path,
                 description,
-                path,
+                file_path_override,
                 ignore_patterns,
                 ignore_filename,
                 project_id,
@@ -1883,7 +1892,7 @@ class DataStorageMethods:
                 )
                 return responses[0]
             return self._upload_data_single_file(
-                name, file_path, description, path, project_id
+                name, file_path, description, file_path_override, project_id
             )

         except HTTPStatusError as e:
@@ -1904,7 +1913,7 @@ class DataStorageMethods:
         name: str,
         file_path: str | Path,
         description: str | None = None,
-        path: str | None = None,
+        file_path_override: str | Path | None = None,
         as_collection: bool = False,
         manifest_filename: str | None = None,
         ignore_patterns: list[str] | None = None,
@@ -1918,7 +1927,7 @@ class DataStorageMethods:
             name: Name of the data storage entry.
             file_path: Path to the file or directory to upload.
             description: Optional description for the entry.
-            path: Optional GCS path for the entry.
+            file_path_override: Optional GCS path for the entry.
             as_collection: If uploading a directory, `True` zips it into a single collection,
                 `False` uploads it as a hierarchical structure of individual objects.
             manifest_filename: Optional manifest file (JSON or YAML) for hierarchical uploads containing:
@@ -1945,7 +1954,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -1972,7 +1981,7 @@ class DataStorageMethods:
                 name,
                 file_path,
                 description,
-                path,
+                file_path_override,
                 ignore_patterns,
                 ignore_filename,
                 project_id,
@@ -1993,7 +2002,7 @@ class DataStorageMethods:
                 )
                 return responses[0]
             return await self._aupload_data_single_file(
-                name, file_path, description, path, dataset_id, project_id
+                name, file_path, description, file_path_override, dataset_id, project_id
             )

         except HTTPStatusError as e:
@@ -2015,7 +2024,6 @@ class DataStorageMethods:
         existing_location: DataStorageLocationPayload,
         description: str | None = None,
         as_collection: bool = False,
-        path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -2032,7 +2040,7 @@ class DataStorageMethods:
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
                 you are uploading a single file.
-            path: Optional path for the data storage entry
+            file_path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -2048,7 +2056,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -2071,7 +2079,6 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
             existing_location=existing_location,
             project_id=project_id,
             is_collection=as_collection,
@@ -2101,7 +2108,6 @@ class DataStorageMethods:
         existing_location: DataStorageLocationPayload,
         as_collection: bool = False,
         description: str | None = None,
-        path: str | None = None,
         project_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store content as a string in the data storage system.
@@ -2118,7 +2124,7 @@ class DataStorageMethods:
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
                 you are uploading a single file.
-            path: Optional path for the data storage entry
+            file_path: Optional path for the data storage entry
             project_id: ID of the project this data storage entry belongs to

         Returns:
@@ -2134,7 +2140,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
@@ -2157,7 +2163,6 @@ class DataStorageMethods:
         payload = DataStorageRequestPayload(
             name=name,
             description=description,
-            path=path,
             existing_location=existing_location,
             project_id=project_id,
             is_collection=as_collection,
@@ -2464,7 +2469,7 @@ class DataStorageMethods:
     def fetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | list[Path] | None:
+    ) -> RawFileFetchResponse | str | Path | list[Path] | None:
         """Fetch data from the storage system (sync version).

         Args:
@@ -2527,6 +2532,13 @@ class DataStorageMethods:
                     f"No content found for data storage entry {data_storage_id}"
                 )
                 return None
+
+            if result.data_storage.file_path:
+                return RawFileFetchResponse(
+                    filename=Path(result.data_storage.file_path),
+                    file_content=content,
+                )
+
             return content

         raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
@@ -2547,7 +2559,7 @@ class DataStorageMethods:
     async def afetch_data_from_storage(
         self,
         data_storage_id: UUID | None = None,
-    ) -> str | Path | list[Path] | None:
+    ) -> RawFileFetchResponse | str | Path | list[Path] | None:
         """Fetch data from the storage system.

         Args:
@@ -2613,6 +2625,13 @@ class DataStorageMethods:
                     f"No content found for data storage entry {data_storage_id}"
                 )
                 return None
+
+            if result.data_storage.file_path:
+                return RawFileFetchResponse(
+                    filename=Path(result.data_storage.file_path),
+                    file_content=content,
+                )
+
             return content

         raise DataStorageRetrievalError(f"Unsupported storage type: {storage_type}")
@@ -2798,7 +2817,7 @@ class DataStorageMethods:
                 - parent_id - ID of the parent entry for hierarchical storage
                 - project_id - ID of the project this entry belongs to
                 - dataset_id - ID of the dataset this entry belongs to
-                - path - Path in the storage system where this entry is located
+                - file_path - Filepath in the storage system where this entry is located
                 - bigquery_schema - Target BigQuery schema for the entry
                 - user_id - ID of the user who created this entry
                 - created_at - Timestamp when the entry was created
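
Taken as a whole, the data_storage_methods.py changes rename the `path` keyword to `file_path` (raw-content methods) or `file_path_override` (file and directory uploads), accept `Path` objects alongside strings, and make the fetch methods return the new `RawFileFetchResponse` whenever the entry carries a stored `file_path`. A minimal usage sketch under those changes; `client` is assumed to be an already-authenticated `RestClient`, and the file names are hypothetical:

    from pathlib import Path

    from futurehouse_client.models.data_storage_methods import RawFileFetchResponse

    # Upload a local file, overriding the path it is stored under remotely
    # (this keyword was called `path` in 0.5.1).
    response = client.store_file_content(
        name="example entry",
        file_path=Path("local/data.txt"),
        file_path_override=Path("remote/data.txt"),
    )

    # In 0.5.3, raw file entries come back as a RawFileFetchResponse
    # (stored filename plus content) instead of a bare string.
    fetched = client.fetch_data_from_storage(response.data_storage.id)
    if isinstance(fetched, RawFileFetchResponse):
        print(fetched.filename, fetched.file_content)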

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/clients/rest_client.py +35 -12

@@ -46,6 +46,7 @@ from futurehouse_client.clients.data_storage_methods import DataStorageMethods
 from futurehouse_client.models.app import (
     AuthType,
     JobDeploymentConfig,
+    LiteTaskResponse,
     Stage,
     TaskRequest,
     TaskResponse,
@@ -530,8 +531,12 @@ class RestClient(DataStorageMethods):
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
     def get_task(
-        self, task_id: str | None = None, history: bool = False, verbose: bool = False
-    ) -> "TaskResponse":
+        self,
+        task_id: str | None = None,
+        history: bool = False,
+        verbose: bool = False,
+        lite: bool = False,
+    ) -> "TaskResponse | LiteTaskResponse":
         """Get details for a specific task."""
         task_id = task_id or self.trajectory_id
         url = f"/v0.1/trajectories/{task_id}"
@@ -547,7 +552,9 @@ class RestClient(DataStorageMethods):
                     "job_id": task_id,
                 },
             ),
-            self.client.stream("GET", url, params={"history": history}) as response,
+            self.client.stream(
+                "GET", url, params={"history": history, "lite": lite}
+            ) as response,
         ):
             if response.status_code in {401, 403}:
                 raise PermissionError(
@@ -558,6 +565,10 @@ class RestClient(DataStorageMethods):
             data = json.loads(json_data)
             if "id" not in data:
                 data["id"] = task_id
+
+            if lite:
+                return LiteTaskResponse(**data)
+
             verbose_response = TaskResponseVerbose(**data)

             if verbose:
@@ -571,8 +582,12 @@ class RestClient(DataStorageMethods):
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
     async def aget_task(
-        self, task_id: str | None = None, history: bool = False, verbose: bool = False
-    ) -> "TaskResponse":
+        self,
+        task_id: str | None = None,
+        history: bool = False,
+        verbose: bool = False,
+        lite: bool = False,
+    ) -> "TaskResponse | LiteTaskResponse":
         """Get details for a specific task asynchronously."""
         task_id = task_id or self.trajectory_id
         url = f"/v0.1/trajectories/{task_id}"
@@ -588,7 +603,7 @@ class RestClient(DataStorageMethods):
             },
         ):
             async with self.async_client.stream(
-                "GET", url, params={"history": history}
+                "GET", url, params={"history": history, "lite": lite}
             ) as response:
                 if response.status_code in {401, 403}:
                     raise PermissionError(
@@ -599,6 +614,10 @@ class RestClient(DataStorageMethods):
                 data = json.loads(json_data)
                 if "id" not in data:
                     data["id"] = task_id
+
+                if lite:
+                    return LiteTaskResponse(**data)
+
                 verbose_response = TaskResponseVerbose(**data)

                 if verbose:
@@ -735,7 +754,9 @@ class RestClient(DataStorageMethods):
         progress_bar: bool = False,
         concurrency: int = 10,
         timeout: int = DEFAULT_AGENT_TIMEOUT,
-    ) -> list[TaskResponse]:
+    ) -> list[
+        LiteTaskResponse | TaskResponse
+    ]:  # return will always be lite because we always call with lite=True
         all_tasks: Collection[TaskRequest | dict[str, Any]] = (
             cast(Collection[TaskRequest | dict[str, Any]], [task_data])
             if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
@@ -749,7 +770,7 @@ class RestClient(DataStorageMethods):
         )

         start_time = time.monotonic()
-        completed_tasks: dict[str, TaskResponse] = {}
+        completed_tasks: dict[str, LiteTaskResponse | TaskResponse] = {}

         if progress_bar:
             progress = tqdm(
@@ -760,7 +781,7 @@ class RestClient(DataStorageMethods):
             task_results = await gather_with_concurrency(
                 concurrency,
                 [
-                    self.aget_task(task_id, verbose=verbose)
+                    self.aget_task(task_id, verbose=verbose, lite=True)
                     for task_id in trajectory_ids
                     if task_id not in completed_tasks
                 ],
@@ -807,7 +828,9 @@ class RestClient(DataStorageMethods):
         verbose: bool = False,
         progress_bar: bool = False,
         timeout: int = DEFAULT_AGENT_TIMEOUT,
-    ) -> list[TaskResponse]:
+    ) -> list[
+        LiteTaskResponse | TaskResponse
+    ]:  # return will always be lite because we always call with lite=True
         """Run multiple tasks and wait for them to complete.

         Args:
@@ -828,7 +851,7 @@ class RestClient(DataStorageMethods):
         trajectory_ids = [self.create_task(task) for task in all_tasks]

         start_time = time.monotonic()
-        completed_tasks: dict[str, TaskResponse] = {}
+        completed_tasks: dict[str, LiteTaskResponse | TaskResponse] = {}

         if progress_bar:
             progress = sync_tqdm(
@@ -842,7 +865,7 @@ class RestClient(DataStorageMethods):
             if task_id in completed_tasks:
                 continue

-            task = self.get_task(task_id, verbose=verbose)
+            task = self.get_task(task_id, verbose=verbose, lite=True)

             if not ExecutionStatus(task.status).is_terminal_state():
                 all_done = False
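
The net effect in rest_client.py: `get_task` and `aget_task` gain a `lite` flag that is forwarded as a query parameter and, when true, parses the response into the new `LiteTaskResponse` instead of building a `TaskResponseVerbose`; the batch polling helpers now always pass `lite=True`, so their returned lists contain lite responses. A short sketch, again assuming an authenticated `RestClient` named `client` and a hypothetical trajectory ID:

    from futurehouse_client.models.app import LiteTaskResponse

    task = client.get_task("your-trajectory-id", lite=True)  # hypothetical ID
    if isinstance(task, LiteTaskResponse):
        # Only task_id, query, and status are available on the lite model.
        print(task.task_id, task.query, task.status)

    full = client.get_task("your-trajectory-id")  # lite defaults to False -> TaskResponse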

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/app.py +16 -0

@@ -749,6 +749,22 @@ class SimpleOrganization(BaseModel):
     display_name: str


+class LiteTaskResponse(BaseModel):
+    task_id: UUID = Field(description="Identifier for a trajectory")
+    query: str = Field(description="Query executed for the trajectory")
+    status: str = Field(description="Current status of the trajectory")
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
+        if not isinstance(data, dict):
+            return data
+        data["query"] = data.get("task", data.get("query"))
+        data["task_id"] = cast(UUID, data.get("id", data.get("task_id")))
+        return data
+
+
 class TaskResponse(BaseModel):
     """Base class for task responses. This holds attributes shared over all futurehouse jobs."""

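
Because the validator runs with `mode="before"`, `LiteTaskResponse` accepts both the raw trajectory payload (which uses the keys `id` and `task`) and already-normalized data. A small illustration with made-up values:

    from futurehouse_client.models.app import LiteTaskResponse

    lite = LiteTaskResponse(**{
        "id": "00000000-0000-0000-0000-000000000000",  # remapped to task_id
        "task": "What is the boiling point of water?",  # remapped to query
        "status": "success",
    })
    assert str(lite.task_id) == "00000000-0000-0000-0000-000000000000"
    assert lite.query == "What is the boiling point of water?"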

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/models/data_storage_methods.py +10 -3

@@ -56,7 +56,7 @@ class DataStorageEntry(BaseModel):
         default=None,
         description="ID of the dataset this entry belongs to",
     )
-    path: str | None = Field(
+    file_path: str | None = Field(
         default=None,
         description="Path in the storage system where this entry is located, if a file.",
     )
@@ -158,9 +158,9 @@ class DataStorageRequestPayload(BaseModel):
         default=None,
         description="ID of existing dataset to add entry to, or None to create new dataset",
     )
-    path: PathLike | str | None = Field(
+    file_path: PathLike | str | None = Field(
         default=None,
-        description="Path to store in the GCS bucket.",
+        description="Filepath to store in the GCS bucket.",
     )
     existing_location: DataStorageLocationPayload | None = Field(
         default=None, description="Target storage metadata"
@@ -385,5 +385,12 @@ class DirectoryUploadConfig(BaseModel):
     )


+class RawFileFetchResponse(BaseModel):
+    """Response model for fetching a raw file."""
+
+    filename: Path = Field(description="Name of the file")
+    file_content: str = Field(description="Content of the file")
+
+
 # Forward reference resolution for DirectoryManifest
 DirectoryManifest.model_rebuild()
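
With the model fields renamed, code that builds a `DataStorageRequestPayload` directly now passes `file_path` where 0.5.1 expected `path`, and `RawFileFetchResponse` is the wrapper the fetch methods return for raw files. A minimal sketch with made-up values (other payload fields are assumed to keep their defaults):

    from pathlib import Path

    from futurehouse_client.models.data_storage_methods import (
        DataStorageRequestPayload,
        RawFileFetchResponse,
    )

    payload = DataStorageRequestPayload(
        name="example",
        file_path="docs/example.txt",  # was `path` in 0.5.1
        is_collection=False,
    )
    raw = RawFileFetchResponse(filename=Path("example.txt"), file_content="hello\n")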

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/src/futurehouse_client/version.py +3 -3

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.5.1'
-__version_tuple__ = version_tuple = (0, 5, 1)
+__version__ = version = '0.5.3'
+__version_tuple__ = version_tuple = (0, 5, 3)

-__commit_id__ = commit_id = 'g0c21b6efd'
+__commit_id__ = commit_id = 'gaaaa80cba'

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3/src/futurehouse_client.egg-info}/PKG-INFO +1 -1

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.5.1
+Version: 0.5.3
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 License: Apache License

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data_storage_e2e.py +68 -4

@@ -17,6 +17,7 @@ from futurehouse_client.models.data_storage_methods import (
     DataContentType,
     DataStorageLocationPayload,
     DataStorageType,
+    RawFileFetchResponse,
 )
 from futurehouse_client.models.rest import (
     FilterLogic,
@@ -90,9 +91,10 @@ async def test_store_raw_content_async(admin_client: RestClient):

 @pytest.mark.timeout(300)
 def test_store_file_content_sync(admin_client: RestClient):
+    file_path = Path("packages/futurehouse-client/tests/test_data/test_file.txt")
     response = admin_client.store_file_content(
         name=f"E2E test entry file: {uuid4()}",
-        file_path=Path("packages/futurehouse-client/tests/test_data/test_file.txt"),
+        file_path=file_path,
     )

     assert response is not None
@@ -102,8 +104,37 @@ def test_store_file_content_sync(admin_client: RestClient):
     fetch_response = admin_client.fetch_data_from_storage(response.data_storage.id)

     assert fetch_response is not None
+    assert isinstance(fetch_response, RawFileFetchResponse)
+    assert fetch_response.filename == file_path
     assert (
-        fetch_response
+        fetch_response.file_content
+        == "Here is some random text that shall immortalize Eddie's brain in code.\n"
+    )
+
+    admin_client.delete_data_storage_entry(response.data_storage.id)
+
+
+@pytest.mark.timeout(300)
+def test_store_file_content_with_path_override_sync(admin_client: RestClient):
+    file_path = Path("packages/futurehouse-client/tests/test_data/test_file.txt")
+    file_path_override = Path("test_file_override.txt")
+    response = admin_client.store_file_content(
+        name=f"E2E test entry file: {uuid4()}",
+        file_path=file_path,
+        file_path_override=file_path_override,
+    )
+
+    assert response is not None
+    assert response.data_storage.id is not None
+    assert len(response.storage_locations) > 0
+
+    fetch_response = admin_client.fetch_data_from_storage(response.data_storage.id)
+
+    assert fetch_response is not None
+    assert isinstance(fetch_response, RawFileFetchResponse)
+    assert fetch_response.filename == file_path_override
+    assert (
+        fetch_response.file_content
         == "Here is some random text that shall immortalize Eddie's brain in code.\n"
     )

@@ -133,9 +164,40 @@ def test_store_dir_content_sync(admin_client: RestClient):
 @pytest.mark.timeout(300)
 @pytest.mark.asyncio
 async def test_store_file_content_async(admin_client: RestClient):
+    file_path = Path("packages/futurehouse-client/tests/test_data/test_file.txt")
+    response = await admin_client.astore_file_content(
+        name=f"E2E test entry file: {uuid4()}",
+        file_path=file_path,
+    )
+
+    assert response is not None
+    assert response.data_storage.id is not None
+    assert len(response.storage_locations) > 0
+
+    fetch_response = await admin_client.afetch_data_from_storage(
+        response.data_storage.id
+    )
+
+    assert fetch_response is not None
+    assert isinstance(fetch_response, RawFileFetchResponse)
+    assert fetch_response.filename == file_path
+    assert (
+        fetch_response.file_content
+        == "Here is some random text that shall immortalize Eddie's brain in code.\n"
+    )
+
+    await admin_client.adelete_data_storage_entry(response.data_storage.id)
+
+
+@pytest.mark.timeout(300)
+@pytest.mark.asyncio
+async def test_store_file_content_with_path_override_async(admin_client: RestClient):
+    file_path = Path("packages/futurehouse-client/tests/test_data/test_file.txt")
+    file_path_override = Path("test_file_override.txt")
     response = await admin_client.astore_file_content(
         name=f"E2E test entry file: {uuid4()}",
-        file_path=Path("packages/futurehouse-client/tests/test_data/test_file.txt"),
+        file_path=file_path,
+        file_path_override=file_path_override,
     )

     assert response is not None
@@ -147,8 +209,10 @@ async def test_store_file_content_async(admin_client: RestClient):
     )

     assert fetch_response is not None
+    assert isinstance(fetch_response, RawFileFetchResponse)
+    assert fetch_response.filename == file_path_override
     assert (
-        fetch_response
+        fetch_response.file_content
         == "Here is some random text that shall immortalize Eddie's brain in code.\n"
     )


{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_data_storage_methods.py +2 -2

@@ -486,7 +486,7 @@ class TestDataStorageMethods:
             json={
                 "name": "test_dir",
                 "description": "Test directory",
-                "path": "test_dir.zip",  # The actual path includes .zip extension
+                "file_path": "test_dir.zip",  # The actual path includes .zip extension
                 "is_collection": True,
             },
         )
@@ -533,7 +533,7 @@ class TestDataStorageMethods:
             json={
                 "name": "test_dir",
                 "description": "Test directory",
-                "path": "test_dir.zip",  # The actual path includes .zip extension
+                "file_path": "test_dir.zip",  # The actual path includes .zip extension
                 "is_collection": True,
             },
         )

{futurehouse_client-0.5.1 → futurehouse_client-0.5.3}/tests/test_rest.py +3 -0

@@ -39,6 +39,7 @@ from futurehouse_client.clients.rest_client import (
     UserAgentRequestFetchError,
 )
 from futurehouse_client.models.app import (
+    LiteTaskResponse,
     PhoenixTaskResponse,
     PQATaskResponse,
     Stage,
@@ -150,10 +151,12 @@ async def test_job_response(  # noqa: PLR0915
     with subtests.test("Test TaskResponse with queued task"):
         task_response = admin_client.get_task(task_id)
         assert task_response.status in {"queued", "in progress"}
+        assert not isinstance(task_response, LiteTaskResponse)
         assert task_response.job_name == pqa_task_req.name
         assert task_response.query == pqa_task_req.query
         task_response = await admin_client.aget_task(atask_id)
         assert task_response.status in {"queued", "in progress"}
+        assert not isinstance(task_response, LiteTaskResponse)
         assert task_response.job_name == pqa_task_req.name
         assert task_response.query == pqa_task_req.query