futurehouse-client 0.4.5.dev10__py3-none-any.whl → 0.4.5.dev119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
futurehouse_client/clients/data_storage_methods.py

@@ -35,10 +35,12 @@ from futurehouse_client.models.data_storage_methods import (
  DataStorageResponse,
  DataStorageType,
  DirectoryManifest,
+ GetDatasetAndEntriesResponse,
  ManifestEntry,
  )
  from futurehouse_client.models.rest import (
  DataStorageSearchPayload,
+ FilterLogic,
  SearchCriterion,
  )
  from futurehouse_client.utils.general import retry_if_connection_error
@@ -779,6 +781,7 @@ class DataStorageMethods:
  ignore_patterns: list[str] | None = None,
  ignore_filename: str = ".gitignore",
  project_id: UUID | None = None,
+ tags: list[str] | None = None,
  ) -> DataStorageResponse:
  """Upload a directory as a single zip file collection.

@@ -790,6 +793,7 @@ class DataStorageMethods:
  ignore_patterns: List of patterns to ignore when zipping
  ignore_filename: Name of ignore file to read from directory
  project_id: ID of the project this data storage entry belongs to
+ tags: List of tags to associate with the data storage entry

  Returns:
  DataStorageResponse for the uploaded zip file
@@ -810,6 +814,7 @@ class DataStorageMethods:
  path=zip_gcs_path,
  is_collection=True,
  project_id=project_id,
+ tags=tags,
  )

  logger.debug(
@@ -859,6 +864,7 @@ class DataStorageMethods:
  ignore_patterns: list[str] | None = None,
  ignore_filename: str = ".gitignore",
  project_id: UUID | None = None,
+ tags: list[str] | None = None,
  ) -> DataStorageResponse:
  """Asynchronously upload a directory as a single zip file.

@@ -870,6 +876,7 @@ class DataStorageMethods:
  ignore_patterns: List of patterns to ignore when zipping
  ignore_filename: Name of ignore file to read from directory
  project_id: ID of the project this data storage entry belongs to
+ tags: List of tags to associate with the data storage entry

  Returns:
  DataStorageResponse for the uploaded zip file
@@ -890,6 +897,7 @@ class DataStorageMethods:
  path=zip_gcs_path,
  is_collection=True,
  project_id=project_id,
+ tags=tags,
  )

  data_storage_response = await self._acreate_data_storage_entry(payload)
@@ -1524,7 +1532,33 @@ class DataStorageMethods:
  project_id: ID of the project this data storage entry belongs to

  Returns:
- DataStorageResponse containing the created data storage entry and storage locations
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading

  Raises:
  DataStorageCreationError: If there's an error creating the data storage entry
@@ -1571,7 +1605,33 @@ class DataStorageMethods:
  project_id: ID of the project this data storage entry belongs to

  Returns:
- DataStorageResponse containing the created data storage entry and storage locations
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading

  Raises:
  DataStorageCreationError: If there's an error creating the data storage entry
@@ -1734,6 +1794,7 @@ class DataStorageMethods:
  ignore_patterns: list[str] | None = None,
  ignore_filename: str = ".gitignore",
  project_id: UUID | None = None,
+ dataset_id: UUID | None = None,
  ) -> DataStorageResponse:
  """Store file or directory content in the data storage system.

@@ -1749,13 +1810,45 @@ class DataStorageMethods:
  description: Optional description of the data storage entry
  path: Optional path for the data storage entry
  as_collection: If true, upload directories as a single zip file collection.
- manifest_filename: Name of manifest file
+ manifest_filename: Name of manifest file (JSON or YAML) containing:
+ - entries - Map of file/directory names to their manifest entries
+ - Each ManifestEntry contains:
+ - description - Description of the file or directory
+ - metadata - Additional metadata for the entry
+ - Each DirectoryManifest contains nested entries following the same structure
  ignore_patterns: List of patterns to ignore when zipping directories
  ignore_filename: Name of ignore file to read from directory (default: .gitignore)
  project_id: ID of the project this data storage entry belongs to
+ dataset_id: ID of the dataset this data storage entry belongs to

  Returns:
- DataStorageResponse containing the final data storage entry
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading

  Raises:
  DataStorageCreationError: If there's an error in the process
@@ -1782,6 +1875,7 @@ class DataStorageMethods:
  ignore_patterns=ignore_patterns,
  ignore_filename=ignore_filename,
  project_id=project_id,
+ dataset_id=dataset_id,
  )
  if not responses:
  raise DataStorageCreationError(
@@ -1827,15 +1921,47 @@ class DataStorageMethods:
  path: Optional GCS path for the entry.
  as_collection: If uploading a directory, `True` zips it into a single collection,
  `False` uploads it as a hierarchical structure of individual objects.
- manifest_filename: Optional manifest file for hierarchical uploads.
+ manifest_filename: Optional manifest file (JSON or YAML) for hierarchical uploads containing:
+ - entries - Map of file/directory names to their manifest entries
+ - Each ManifestEntry contains:
+ - description - Description of the file or directory
+ - metadata - Additional metadata for the entry
+ - Each DirectoryManifest contains nested entries following the same structure
  ignore_patterns: List of patterns to ignore when zipping.
  ignore_filename: Name of ignore file to read (default: .gitignore).
  dataset_id: Optional dataset ID to add entry to, or None to create new dataset.
  project_id: ID of the project this data storage entry belongs to

  Returns:
- The `DataStorageResponse` for the created entry. For hierarchical uploads,
- this is the response for the root directory entry.
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading
+
+ For hierarchical uploads, this is the response for the root directory entry.
  """
  file_path = self._validate_file_path(file_path)

@@ -1896,7 +2022,12 @@ class DataStorageMethods:

  Args:
  name: Name of the data storage entry
- existing_location: Describes the existing data source location to register
+ existing_location: a pydantic model describing the existing data source location to register, containing:
+ - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+ - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+ - content_schema - Content schema for the data
+ - metadata - Additional metadata for the location
+ - location - Location path or identifier
  description: Optional description of the data storage entry
  as_collection: If uploading a directory, `True` creates a single storage entry for
  the whole directory and multiple storage locations for each file, `False` assumes
@@ -1905,7 +2036,33 @@ class DataStorageMethods:
  project_id: ID of the project this data storage entry belongs to

  Returns:
- DataStorageResponse containing the created data storage entry and storage locations
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading

  Raises:
  DataStorageCreationError: If there's an error creating the data storage entry
@@ -1951,7 +2108,12 @@ class DataStorageMethods:

  Args:
  name: Name of the data storage entry
- existing_location: Describes the existing data source location to register
+ existing_location: a pydantic model describing the existing data source location to register, containing:
+ - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+ - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+ - content_schema - Content schema for the data
+ - metadata - Additional metadata for the location
+ - location - Location path or identifier
  description: Optional description of the data storage entry
  as_collection: If uploading a directory, `True` creates a single storage entry for
  the whole directory and multiple storage locations for each file, `False` assumes
@@ -1960,7 +2122,33 @@ class DataStorageMethods:
  project_id: ID of the project this data storage entry belongs to

  Returns:
- DataStorageResponse containing the created data storage entry and storage locations
+ DataStorageResponse: A Pydantic model containing:
+ - data_storage: DataStorageEntry with fields:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+ - storage_locations with each location containing:
+ - id - Unique identifier for the storage location
+ - data_storage_id - ID of the associated data storage entry
+ - storage_config pydantic model with fields:
+ - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+ - content_type - Type of content stored
+ - content_schema - Content schema
+ - metadata - Location metadata
+ - location - Location path or identifier
+ - signed_url - Signed URL for uploading/downloading

  Raises:
  DataStorageCreationError: If there's an error creating the data storage entry
@@ -1997,12 +2185,17 @@ class DataStorageMethods:
  self,
  criteria: list[SearchCriterion] | None = None,
  size: int = 10,
+ filter_logic: FilterLogic = FilterLogic.OR,
  ) -> list[dict]:
  """Search data storage objects using structured criteria.

  Args:
- criteria: List of search criteria (SearchCriterion objects with field, operator, value)
+ criteria: List of SearchCriterion pydantic models with fields:
+ - field - Field name to search on
+ - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+ - value - Value to search for
  size: Number of results to return (1-100)
+ filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

  Returns:
  List of search results with scores and data storage information
@@ -2023,6 +2216,7 @@ class DataStorageMethods:
  payload = DataStorageSearchPayload(
  criteria=criteria or [],
  size=max(1, min(100, size)), # Clamp between 1-100
+ filter_logic=filter_logic,
  )

  response = self.client.post(
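The new filter_logic parameter is simply forwarded into DataStorageSearchPayload, as the hunk above shows. A hedged sketch of a call, using the SearchCriterion, SearchOperator, and FilterLogic models from futurehouse_client.models.rest that appear elsewhere in this diff; the method name search_data_storage and the client variable are hypothetical, since the hunk only shows the parameter list:

```python
from futurehouse_client.models.rest import FilterLogic, SearchCriterion, SearchOperator

# "client" is assumed to be a configured FutureHouse client exposing DataStorageMethods;
# "search_data_storage" is a placeholder name for the structured-search method shown above.
results = client.search_data_storage(
    criteria=[
        SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="protein"),
        SearchCriterion(field="tags", operator=SearchOperator.IN, value=["rna", "dna"]),
    ],
    size=25,
    filter_logic=FilterLogic.AND,  # require every criterion to match; the default is FilterLogic.OR
)
```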
@@ -2053,12 +2247,17 @@ class DataStorageMethods:
  self,
  criteria: list[SearchCriterion] | None = None,
  size: int = 10,
+ filter_logic: FilterLogic = FilterLogic.OR,
  ) -> list[dict]:
  """Asynchronously search data storage objects using structured criteria.

  Args:
- criteria: List of search criteria (SearchCriterion objects with field, operator, value)
+ criteria: List of SearchCriterion pydantic models with fields:
+ - field - Field name to search on
+ - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+ - value - Value to search for
  size: Number of results to return (1-100)
+ filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

  Returns:
  List of search results with scores and data storage information
@@ -2079,6 +2278,7 @@ class DataStorageMethods:
  payload = DataStorageSearchPayload(
  criteria=criteria or [],
  size=max(1, min(100, size)), # Clamp between 1-100
+ filter_logic=filter_logic,
  )

  response = await self.async_client.post(
@@ -2118,11 +2318,11 @@ class DataStorageMethods:
  """Search data storage objects using vector similarity.

  Args:
- embedding: Embedding vector for similarity search
+ embedding: List of float values representing the embedding vector for similarity search
  size: Number of results to return (1-100)
  min_score: Minimum similarity score (0.0-1.0)
  dataset_id: Optional dataset ID filter
- tags: Optional list of tags to filter by
+ tags: Optional list of string tags to filter by
  user_id: Optional user ID filter (admin only)
  project_id: Optional project ID filter

@@ -2196,11 +2396,11 @@ class DataStorageMethods:
  """Asynchronously search data storage objects using vector similarity.

  Args:
- embedding: Embedding vector for similarity search
+ embedding: List of float values representing the embedding vector for similarity search
  size: Number of results to return (1-100)
  min_score: Minimum similarity score (0.0-1.0)
  dataset_id: Optional dataset ID filter
- tags: Optional list of tags to filter by
+ tags: Optional list of string tags to filter by
  user_id: Optional user ID filter (admin only)
  project_id: Optional project ID filter

@@ -2268,12 +2468,12 @@ class DataStorageMethods:
  """Fetch data from the storage system (sync version).

  Args:
- data_storage_id: ID of the data storage entry to fetch
+ data_storage_id: UUID of the data storage entry to fetch

  Returns:
  For PG_TABLE storage: string content
  For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
- For multi-location entries: dict of location IDs to dicts with signed URL and file name
+ For multi-location entries: list of downloaded files
  None if not found or error occurred
  """
  if not data_storage_id:
@@ -2342,12 +2542,12 @@ class DataStorageMethods:
  """Fetch data from the storage system.

  Args:
- data_storage_id: ID of the data storage entry to fetch
+ data_storage_id: UUID of the data storage entry to fetch

  Returns:
  For PG_TABLE storage: string content
  For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
- For multi-location entries: dict of location IDs to dicts with signed URL and file name
+ For multi-location entries: list of downloaded files
  None if not found or error occurred
  """
  if not data_storage_id:
@@ -2417,7 +2617,23 @@ class DataStorageMethods:
  name: str,
  description: str | None = None,
  dataset_id: UUID | None = None,
- ):
+ ) -> CreateDatasetPayload:
+ """Asynchronously create a new dataset.
+
+ Args:
+ name: Name of the dataset to create
+ description: Optional description of the dataset
+ dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+ Returns:
+ CreateDatasetPayload: A Pydantic model containing:
+ - id - ID of the created dataset (None if auto-generated)
+ - name - Name of the dataset
+ - description - Description of the dataset
+
+ Raises:
+ DataStorageCreationError: If there's an error creating the dataset
+ """
  try:
  payload = CreateDatasetPayload(
  name=name,
@@ -2448,7 +2664,23 @@ class DataStorageMethods:
  name: str,
  description: str | None = None,
  dataset_id: UUID | None = None,
- ):
+ ) -> CreateDatasetPayload:
+ """Create a new dataset.
+
+ Args:
+ name: Name of the dataset to create
+ description: Optional description of the dataset
+ dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+ Returns:
+ CreateDatasetPayload: A Pydantic model containing:
+ - id - ID of the created dataset (None if auto-generated)
+ - name - Name of the dataset
+ - description - Description of the dataset
+
+ Raises:
+ DataStorageCreationError: If there's an error creating the dataset
+ """
  try:
  payload = CreateDatasetPayload(
  name=name,
@@ -2522,14 +2754,48 @@ class DataStorageMethods:
  retry=retry_if_connection_error,
  before_sleep=before_sleep_log(logger, logging.WARNING),
  )
- async def aget_dataset(self, dataset_id: UUID):
+ async def aget_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+ """Asynchronously retrieve a dataset by ID.
+
+ Args:
+ dataset_id: UUID of the dataset to retrieve
+
+ Returns:
+ GetDatasetAndEntriesResponse: A dict containing:
+ - dataset: DatasetStorage with fields:
+ - id - Unique identifier for the dataset
+ - name - Name of the dataset
+ - user_id - ID of the user who created the dataset
+ - description - Description of the dataset
+ - created_at - Timestamp when the dataset was created
+ - modified_at - Timestamp when the dataset was last modified
+ - data_storage_entries - List of data storage entries in the dataset, each containing:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+
+ Raises:
+ DataStorageError: If there's an error retrieving the dataset
+ """
  try:
  response = await self.async_client.get(
  f"/v0.1/data-storage/datasets/{dataset_id}"
  )
  response.raise_for_status()

- return response.json()
+ return GetDatasetAndEntriesResponse.model_validate(response.json())
  except HTTPStatusError as e:
  self._handle_http_errors(e, "retrieving")
  except Exception as e:
@@ -2541,12 +2807,46 @@ class DataStorageMethods:
  retry=retry_if_connection_error,
  before_sleep=before_sleep_log(logger, logging.WARNING),
  )
- def get_dataset(self, dataset_id: UUID):
+ def get_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+ """Retrieve a dataset by ID.
+
+ Args:
+ dataset_id: UUID of the dataset to retrieve
+
+ Returns:
+ GetDatasetAndEntriesResponse: A dict containing:
+ - dataset: DatasetStorage with fields:
+ - id - Unique identifier for the dataset
+ - name - Name of the dataset
+ - user_id - ID of the user who created the dataset
+ - description - Description of the dataset
+ - created_at - Timestamp when the dataset was created
+ - modified_at - Timestamp when the dataset was last modified
+ - data_storage_entries - List of data storage entries in the dataset, each containing:
+ - id - Unique identifier for the data storage entry
+ - name - Name of the data storage entry
+ - description - Description of the data storage entry
+ - content - Content of the data storage entry
+ - embedding - Embedding vector for the content
+ - is_collection - Whether this entry is a collection
+ - tags - List of tags associated with the entry
+ - parent_id - ID of the parent entry for hierarchical storage
+ - project_id - ID of the project this entry belongs to
+ - dataset_id - ID of the dataset this entry belongs to
+ - path - Path in the storage system where this entry is located
+ - bigquery_schema - Target BigQuery schema for the entry
+ - user_id - ID of the user who created this entry
+ - created_at - Timestamp when the entry was created
+ - modified_at - Timestamp when the entry was last updated
+
+ Raises:
+ DataStorageError: If there's an error retrieving the dataset
+ """
  try:
  response = self.client.get(f"/v0.1/data-storage/datasets/{dataset_id}")
  response.raise_for_status()

- return response.json()
+ return GetDatasetAndEntriesResponse.model_validate(response.json())
  except HTTPStatusError as e:
  self._handle_http_errors(e, "retrieving")
  except Exception as e:
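Because both variants now return a validated model instead of raw JSON, callers get attribute access on the result. A minimal sketch, assuming `client` is a configured FutureHouse client object that mixes in DataStorageMethods and that the UUID below is a placeholder:

```python
from uuid import UUID

dataset_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder, not a real dataset
result = client.get_dataset(dataset_id)

print(result.dataset.name, result.dataset.modified_at)
for entry in result.data_storage_entries:
    print(entry.id, entry.name, entry.tags)
```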
@@ -2622,7 +2922,15 @@ class DataStorageMethods:
  retry=retry_if_connection_error,
  before_sleep=before_sleep_log(logger, logging.WARNING),
  )
- async def adelete_data_storage_entry(self, data_storage_entry_id: UUID):
+ async def adelete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+ """Asynchronously delete a data storage entry.
+
+ Args:
+ data_storage_entry_id: UUID of the data storage entry to delete
+
+ Raises:
+ DataStorageError: If there's an error deleting the data storage entry
+ """
  try:
  await self.async_client.delete(
  f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
@@ -2638,7 +2946,15 @@ class DataStorageMethods:
  retry=retry_if_connection_error,
  before_sleep=before_sleep_log(logger, logging.WARNING),
  )
- def delete_data_storage_entry(self, data_storage_entry_id: UUID):
+ def delete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+ """Delete a data storage entry.
+
+ Args:
+ data_storage_entry_id: UUID of the data storage entry to delete
+
+ Raises:
+ DataStorageError: If there's an error deleting the data storage entry
+ """
  try:
  self.client.delete(
  f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
futurehouse_client/models/client.py

@@ -2,6 +2,7 @@ from typing import Any, Generic, TypeAlias, TypeVar

  from aviary.message import Message
  from aviary.tools.base import Tool
+ from ldp.agent import Agent
  from ldp.data_structures import Transition
  from ldp.graph.ops import OpResult
  from pydantic import BaseModel, ConfigDict, Field, field_serializer
@@ -34,6 +35,7 @@ class ASVState(BaseState, Generic[T]):
  def serialize_action(self, action: OpResult[T]) -> dict:
  return action.to_dict()

+
  class EnvResetState(BaseState):
  observations: list[Message] = Field()
  tools: list[Tool] = Field()
@@ -57,6 +59,64 @@ class TransitionState(BaseState):
  }


+ class GlobalState(BaseState):
+ agent: Agent | None = None
+ env: Any | None = None
+ agent_state: Any | None = None
+ next_agent_state: Any | None = None
+ observations: list = []
+ action: Any | None = None
+ value: float = 0.0
+ last_step_state: Transition | None = None
+
+ def update_observations(self, obs: list[Message]) -> list[Message]:
+ previous_observations = self.observations or []
+ self.observations = obs
+ return previous_observations
+
+ def store_step_state(self, step_state: Transition) -> None:
+ self.last_step_state = step_state
+
+ def update_trajectory_data(self, **kwargs) -> None:
+ for key, value in kwargs.items():
+ setattr(self, key, value)
+
+ def _get_safe_previous_observations(
+ self, current_obs: list[Message] | None = None
+ ) -> list[Message]:
+ if self.last_step_state:
+ last_step_state = self.last_step_state
+ if last_step_state.next_observation:
+ return last_step_state.next_observation
+ if self.observations:
+ return self.observations
+ return current_obs or []
+
+ def create_step_state(self, callback_type: str, **kwargs) -> Transition:
+ defaults = {
+ "timestep": getattr(self.agent, "_timestep", 0) if self.agent else 0,
+ "agent_state": self.agent_state,
+ "next_agent_state": self.next_agent_state or self.agent_state,
+ "observation": self._get_safe_previous_observations(),
+ "next_observation": self.observations or [],
+ "action": self.action,
+ "reward": 0.0,
+ "truncated": False,
+ "done": False,
+ "value": self.value or 0.0,
+ "metadata": {"callback_type": callback_type},
+ }
+
+ for key, value in kwargs.items():
+ if key == "metadata" and isinstance(value, dict):
+ if isinstance(defaults["metadata"], dict):
+ defaults["metadata"].update(value)
+ else:
+ defaults[key] = value
+
+ return Transition(**defaults)
+
+
  StateType: TypeAlias = (
  BeforeTransitionState
  | InitialState
@@ -64,4 +124,5 @@ StateType: TypeAlias = (
  | EnvResetState
  | EnvStepState
  | TransitionState
+ | GlobalState
  )
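A rough usage sketch for the new GlobalState, assuming ldp and aviary are installed (Agent, Transition, and Message come from those packages, as the imports above show) and that GlobalState is importable from futurehouse_client.models.client, which the RECORD listing at the end of this diff suggests:

```python
from aviary.message import Message
from futurehouse_client.models.client import GlobalState

state = GlobalState()  # every field has a default, so no arguments are required
previous = state.update_observations([Message(content="first observation")])
state.update_trajectory_data(value=0.5)

# Build a Transition snapshot for the current callback and remember it, so the
# next snapshot can reuse its next_observation as the "previous" observation.
step = state.create_step_state("after_env_step", reward=1.0, metadata={"note": "example"})
state.store_step_state(step)
```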
futurehouse_client/models/data_storage_methods.py

@@ -3,10 +3,32 @@ from datetime import datetime
  from enum import StrEnum, auto
  from os import PathLike
  from pathlib import Path
- from typing import Any
+ from typing import Annotated, Any
  from uuid import UUID

- from pydantic import BaseModel, Field, JsonValue
+ from pydantic import (
+ BaseModel,
+ Field,
+ JsonValue,
+ PlainSerializer,
+ PlainValidator,
+ WithJsonSchema,
+ )
+ from sqlalchemy_utils import Ltree
+
+ LtreeField = Annotated[
+ Ltree,
+ PlainValidator(Ltree),
+ PlainSerializer(lambda v: v.path),
+ WithJsonSchema({"type": "string", "examples": ["some.path"]}),
+ ]
+
+
+ class DataStorageEntryStatus(StrEnum):
+ PENDING = auto()
+ ACTIVE = auto()
+ FAILED = auto()
+ DISABLED = auto()


  class DataStorageEntry(BaseModel):
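The LtreeField annotation makes plain strings validate into sqlalchemy_utils.Ltree objects and serialize back to their dotted path (and advertises a string type in the JSON schema). A small sketch under the assumption that sqlalchemy-utils, now a declared dependency per the METADATA hunk further down, is installed; the Folder model is purely illustrative:

```python
from futurehouse_client.models.data_storage_methods import LtreeField
from pydantic import BaseModel
from sqlalchemy_utils import Ltree


class Folder(BaseModel):  # illustrative model, not part of the package
    path: LtreeField


folder = Folder(path="projects.demo.run_1")  # PlainValidator(Ltree) coerces the string
assert isinstance(folder.path, Ltree)
assert folder.model_dump()["path"] == "projects.demo.run_1"  # PlainSerializer emits v.path
```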
@@ -20,6 +42,9 @@ class DataStorageEntry(BaseModel):
  content: str | None = Field(
  default=None, description="Content of the data storage entry"
  )
+ status: DataStorageEntryStatus = Field(
+ description="Status of the data storage entry"
+ )
  embedding: list[float] | None = Field(
  default=None, description="Embedding vector for the content"
  )
@@ -151,6 +176,26 @@ class DataStorageRequestPayload(BaseModel):
  existing_location: DataStorageLocationPayload | None = Field(
  default=None, description="Target storage metadata"
  )
+ tags: list[str] | None = Field(
+ default=None,
+ description="List of tags associated with the data storage entry",
+ )
+
+
+ class DatasetStorage(BaseModel):
+ """Pydantic model representing a DatasetStorage record."""
+
+ id: UUID
+ name: str
+ user_id: str
+ description: str | None = None
+ created_at: datetime
+ modified_at: datetime
+
+
+ class GetDatasetAndEntriesResponse(BaseModel):
+ dataset: DatasetStorage
+ data_storage_entries: list[DataStorageEntry]


  class CreateDatasetPayload(BaseModel):
futurehouse_client/models/rest.py

@@ -67,7 +67,8 @@ class SearchOperator(StrEnum):
  """Operators for structured search criteria."""

  EQUALS = "equals"
- CONTAINS = "contains"
+ CONTAINS = "contains" # Exact phrase/substring matching
+ FULLTEXT = "fulltext" # Tokenized full-text search (match query)
  STARTS_WITH = "starts_with"
  ENDS_WITH = "ends_with"
  GREATER_THAN = "greater_than"
@@ -84,6 +85,11 @@ class SearchCriterion(BaseModel):
  value: str | list[str] | bool


+ class FilterLogic(StrEnum):
+ AND = "AND"
+ OR = "OR"
+
+
  class WorldModelSearchPayload(BaseModel):
  """Payload for structured world model search."""

@@ -173,3 +179,4 @@ class DataStorageSearchPayload(BaseModel):

  criteria: list[SearchCriterion]
  size: int = 10
+ filter_logic: FilterLogic = FilterLogic.OR
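At the model level the new field defaults to OR, so existing payloads that omit it keep validating. A short sketch of building the payload directly (these models are all defined in futurehouse_client.models.rest per this diff):

```python
from futurehouse_client.models.rest import (
    DataStorageSearchPayload,
    FilterLogic,
    SearchCriterion,
    SearchOperator,
)

payload = DataStorageSearchPayload(
    criteria=[
        SearchCriterion(field="name", operator=SearchOperator.STARTS_WITH, value="exp_"),
    ],
    size=10,
)
assert payload.filter_logic is FilterLogic.OR  # default when not specified
```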
futurehouse_client/version.py

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '0.4.5.dev10'
- __version_tuple__ = version_tuple = (0, 4, 5, 'dev10')
+ __version__ = version = '0.4.5.dev119'
+ __version_tuple__ = version_tuple = (0, 4, 5, 'dev119')

  __commit_id__ = commit_id = None
.dist-info/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: futurehouse-client
- Version: 0.4.5.dev10
+ Version: 0.4.5.dev119
  Summary: A client for interacting with endpoints of the FutureHouse service.
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
  License: Apache License
@@ -224,6 +224,7 @@ Requires-Dist: openai<1.100.0,>=1
  Requires-Dist: pydantic
  Requires-Dist: python-dotenv
  Requires-Dist: requests
+ Requires-Dist: sqlalchemy-utils>=0.41.2
  Requires-Dist: tenacity
  Requires-Dist: tqdm>=4.62
  Provides-Extra: dev
.dist-info/RECORD

@@ -1,23 +1,23 @@
  futurehouse_client/__init__.py,sha256=PvFTkocA-hobsWoDEBEdrUgLIbuVbDs_0nvMdImJmHk,707
  futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- futurehouse_client/version.py,sha256=0gN9IqRid_hywJfwdbe8e1dFFTTj5VGTzTuG5nFBWxw,719
+ futurehouse_client/version.py,sha256=ygTkTx_4WMa3DIXWf_ZxjBUe9cI-wOluDVgxQX8thfA,721
  futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
- futurehouse_client/clients/data_storage_methods.py,sha256=VESdX0J_frITd0QAjQ5UMPiqYvpToo0ooDCrS-U4uH8,99535
+ futurehouse_client/clients/data_storage_methods.py,sha256=f8ZsVicEtO50pRXoPzEB2GpiyqosNofyoW8vJeYvFnM,119266
  futurehouse_client/clients/job_client.py,sha256=b5gpzulZpxpv9R337r3UKItnMdtd6CGlI1sV3_VQJso,13985
  futurehouse_client/clients/rest_client.py,sha256=RdyFEipvADDCHyY5XFy565IoL9-N1myJjF0G8x2wlK8,103183
  futurehouse_client/models/__init__.py,sha256=0YlzKGymbY1g4cXxnUc0BUnthTkVBf12bCZlGUcMQqk,701
  futurehouse_client/models/app.py,sha256=UUg17I3zk6cH_7mrdojHGYvQfm_SeDkuUxsPlRyIYz0,31895
- futurehouse_client/models/client.py,sha256=Dg7bYgbdmP5GkDWp5e71vxs5YdkzyqqlwBN-A84Jt8w,1709
- futurehouse_client/models/data_storage_methods.py,sha256=GS1FbuMsUJSh7Evjt86vOri-95hfiLyASBS1xG7erNk,12793
- futurehouse_client/models/rest.py,sha256=Fqw0_ypULzd7IV93PKooSG9W5_g7fGFsdW9jNVVImHA,4514
+ futurehouse_client/models/client.py,sha256=3WLS0xdB7CYqHShi_gqyRa6PGj-QvP--0HzD1R93yvY,3868
+ futurehouse_client/models/data_storage_methods.py,sha256=cpF2g4y_REECaz--WhaJeLqXA_3m3keRP5XOXiL8GOI,13811
+ futurehouse_client/models/rest.py,sha256=SbeXZSPUCM0lQ_gVUPa64vKzMxuUVgqmJ5YThfDWs8g,4726
  futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
  futurehouse_client/utils/general.py,sha256=PIkGLCSA3kUvc6mwR-prEB7YnMdKILOIm6cPowSZzzs,2532
  futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
  futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
  futurehouse_client/utils/world_model_tools.py,sha256=v2krZGrco0ur2a_pcRMtnQL05SxlIoBXuJ5R1JkQNws,2921
- futurehouse_client-0.4.5.dev10.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
- futurehouse_client-0.4.5.dev10.dist-info/METADATA,sha256=LDAeM4-wOHzn9c7svqmT7rIvXGfq0Gi9lrc5Nk_cUvc,27060
- futurehouse_client-0.4.5.dev10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- futurehouse_client-0.4.5.dev10.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
- futurehouse_client-0.4.5.dev10.dist-info/RECORD,,
+ futurehouse_client-0.4.5.dev119.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
+ futurehouse_client-0.4.5.dev119.dist-info/METADATA,sha256=_GrNdEBxKiRCI4lXWI8WcIBjVRTEMF_FqvvrQcT3l_E,27101
+ futurehouse_client-0.4.5.dev119.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ futurehouse_client-0.4.5.dev119.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+ futurehouse_client-0.4.5.dev119.dist-info/RECORD,,