futurehouse-client 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/__init__.py +12 -0
- futurehouse_client/clients/data_storage_methods.py +367 -33
- futurehouse_client/clients/rest_client.py +187 -0
- futurehouse_client/models/__init__.py +12 -0
- futurehouse_client/models/data_storage_methods.py +47 -2
- futurehouse_client/models/job_event.py +75 -0
- futurehouse_client/models/rest.py +8 -1
- futurehouse_client/version.py +2 -2
- {futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/METADATA +2 -1
- {futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/RECORD +13 -12
- {futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/WHEEL +0 -0
- {futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/top_level.txt +0 -0
futurehouse_client/__init__.py
CHANGED
@@ -8,6 +8,13 @@ from .models.app import (
     TaskResponse,
     TaskResponseVerbose,
 )
+from .models.job_event import (
+    CostComponent,
+    ExecutionType,
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from .utils.world_model_tools import (
     create_world_model_tool,
     make_world_model_tools,
@@ -15,9 +22,14 @@ from .utils.world_model_tools import (
 )

 __all__ = [
+    "CostComponent",
+    "ExecutionType",
     "FinchTaskResponse",
     "FutureHouseClient",
     "JobClient",
+    "JobEventCreateRequest",
+    "JobEventCreateResponse",
+    "JobEventUpdateRequest",
     "JobNames",
     "PQATaskResponse",
     "PhoenixTaskResponse",
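The five new job-event names are re-exported from the package root, so downstream code can import them without reaching into the models package. A minimal sketch of the new surface; the assertions only illustrate the StrEnum values defined later in this diff:

    from futurehouse_client import (
        CostComponent,
        ExecutionType,
        JobEventCreateRequest,
        JobEventCreateResponse,
        JobEventUpdateRequest,
    )

    # StrEnum members created with auto() compare equal to their lowercase names.
    assert ExecutionType.TRAJECTORY == "trajectory"
    assert CostComponent.LLM_USAGE == "llm_usage"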
futurehouse_client/clients/data_storage_methods.py
CHANGED
@@ -35,10 +35,12 @@ from futurehouse_client.models.data_storage_methods import (
     DataStorageResponse,
     DataStorageType,
     DirectoryManifest,
+    GetDatasetAndEntriesResponse,
     ManifestEntry,
 )
 from futurehouse_client.models.rest import (
     DataStorageSearchPayload,
+    FilterLogic,
     SearchCriterion,
 )
 from futurehouse_client.utils.general import retry_if_connection_error
@@ -779,6 +781,7 @@ class DataStorageMethods:
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
+        tags: list[str] | None = None,
     ) -> DataStorageResponse:
         """Upload a directory as a single zip file collection.

@@ -790,6 +793,7 @@ class DataStorageMethods:
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
+            tags: List of tags to associate with the data storage entry

         Returns:
             DataStorageResponse for the uploaded zip file
@@ -810,6 +814,7 @@ class DataStorageMethods:
             path=zip_gcs_path,
             is_collection=True,
             project_id=project_id,
+            tags=tags,
         )

         logger.debug(
@@ -859,6 +864,7 @@ class DataStorageMethods:
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
+        tags: list[str] | None = None,
     ) -> DataStorageResponse:
         """Asynchronously upload a directory as a single zip file.

@@ -870,6 +876,7 @@ class DataStorageMethods:
             ignore_patterns: List of patterns to ignore when zipping
             ignore_filename: Name of ignore file to read from directory
             project_id: ID of the project this data storage entry belongs to
+            tags: List of tags to associate with the data storage entry

         Returns:
             DataStorageResponse for the uploaded zip file
@@ -890,6 +897,7 @@ class DataStorageMethods:
             path=zip_gcs_path,
             is_collection=True,
             project_id=project_id,
+            tags=tags,
         )

         data_storage_response = await self._acreate_data_storage_entry(payload)
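The new `tags` argument flows from the directory-upload helpers into the payload sent to the service; the payload class is assumed here to be `DataStorageRequestPayload`, which gains a `tags` field later in this diff. A hedged sketch of what the construction above amounts to; the `name`, `description`, and path values are illustrative only:

    from uuid import uuid4

    from futurehouse_client.models.data_storage_methods import DataStorageRequestPayload

    payload = DataStorageRequestPayload(
        name="experiment-logs",                # illustrative name (field assumed)
        description="Zipped run artifacts",    # illustrative description (field assumed)
        path="datasets/experiment-logs.zip",   # hypothetical GCS path
        is_collection=True,
        project_id=uuid4(),
        tags=["rnaseq", "2024-q4"],            # new in 0.4.5
    )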
@@ -1524,7 +1532,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1571,7 +1605,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1734,6 +1794,7 @@ class DataStorageMethods:
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
+        dataset_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store file or directory content in the data storage system.

@@ -1749,13 +1810,45 @@ class DataStorageMethods:
             description: Optional description of the data storage entry
             path: Optional path for the data storage entry
             as_collection: If true, upload directories as a single zip file collection.
-            manifest_filename: Name of manifest file
+            manifest_filename: Name of manifest file (JSON or YAML) containing:
+                - entries - Map of file/directory names to their manifest entries
+                - Each ManifestEntry contains:
+                    - description - Description of the file or directory
+                    - metadata - Additional metadata for the entry
+                - Each DirectoryManifest contains nested entries following the same structure
             ignore_patterns: List of patterns to ignore when zipping directories
             ignore_filename: Name of ignore file to read from directory (default: .gitignore)
             project_id: ID of the project this data storage entry belongs to
+            dataset_id: ID of the dataset this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error in the process
@@ -1782,6 +1875,7 @@ class DataStorageMethods:
             ignore_patterns=ignore_patterns,
             ignore_filename=ignore_filename,
             project_id=project_id,
+            dataset_id=dataset_id,
         )
         if not responses:
             raise DataStorageCreationError(
@@ -1827,15 +1921,47 @@ class DataStorageMethods:
             path: Optional GCS path for the entry.
             as_collection: If uploading a directory, `True` zips it into a single collection,
                 `False` uploads it as a hierarchical structure of individual objects.
-            manifest_filename: Optional manifest file for hierarchical uploads
+            manifest_filename: Optional manifest file (JSON or YAML) for hierarchical uploads containing:
+                - entries - Map of file/directory names to their manifest entries
+                - Each ManifestEntry contains:
+                    - description - Description of the file or directory
+                    - metadata - Additional metadata for the entry
+                - Each DirectoryManifest contains nested entries following the same structure
             ignore_patterns: List of patterns to ignore when zipping.
             ignore_filename: Name of ignore file to read (default: .gitignore).
             dataset_id: Optional dataset ID to add entry to, or None to create new dataset.
             project_id: ID of the project this data storage entry belongs to

         Returns:
-
-
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading
+
+            For hierarchical uploads, this is the response for the root directory entry.
         """
         file_path = self._validate_file_path(file_path)

@@ -1896,7 +2022,12 @@ class DataStorageMethods:

         Args:
             name: Name of the data storage entry
-            existing_location:
+            existing_location: a pydantic model describing the existing data source location to register, containing:
+                - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+                - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+                - content_schema - Content schema for the data
+                - metadata - Additional metadata for the location
+                - location - Location path or identifier
             description: Optional description of the data storage entry
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
@@ -1905,7 +2036,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1951,7 +2108,12 @@ class DataStorageMethods:

         Args:
             name: Name of the data storage entry
-            existing_location:
+            existing_location: a pydantic model describing the existing data source location to register, containing:
+                - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+                - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+                - content_schema - Content schema for the data
+                - metadata - Additional metadata for the location
+                - location - Location path or identifier
             description: Optional description of the data storage entry
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
@@ -1960,7 +2122,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                        - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1997,12 +2185,17 @@ class DataStorageMethods:
         self,
         criteria: list[SearchCriterion] | None = None,
         size: int = 10,
+        filter_logic: FilterLogic = FilterLogic.OR,
     ) -> list[dict]:
         """Search data storage objects using structured criteria.

         Args:
-            criteria: List of
+            criteria: List of SearchCriterion pydantic models with fields:
+                - field - Field name to search on
+                - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+                - value - Value to search for
             size: Number of results to return (1-100)
+            filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

         Returns:
             List of search results with scores and data storage information
@@ -2023,6 +2216,7 @@ class DataStorageMethods:
         payload = DataStorageSearchPayload(
             criteria=criteria or [],
             size=max(1, min(100, size)),  # Clamp between 1-100
+            filter_logic=filter_logic,
         )

         response = self.client.post(
@@ -2053,12 +2247,17 @@ class DataStorageMethods:
         self,
         criteria: list[SearchCriterion] | None = None,
         size: int = 10,
+        filter_logic: FilterLogic = FilterLogic.OR,
     ) -> list[dict]:
         """Asynchronously search data storage objects using structured criteria.

         Args:
-            criteria: List of
+            criteria: List of SearchCriterion pydantic models with fields:
+                - field - Field name to search on
+                - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+                - value - Value to search for
             size: Number of results to return (1-100)
+            filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

         Returns:
             List of search results with scores and data storage information
@@ -2079,6 +2278,7 @@ class DataStorageMethods:
         payload = DataStorageSearchPayload(
             criteria=criteria or [],
             size=max(1, min(100, size)),  # Clamp between 1-100
+            filter_logic=filter_logic,
        )

         response = await self.async_client.post(
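The structured-search payload now carries its match semantics explicitly through `filter_logic`. A small sketch of building the payload the methods above post to the service; the field names and values in the criteria are illustrative:

    from futurehouse_client.models.rest import (
        DataStorageSearchPayload,
        FilterLogic,
        SearchCriterion,
        SearchOperator,
    )

    payload = DataStorageSearchPayload(
        criteria=[
            SearchCriterion(field="name", operator=SearchOperator.CONTAINS, value="rnaseq"),
            SearchCriterion(field="tags", operator=SearchOperator.EQUALS, value="validated"),
        ],
        size=25,
        filter_logic=FilterLogic.AND,  # default stays OR: any single criterion may match
    )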
@@ -2118,11 +2318,11 @@ class DataStorageMethods:
         """Search data storage objects using vector similarity.

         Args:
-            embedding:
+            embedding: List of float values representing the embedding vector for similarity search
             size: Number of results to return (1-100)
             min_score: Minimum similarity score (0.0-1.0)
             dataset_id: Optional dataset ID filter
-            tags: Optional list of tags to filter by
+            tags: Optional list of string tags to filter by
             user_id: Optional user ID filter (admin only)
             project_id: Optional project ID filter

@@ -2196,11 +2396,11 @@ class DataStorageMethods:
         """Asynchronously search data storage objects using vector similarity.

         Args:
-            embedding:
+            embedding: List of float values representing the embedding vector for similarity search
             size: Number of results to return (1-100)
             min_score: Minimum similarity score (0.0-1.0)
             dataset_id: Optional dataset ID filter
-            tags: Optional list of tags to filter by
+            tags: Optional list of string tags to filter by
             user_id: Optional user ID filter (admin only)
             project_id: Optional project ID filter

@@ -2268,12 +2468,12 @@ class DataStorageMethods:
         """Fetch data from the storage system (sync version).

         Args:
-            data_storage_id:
+            data_storage_id: UUID of the data storage entry to fetch

         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
-            For multi-location entries:
+            For multi-location entries: list of downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
@@ -2292,7 +2492,11 @@ class DataStorageMethods:
             return [
                 self._download_from_gcs(
                     location.storage_config.signed_url or "",
-                    (
+                    (
+                        Path(location.storage_config.location).name
+                        if location.storage_config.location
+                        else None
+                    ),
                 )
                 for location in result.storage_locations
             ]
@@ -2308,7 +2512,12 @@ class DataStorageMethods:
             )

             return self._download_from_gcs(
-                storage_location.storage_config.signed_url
+                storage_location.storage_config.signed_url,
+                (
+                    storage_location.storage_config.location.split("/")[-1]
+                    if storage_location.storage_config.location
+                    else None
+                ),
             )

         if storage_type in {"raw_content", "pg_table"}:
@@ -2342,12 +2551,12 @@ class DataStorageMethods:
         """Fetch data from the storage system.

         Args:
-            data_storage_id:
+            data_storage_id: UUID of the data storage entry to fetch

         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
-            For multi-location entries:
+            For multi-location entries: list of downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
@@ -2368,7 +2577,11 @@ class DataStorageMethods:
                 [
                     self._adownload_from_gcs(
                         location.storage_config.signed_url or "",
-                        (
+                        (
+                            location.storage_config.location.split("/")[-1]
+                            if location.storage_config.location
+                            else None
+                        ),
                     )
                     for location in result.storage_locations
                 ],
@@ -2385,7 +2598,12 @@ class DataStorageMethods:
             )

             return await self._adownload_from_gcs(
-                storage_location.storage_config.signed_url
+                storage_location.storage_config.signed_url,
+                (
+                    storage_location.storage_config.location.split("/")[-1]
+                    if storage_location.storage_config.location
+                    else None
+                ),
             )

         if storage_type in {"raw_content", "pg_table"}:
@@ -2417,7 +2635,23 @@ class DataStorageMethods:
         name: str,
         description: str | None = None,
         dataset_id: UUID | None = None,
-    ):
+    ) -> CreateDatasetPayload:
+        """Asynchronously create a new dataset.
+
+        Args:
+            name: Name of the dataset to create
+            description: Optional description of the dataset
+            dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+        Returns:
+            CreateDatasetPayload: A Pydantic model containing:
+                - id - ID of the created dataset (None if auto-generated)
+                - name - Name of the dataset
+                - description - Description of the dataset
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the dataset
+        """
         try:
             payload = CreateDatasetPayload(
                 name=name,
@@ -2448,7 +2682,23 @@ class DataStorageMethods:
         name: str,
         description: str | None = None,
         dataset_id: UUID | None = None,
-    ):
+    ) -> CreateDatasetPayload:
+        """Create a new dataset.
+
+        Args:
+            name: Name of the dataset to create
+            description: Optional description of the dataset
+            dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+        Returns:
+            CreateDatasetPayload: A Pydantic model containing:
+                - id - ID of the created dataset (None if auto-generated)
+                - name - Name of the dataset
+                - description - Description of the dataset
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the dataset
+        """
         try:
             payload = CreateDatasetPayload(
                 name=name,
@@ -2522,14 +2772,48 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    async def aget_dataset(self, dataset_id: UUID):
+    async def aget_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+        """Asynchronously retrieve a dataset by ID.
+
+        Args:
+            dataset_id: UUID of the dataset to retrieve
+
+        Returns:
+            GetDatasetAndEntriesResponse: A dict containing:
+                - dataset: DatasetStorage with fields:
+                    - id - Unique identifier for the dataset
+                    - name - Name of the dataset
+                    - user_id - ID of the user who created the dataset
+                    - description - Description of the dataset
+                    - created_at - Timestamp when the dataset was created
+                    - modified_at - Timestamp when the dataset was last modified
+                - data_storage_entries - List of data storage entries in the dataset, each containing:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+
+        Raises:
+            DataStorageError: If there's an error retrieving the dataset
+        """
         try:
             response = await self.async_client.get(
                 f"/v0.1/data-storage/datasets/{dataset_id}"
             )
             response.raise_for_status()

-            return response.json()
+            return GetDatasetAndEntriesResponse.model_validate(response.json())
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
         except Exception as e:
@@ -2541,12 +2825,46 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    def get_dataset(self, dataset_id: UUID):
+    def get_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+        """Retrieve a dataset by ID.
+
+        Args:
+            dataset_id: UUID of the dataset to retrieve
+
+        Returns:
+            GetDatasetAndEntriesResponse: A dict containing:
+                - dataset: DatasetStorage with fields:
+                    - id - Unique identifier for the dataset
+                    - name - Name of the dataset
+                    - user_id - ID of the user who created the dataset
+                    - description - Description of the dataset
+                    - created_at - Timestamp when the dataset was created
+                    - modified_at - Timestamp when the dataset was last modified
+                - data_storage_entries - List of data storage entries in the dataset, each containing:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+
+        Raises:
+            DataStorageError: If there's an error retrieving the dataset
+        """
         try:
             response = self.client.get(f"/v0.1/data-storage/datasets/{dataset_id}")
             response.raise_for_status()

-            return response.json()
+            return GetDatasetAndEntriesResponse.model_validate(response.json())
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
         except Exception as e:
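`get_dataset` and `aget_dataset` now validate the raw JSON into `GetDatasetAndEntriesResponse` instead of returning a bare dict, so callers get typed access to the dataset and its entries. A minimal sketch, assuming an already-authenticated FutureHouseClient instance named `client` and a placeholder dataset ID:

    from uuid import UUID

    dataset_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder ID
    result = client.get_dataset(dataset_id)

    print(result.dataset.name, result.dataset.created_at)
    for entry in result.data_storage_entries:
        print(entry.id, entry.name, entry.is_collection, entry.tags)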
@@ -2622,7 +2940,15 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    async def adelete_data_storage_entry(self, data_storage_entry_id: UUID):
+    async def adelete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+        """Asynchronously delete a data storage entry.
+
+        Args:
+            data_storage_entry_id: UUID of the data storage entry to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the data storage entry
+        """
         try:
             await self.async_client.delete(
                 f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
@@ -2638,7 +2964,15 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    def delete_data_storage_entry(self, data_storage_entry_id: UUID):
+    def delete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+        """Delete a data storage entry.
+
+        Args:
+            data_storage_entry_id: UUID of the data storage entry to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the data storage entry
+        """
         try:
             self.client.delete(
                 f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
futurehouse_client/clients/rest_client.py
CHANGED
@@ -52,6 +52,11 @@ from futurehouse_client.models.app import (
     TaskResponseVerbose,
     TrajectoryQueryParams,
 )
+from futurehouse_client.models.job_event import (
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from futurehouse_client.models.rest import (
     DiscoveryResponse,
     ExecutionStatus,
@@ -160,6 +165,18 @@ class FileUploadError(RestClientError):
     """Raised when there's an error uploading a file."""


+class JobEventClientError(RestClientError):
+    """Raised when there's an error with job event operations."""
+
+
+class JobEventCreationError(JobEventClientError):
+    """Raised when there's an error creating a job event."""
+
+
+class JobEventUpdateError(JobEventClientError):
+    """Raised when there's an error updating a job event."""
+
+
 retry_if_connection_error = create_retry_if_connection_error(FileUploadError)

 DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds
@@ -2609,6 +2626,176 @@ class RestClient(DataStorageMethods):
                 f"Error fetching discoveries for project: {e!r}"
             ) from e

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def create_job_event(
+        self, request: JobEventCreateRequest
+    ) -> JobEventCreateResponse:
+        """Create a new job event.
+
+        Args:
+            request: Job event creation request
+
+        Returns:
+            Job event creation response
+
+        Raises:
+            JobEventCreationError: If the API call fails
+        """
+        try:
+            response = self.client.post(
+                "/v0.1/job-events",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+            return JobEventCreateResponse(**response.json())
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventCreationError(
+                    f"Invalid job event creation request: {e.response.text}."
+                ) from e
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventCreationError(
+                    f"Execution not found for job event creation: {e.response.text}."
+                ) from e
+            raise JobEventCreationError(
+                f"Error creating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventCreationError(
+                f"An unexpected error occurred during job event creation: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def acreate_job_event(
+        self, request: JobEventCreateRequest
+    ) -> JobEventCreateResponse:
+        """Asynchronously create a new job event.
+
+        Args:
+            request: Job event creation request
+
+        Returns:
+            Job event creation response
+
+        Raises:
+            JobEventCreationError: If the API call fails
+        """
+        try:
+            response = await self.async_client.post(
+                "/v0.1/job-events",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+            return JobEventCreateResponse(**response.json())
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventCreationError(
+                    f"Invalid job event creation request: {e.response.text}."
+                ) from e
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventCreationError(
+                    f"Execution not found for job event creation: {e.response.text}."
+                ) from e
+            raise JobEventCreationError(
+                f"Error creating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventCreationError(
+                f"An unexpected error occurred during job event creation: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def update_job_event(
+        self, job_event_id: UUID, request: JobEventUpdateRequest
+    ) -> None:
+        """Update an existing job event.
+
+        Args:
+            job_event_id: ID of the job event to update
+            request: Job event update request
+
+        Raises:
+            JobEventUpdateError: If the API call fails
+        """
+        try:
+            response = self.client.patch(
+                f"/v0.1/job-events/{job_event_id}",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventUpdateError(
+                    f"Job event with ID {job_event_id} not found."
+                ) from e
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventUpdateError(
+                    f"Invalid job event update request: {e.response.text}."
+                ) from e
+            raise JobEventUpdateError(
+                f"Error updating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventUpdateError(
+                f"An unexpected error occurred during job event update: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def aupdate_job_event(
+        self, job_event_id: UUID, request: JobEventUpdateRequest
+    ) -> None:
+        """Asynchronously update an existing job event.
+
+        Args:
+            job_event_id: ID of the job event to update
+            request: Job event update request
+
+        Raises:
+            JobEventUpdateError: If the API call fails
+        """
+        try:
+            response = await self.async_client.patch(
+                f"/v0.1/job-events/{job_event_id}",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventUpdateError(
+                    f"Job event with ID {job_event_id} not found."
+                ) from e
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventUpdateError(
+                    f"Invalid job event update request: {e.response.text}."
+                ) from e
+            raise JobEventUpdateError(
+                f"Error updating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventUpdateError(
+                f"An unexpected error occurred during job event update: {e!r}."
+            ) from e
+

 def get_installed_packages() -> dict[str, str]:
     """Returns a dictionary of installed packages and their versions."""
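Together with the job-event models, the new RestClient methods give a create-then-reconcile flow for cost tracking. A sketch assuming an authenticated client instance named `client`; the ID, timestamps, token counts, and dollar amount are illustrative:

    from datetime import datetime, timezone
    from uuid import UUID

    from futurehouse_client import (
        CostComponent,
        ExecutionType,
        JobEventCreateRequest,
        JobEventUpdateRequest,
    )

    request = JobEventCreateRequest(
        execution_id=UUID("00000000-0000-0000-0000-000000000000"),  # trajectory or session ID
        execution_type=ExecutionType.TRAJECTORY,
        cost_component=CostComponent.LLM_USAGE,
        started_at=datetime.now(timezone.utc),
        ended_at=datetime.now(timezone.utc),
        input_token_count=1200,
        completion_token_count=350,
    )
    created = client.create_job_event(request)  # POST /v0.1/job-events

    # Later, reconcile the recorded cost once billing data is known.
    client.update_job_event(created.id, JobEventUpdateRequest(amount_usd=0.0042))  # PATCH /v0.1/job-events/{id}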
futurehouse_client/models/__init__.py
CHANGED
@@ -13,13 +13,25 @@ from .app import (
     TaskResponse,
     TaskResponseVerbose,
 )
+from .job_event import (
+    CostComponent,
+    ExecutionType,
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from .rest import TrajectoryPatchRequest, WorldModel, WorldModelResponse

 __all__ = [
     "AuthType",
+    "CostComponent",
     "DockerContainerConfiguration",
+    "ExecutionType",
     "FramePath",
     "JobDeploymentConfig",
+    "JobEventCreateRequest",
+    "JobEventCreateResponse",
+    "JobEventUpdateRequest",
     "PQATaskResponse",
     "RuntimeConfig",
     "Stage",
futurehouse_client/models/data_storage_methods.py
CHANGED
@@ -3,10 +3,32 @@ from datetime import datetime
 from enum import StrEnum, auto
 from os import PathLike
 from pathlib import Path
-from typing import Any
+from typing import Annotated, Any
 from uuid import UUID

-from pydantic import
+from pydantic import (
+    BaseModel,
+    Field,
+    JsonValue,
+    PlainSerializer,
+    PlainValidator,
+    WithJsonSchema,
+)
+from sqlalchemy_utils import Ltree
+
+LtreeField = Annotated[
+    Ltree,
+    PlainValidator(Ltree),
+    PlainSerializer(lambda v: v.path),
+    WithJsonSchema({"type": "string", "examples": ["some.path"]}),
+]
+
+
+class DataStorageEntryStatus(StrEnum):
+    PENDING = auto()
+    ACTIVE = auto()
+    FAILED = auto()
+    DISABLED = auto()


 class DataStorageEntry(BaseModel):
@@ -20,6 +42,9 @@ class DataStorageEntry(BaseModel):
     content: str | None = Field(
         default=None, description="Content of the data storage entry"
     )
+    status: DataStorageEntryStatus = Field(
+        description="Status of the data storage entry"
+    )
     embedding: list[float] | None = Field(
         default=None, description="Embedding vector for the content"
     )
@@ -151,6 +176,26 @@ class DataStorageRequestPayload(BaseModel):
     existing_location: DataStorageLocationPayload | None = Field(
         default=None, description="Target storage metadata"
     )
+    tags: list[str] | None = Field(
+        default=None,
+        description="List of tags associated with the data storage entry",
+    )
+
+
+class DatasetStorage(BaseModel):
+    """Pydantic model representing a DatasetStorage record."""
+
+    id: UUID
+    name: str
+    user_id: str
+    description: str | None = None
+    created_at: datetime
+    modified_at: datetime
+
+
+class GetDatasetAndEntriesResponse(BaseModel):
+    dataset: DatasetStorage
+    data_storage_entries: list[DataStorageEntry]


 class CreateDatasetPayload(BaseModel):
futurehouse_client/models/job_event.py
ADDED
@@ -0,0 +1,75 @@
+"""Job event models for cost and usage tracking."""
+
+from datetime import datetime
+from enum import StrEnum, auto
+from typing import Any
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+
+class ExecutionType(StrEnum):
+    """Type of execution for job events."""
+
+    TRAJECTORY = auto()
+    SESSION = auto()
+
+
+class CostComponent(StrEnum):
+    """Cost component types for job events."""
+
+    LLM_USAGE = auto()
+    EXTERNAL_SERVICE = auto()
+    STEP = auto()
+
+
+class JobEventCreateRequest(BaseModel):
+    """Request model for creating a job event matching crow-service schema."""
+
+    execution_id: UUID = Field(description="UUID for trajectory_id or session_id")
+    execution_type: ExecutionType = Field(
+        description="Either 'TRAJECTORY' or 'SESSION'"
+    )
+    cost_component: CostComponent = Field(
+        description="Cost component: 'LLM_USAGE', 'EXTERNAL_SERVICE', or 'STEP'"
+    )
+    started_at: datetime = Field(description="Start time of the job event")
+    ended_at: datetime = Field(description="End time of the job event")
+    crow: str | None = Field(default=None, description="unique identifier for the crow")
+    amount_acu: float | None = Field(default=None, description="Cost amount in ACUs")
+    amount_usd: float | None = Field(default=None, description="Cost amount in USD")
+    rate: float | None = Field(default=None, description="Rate per token/call in USD")
+    input_token_count: int | None = Field(
+        default=None, description="Input token count for LLM calls"
+    )
+    completion_token_count: int | None = Field(
+        default=None, description="Completion token count for LLM calls"
+    )
+    metadata: dict[str, Any] | None = Field(default=None)
+
+
+class JobEventUpdateRequest(BaseModel):
+    """Request model for updating a job event matching crow-service schema."""
+
+    amount_acu: float | None = Field(default=None, description="Cost amount in ACUs")
+    amount_usd: float | None = Field(default=None, description="Cost amount in USD")
+    rate: float | None = Field(default=None, description="Rate per token/call in USD")
+    input_token_count: int | None = Field(
+        default=None, description="Input token count for LLM calls"
+    )
+    completion_token_count: int | None = Field(
+        default=None, description="Completion token count for LLM calls"
+    )
+    metadata: dict[str, Any] | None = Field(default=None)
+    started_at: datetime | None = Field(
+        default=None, description="Start time of the job event"
+    )
+    ended_at: datetime | None = Field(
+        default=None, description="End time of the job event"
+    )
+
+
+class JobEventCreateResponse(BaseModel):
+    """Response model for job event creation."""
+
+    id: UUID = Field(description="UUID of the created job event")
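Because the enums are StrEnums built with auto(), a request serializes to plain lowercase strings, which is what the client posts via model_dump(exclude_none=True, mode="json"). A small sketch of the wire shape, continuing the request built in the client example above (values illustrative):

    payload = request.model_dump(exclude_none=True, mode="json")
    # Roughly:
    # {
    #     "execution_id": "00000000-0000-0000-0000-000000000000",
    #     "execution_type": "trajectory",
    #     "cost_component": "llm_usage",
    #     "started_at": "2024-01-01T00:00:00+00:00",
    #     "ended_at": "2024-01-01T00:00:00+00:00",
    #     "input_token_count": 1200,
    #     "completion_token_count": 350
    # }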
futurehouse_client/models/rest.py
CHANGED
@@ -67,7 +67,8 @@ class SearchOperator(StrEnum):
     """Operators for structured search criteria."""

     EQUALS = "equals"
-    CONTAINS = "contains"
+    CONTAINS = "contains"  # Exact phrase/substring matching
+    FULLTEXT = "fulltext"  # Tokenized full-text search (match query)
     STARTS_WITH = "starts_with"
     ENDS_WITH = "ends_with"
     GREATER_THAN = "greater_than"
@@ -84,6 +85,11 @@ class SearchCriterion(BaseModel):
     value: str | list[str] | bool


+class FilterLogic(StrEnum):
+    AND = "AND"
+    OR = "OR"
+
+
 class WorldModelSearchPayload(BaseModel):
     """Payload for structured world model search."""

@@ -173,3 +179,4 @@ class DataStorageSearchPayload(BaseModel):

     criteria: list[SearchCriterion]
     size: int = 10
+    filter_logic: FilterLogic = FilterLogic.OR
futurehouse_client/version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.4.4'
-__version_tuple__ = version_tuple = (0, 4, 4)
+__version__ = version = '0.4.5'
+__version_tuple__ = version_tuple = (0, 4, 5)

 __commit_id__ = commit_id = None
{futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.4.4
+Version: 0.4.5
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 License: Apache License
@@ -224,6 +224,7 @@ Requires-Dist: openai<1.100.0,>=1
 Requires-Dist: pydantic
 Requires-Dist: python-dotenv
 Requires-Dist: requests
+Requires-Dist: sqlalchemy-utils>=0.41.2
 Requires-Dist: tenacity
 Requires-Dist: tqdm>=4.62
 Provides-Extra: dev
{futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/RECORD
CHANGED
@@ -1,23 +1,24 @@
-futurehouse_client/__init__.py,sha256=
+futurehouse_client/__init__.py,sha256=q5cpcuPkhTaueXsySsgWpH0F-2EsRxcdJfP91ze6khU,991
 futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/version.py,sha256=
+futurehouse_client/version.py,sha256=4nCeAo6j-wL_OKq7GV7I-Pvif2nNPnyKifWxv88z0Y4,704
 futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
-futurehouse_client/clients/data_storage_methods.py,sha256=
+futurehouse_client/clients/data_storage_methods.py,sha256=wnKu8CazlEX_BQauX1qvZRRw7EpbD0oofmH1wXLnDf8,120034
 futurehouse_client/clients/job_client.py,sha256=b5gpzulZpxpv9R337r3UKItnMdtd6CGlI1sV3_VQJso,13985
-futurehouse_client/clients/rest_client.py,sha256=
-futurehouse_client/models/__init__.py,sha256=
+futurehouse_client/clients/rest_client.py,sha256=kLCR4dYduwX_16jaOZ26RGCOR2A_6nk2gpBKUqQ-KVI,110247
+futurehouse_client/models/__init__.py,sha256=N1MwDUYonsMN9NdaShsYcJspyL7H756MYj7VWFeD3fk,978
 futurehouse_client/models/app.py,sha256=UUg17I3zk6cH_7mrdojHGYvQfm_SeDkuUxsPlRyIYz0,31895
 futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
-futurehouse_client/models/data_storage_methods.py,sha256=
-futurehouse_client/models/
+futurehouse_client/models/data_storage_methods.py,sha256=cpF2g4y_REECaz--WhaJeLqXA_3m3keRP5XOXiL8GOI,13811
+futurehouse_client/models/job_event.py,sha256=lMrx-lV7BQkKl419ErWZ6Q1EjurmhBFSns0z6zwGaVo,2766
+futurehouse_client/models/rest.py,sha256=SbeXZSPUCM0lQ_gVUPa64vKzMxuUVgqmJ5YThfDWs8g,4726
 futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
 futurehouse_client/utils/general.py,sha256=PIkGLCSA3kUvc6mwR-prEB7YnMdKILOIm6cPowSZzzs,2532
 futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
 futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
 futurehouse_client/utils/world_model_tools.py,sha256=v2krZGrco0ur2a_pcRMtnQL05SxlIoBXuJ5R1JkQNws,2921
-futurehouse_client-0.4.
-futurehouse_client-0.4.
-futurehouse_client-0.4.
-futurehouse_client-0.4.
-futurehouse_client-0.4.
+futurehouse_client-0.4.5.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
+futurehouse_client-0.4.5.dist-info/METADATA,sha256=Ey85Ppd3kJLB36xx8LDRt5ogYZQTH2QQ117vscLfofc,27094
+futurehouse_client-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.4.5.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.4.5.dist-info/RECORD,,
{futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/WHEEL
File without changes
{futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/licenses/LICENSE
File without changes
{futurehouse_client-0.4.4.dist-info → futurehouse_client-0.4.5.dist-info}/top_level.txt
File without changes