futurehouse-client 0.4.5.dev49__py3-none-any.whl → 0.4.5.dev160__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/__init__.py +12 -0
- futurehouse_client/clients/data_storage_methods.py +339 -29
- futurehouse_client/clients/rest_client.py +187 -0
- futurehouse_client/models/__init__.py +12 -0
- futurehouse_client/models/data_storage_methods.py +43 -2
- futurehouse_client/models/job_event.py +75 -0
- futurehouse_client/models/rest.py +6 -0
- futurehouse_client/version.py +2 -2
- {futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/METADATA +2 -1
- futurehouse_client-0.4.5.dev160.dist-info/RECORD +24 -0
- futurehouse_client-0.4.5.dev49.dist-info/RECORD +0 -23
- {futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/WHEEL +0 -0
- {futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/licenses/LICENSE +0 -0
- {futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/top_level.txt +0 -0
futurehouse_client/__init__.py
CHANGED
@@ -8,6 +8,13 @@ from .models.app import (
     TaskResponse,
     TaskResponseVerbose,
 )
+from .models.job_event import (
+    CostComponent,
+    ExecutionType,
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from .utils.world_model_tools import (
     create_world_model_tool,
     make_world_model_tools,
@@ -15,9 +22,14 @@ from .utils.world_model_tools import (
 )

 __all__ = [
+    "CostComponent",
+    "ExecutionType",
     "FinchTaskResponse",
     "FutureHouseClient",
     "JobClient",
+    "JobEventCreateRequest",
+    "JobEventCreateResponse",
+    "JobEventUpdateRequest",
     "JobNames",
     "PQATaskResponse",
     "PhoenixTaskResponse",
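The new job-event names are re-exported from the package root, so they can be imported directly. A minimal sketch (only names this diff actually exports):

    from futurehouse_client import CostComponent, ExecutionType, JobEventCreateRequest

    # Both enums are StrEnum subclasses built with auto(), so members compare as lowercase strings.
    assert ExecutionType.TRAJECTORY == "trajectory"
    assert CostComponent.LLM_USAGE == "llm_usage"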
futurehouse_client/clients/data_storage_methods.py
CHANGED
@@ -35,10 +35,12 @@ from futurehouse_client.models.data_storage_methods import (
     DataStorageResponse,
     DataStorageType,
     DirectoryManifest,
+    GetDatasetAndEntriesResponse,
     ManifestEntry,
 )
 from futurehouse_client.models.rest import (
     DataStorageSearchPayload,
+    FilterLogic,
     SearchCriterion,
 )
 from futurehouse_client.utils.general import retry_if_connection_error
@@ -1530,7 +1532,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1577,7 +1605,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1740,6 +1794,7 @@ class DataStorageMethods:
         ignore_patterns: list[str] | None = None,
         ignore_filename: str = ".gitignore",
         project_id: UUID | None = None,
+        dataset_id: UUID | None = None,
     ) -> DataStorageResponse:
         """Store file or directory content in the data storage system.

@@ -1755,13 +1810,45 @@ class DataStorageMethods:
             description: Optional description of the data storage entry
             path: Optional path for the data storage entry
             as_collection: If true, upload directories as a single zip file collection.
-            manifest_filename: Name of manifest file
+            manifest_filename: Name of manifest file (JSON or YAML) containing:
+                - entries - Map of file/directory names to their manifest entries
+                - Each ManifestEntry contains:
+                    - description - Description of the file or directory
+                    - metadata - Additional metadata for the entry
+                - Each DirectoryManifest contains nested entries following the same structure
             ignore_patterns: List of patterns to ignore when zipping directories
             ignore_filename: Name of ignore file to read from directory (default: .gitignore)
             project_id: ID of the project this data storage entry belongs to
+            dataset_id: ID of the dataset this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error in the process
@@ -1788,6 +1875,7 @@ class DataStorageMethods:
             ignore_patterns=ignore_patterns,
             ignore_filename=ignore_filename,
             project_id=project_id,
+            dataset_id=dataset_id,
         )
         if not responses:
             raise DataStorageCreationError(
@@ -1833,15 +1921,47 @@ class DataStorageMethods:
             path: Optional GCS path for the entry.
             as_collection: If uploading a directory, `True` zips it into a single collection,
                 `False` uploads it as a hierarchical structure of individual objects.
-            manifest_filename: Optional manifest file for hierarchical uploads
+            manifest_filename: Optional manifest file (JSON or YAML) for hierarchical uploads containing:
+                - entries - Map of file/directory names to their manifest entries
+                - Each ManifestEntry contains:
+                    - description - Description of the file or directory
+                    - metadata - Additional metadata for the entry
+                - Each DirectoryManifest contains nested entries following the same structure
             ignore_patterns: List of patterns to ignore when zipping.
             ignore_filename: Name of ignore file to read (default: .gitignore).
             dataset_id: Optional dataset ID to add entry to, or None to create new dataset.
             project_id: ID of the project this data storage entry belongs to

         Returns:
-
-
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading
+
+            For hierarchical uploads, this is the response for the root directory entry.
         """
         file_path = self._validate_file_path(file_path)

@@ -1902,7 +2022,12 @@ class DataStorageMethods:

         Args:
             name: Name of the data storage entry
-            existing_location:
+            existing_location: a pydantic model describing the existing data source location to register, containing:
+                - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+                - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+                - content_schema - Content schema for the data
+                - metadata - Additional metadata for the location
+                - location - Location path or identifier
             description: Optional description of the data storage entry
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
@@ -1911,7 +2036,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -1957,7 +2108,12 @@ class DataStorageMethods:

         Args:
             name: Name of the data storage entry
-            existing_location:
+            existing_location: a pydantic model describing the existing data source location to register, containing:
+                - storage_type - Type of storage (BIGQUERY, GCS, PG_TABLE, RAW_CONTENT, ELASTIC_SEARCH)
+                - content_type - Type of content (BQ_DATASET, BQ_TABLE, TEXT, TEXT_W_EMBEDDINGS, DIRECTORY, FILE, INDEX, INDEX_W_EMBEDDINGS)
+                - content_schema - Content schema for the data
+                - metadata - Additional metadata for the location
+                - location - Location path or identifier
             description: Optional description of the data storage entry
             as_collection: If uploading a directory, `True` creates a single storage entry for
                 the whole directory and multiple storage locations for each file, `False` assumes
@@ -1966,7 +2122,33 @@ class DataStorageMethods:
             project_id: ID of the project this data storage entry belongs to

         Returns:
-            DataStorageResponse
+            DataStorageResponse: A Pydantic model containing:
+                - data_storage: DataStorageEntry with fields:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+                - storage_locations with each location containing:
+                    - id - Unique identifier for the storage location
+                    - data_storage_id - ID of the associated data storage entry
+                    - storage_config pydantic model with fields:
+                        - storage_type - Type of storage (e.g., 'gcs', 'pg_table')
+                        - content_type - Type of content stored
+                        - content_schema - Content schema
+                        - metadata - Location metadata
+                        - location - Location path or identifier
+                    - signed_url - Signed URL for uploading/downloading

         Raises:
             DataStorageCreationError: If there's an error creating the data storage entry
@@ -2003,12 +2185,17 @@ class DataStorageMethods:
         self,
         criteria: list[SearchCriterion] | None = None,
         size: int = 10,
+        filter_logic: FilterLogic = FilterLogic.OR,
     ) -> list[dict]:
         """Search data storage objects using structured criteria.

         Args:
-            criteria: List of
+            criteria: List of SearchCriterion pydantic models with fields:
+                - field - Field name to search on
+                - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+                - value - Value to search for
             size: Number of results to return (1-100)
+            filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

         Returns:
             List of search results with scores and data storage information
@@ -2029,6 +2216,7 @@ class DataStorageMethods:
         payload = DataStorageSearchPayload(
             criteria=criteria or [],
             size=max(1, min(100, size)),  # Clamp between 1-100
+            filter_logic=filter_logic,
         )

         response = self.client.post(
@@ -2059,12 +2247,17 @@ class DataStorageMethods:
         self,
         criteria: list[SearchCriterion] | None = None,
         size: int = 10,
+        filter_logic: FilterLogic = FilterLogic.OR,
     ) -> list[dict]:
         """Asynchronously search data storage objects using structured criteria.

         Args:
-            criteria: List of
+            criteria: List of SearchCriterion pydantic models with fields:
+                - field - Field name to search on
+                - operator - Search operator (EQUALS, CONTAINS, STARTS_WITH, ENDS_WITH, GREATER_THAN, LESS_THAN, BETWEEN, IN)
+                - value - Value to search for
             size: Number of results to return (1-100)
+            filter_logic: Either "AND" (all criteria must match) or "OR" (at least one must match)

         Returns:
             List of search results with scores and data storage information
@@ -2085,6 +2278,7 @@ class DataStorageMethods:
         payload = DataStorageSearchPayload(
             criteria=criteria or [],
             size=max(1, min(100, size)),  # Clamp between 1-100
+            filter_logic=filter_logic,
         )

         response = await self.async_client.post(
@@ -2124,11 +2318,11 @@ class DataStorageMethods:
         """Search data storage objects using vector similarity.

         Args:
-            embedding:
+            embedding: List of float values representing the embedding vector for similarity search
             size: Number of results to return (1-100)
             min_score: Minimum similarity score (0.0-1.0)
             dataset_id: Optional dataset ID filter
-            tags: Optional list of tags to filter by
+            tags: Optional list of string tags to filter by
             user_id: Optional user ID filter (admin only)
             project_id: Optional project ID filter

@@ -2202,11 +2396,11 @@ class DataStorageMethods:
         """Asynchronously search data storage objects using vector similarity.

         Args:
-            embedding:
+            embedding: List of float values representing the embedding vector for similarity search
             size: Number of results to return (1-100)
             min_score: Minimum similarity score (0.0-1.0)
             dataset_id: Optional dataset ID filter
-            tags: Optional list of tags to filter by
+            tags: Optional list of string tags to filter by
             user_id: Optional user ID filter (admin only)
             project_id: Optional project ID filter

@@ -2274,12 +2468,12 @@ class DataStorageMethods:
         """Fetch data from the storage system (sync version).

         Args:
-            data_storage_id:
+            data_storage_id: UUID of the data storage entry to fetch

         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
-            For multi-location entries:
+            For multi-location entries: list of downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
@@ -2348,12 +2542,12 @@ class DataStorageMethods:
         """Fetch data from the storage system.

         Args:
-            data_storage_id:
+            data_storage_id: UUID of the data storage entry to fetch

         Returns:
             For PG_TABLE storage: string content
             For GCS storage: Path to downloaded file (may be unzipped if it was a zip)
-            For multi-location entries:
+            For multi-location entries: list of downloaded files
             None if not found or error occurred
         """
         if not data_storage_id:
@@ -2423,7 +2617,23 @@ class DataStorageMethods:
         name: str,
         description: str | None = None,
         dataset_id: UUID | None = None,
-    ):
+    ) -> CreateDatasetPayload:
+        """Asynchronously create a new dataset.
+
+        Args:
+            name: Name of the dataset to create
+            description: Optional description of the dataset
+            dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+        Returns:
+            CreateDatasetPayload: A Pydantic model containing:
+                - id - ID of the created dataset (None if auto-generated)
+                - name - Name of the dataset
+                - description - Description of the dataset
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the dataset
+        """
         try:
             payload = CreateDatasetPayload(
                 name=name,
@@ -2454,7 +2664,23 @@ class DataStorageMethods:
         name: str,
         description: str | None = None,
         dataset_id: UUID | None = None,
-    ):
+    ) -> CreateDatasetPayload:
+        """Create a new dataset.
+
+        Args:
+            name: Name of the dataset to create
+            description: Optional description of the dataset
+            dataset_id: Optional UUID to assign to the dataset, or None to auto-generate
+
+        Returns:
+            CreateDatasetPayload: A Pydantic model containing:
+                - id - ID of the created dataset (None if auto-generated)
+                - name - Name of the dataset
+                - description - Description of the dataset
+
+        Raises:
+            DataStorageCreationError: If there's an error creating the dataset
+        """
         try:
             payload = CreateDatasetPayload(
                 name=name,
@@ -2528,14 +2754,48 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    async def aget_dataset(self, dataset_id: UUID):
+    async def aget_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+        """Asynchronously retrieve a dataset by ID.
+
+        Args:
+            dataset_id: UUID of the dataset to retrieve
+
+        Returns:
+            GetDatasetAndEntriesResponse: A Pydantic model containing:
+                - dataset: DatasetStorage with fields:
+                    - id - Unique identifier for the dataset
+                    - name - Name of the dataset
+                    - user_id - ID of the user who created the dataset
+                    - description - Description of the dataset
+                    - created_at - Timestamp when the dataset was created
+                    - modified_at - Timestamp when the dataset was last modified
+                - data_storage_entries - List of data storage entries in the dataset, each containing:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+
+        Raises:
+            DataStorageError: If there's an error retrieving the dataset
+        """
         try:
             response = await self.async_client.get(
                 f"/v0.1/data-storage/datasets/{dataset_id}"
             )
             response.raise_for_status()

-            return response.json()
+            return GetDatasetAndEntriesResponse.model_validate(response.json())
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
         except Exception as e:
@@ -2547,12 +2807,46 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    def get_dataset(self, dataset_id: UUID):
+    def get_dataset(self, dataset_id: UUID) -> GetDatasetAndEntriesResponse:
+        """Retrieve a dataset by ID.
+
+        Args:
+            dataset_id: UUID of the dataset to retrieve
+
+        Returns:
+            GetDatasetAndEntriesResponse: A Pydantic model containing:
+                - dataset: DatasetStorage with fields:
+                    - id - Unique identifier for the dataset
+                    - name - Name of the dataset
+                    - user_id - ID of the user who created the dataset
+                    - description - Description of the dataset
+                    - created_at - Timestamp when the dataset was created
+                    - modified_at - Timestamp when the dataset was last modified
+                - data_storage_entries - List of data storage entries in the dataset, each containing:
+                    - id - Unique identifier for the data storage entry
+                    - name - Name of the data storage entry
+                    - description - Description of the data storage entry
+                    - content - Content of the data storage entry
+                    - embedding - Embedding vector for the content
+                    - is_collection - Whether this entry is a collection
+                    - tags - List of tags associated with the entry
+                    - parent_id - ID of the parent entry for hierarchical storage
+                    - project_id - ID of the project this entry belongs to
+                    - dataset_id - ID of the dataset this entry belongs to
+                    - path - Path in the storage system where this entry is located
+                    - bigquery_schema - Target BigQuery schema for the entry
+                    - user_id - ID of the user who created this entry
+                    - created_at - Timestamp when the entry was created
+                    - modified_at - Timestamp when the entry was last updated
+
+        Raises:
+            DataStorageError: If there's an error retrieving the dataset
+        """
         try:
             response = self.client.get(f"/v0.1/data-storage/datasets/{dataset_id}")
             response.raise_for_status()

-            return response.json()
+            return GetDatasetAndEntriesResponse.model_validate(response.json())
         except HTTPStatusError as e:
             self._handle_http_errors(e, "retrieving")
         except Exception as e:
@@ -2628,7 +2922,15 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    async def adelete_data_storage_entry(self, data_storage_entry_id: UUID):
+    async def adelete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+        """Asynchronously delete a data storage entry.
+
+        Args:
+            data_storage_entry_id: UUID of the data storage entry to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the data storage entry
+        """
         try:
             await self.async_client.delete(
                 f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
@@ -2644,7 +2946,15 @@ class DataStorageMethods:
         retry=retry_if_connection_error,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    def delete_data_storage_entry(self, data_storage_entry_id: UUID):
+    def delete_data_storage_entry(self, data_storage_entry_id: UUID) -> None:
+        """Delete a data storage entry.
+
+        Args:
+            data_storage_entry_id: UUID of the data storage entry to delete
+
+        Raises:
+            DataStorageError: If there's an error deleting the data storage entry
+        """
         try:
             self.client.delete(
                 f"/v0.1/data-storage/data-entries/{data_storage_entry_id}"
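A brief sketch of how the search and dataset additions above fit together. The payload fields come straight from this diff; the `client` object and the way its search methods are invoked are assumptions, so they appear only as comments:

    from futurehouse_client.models.rest import DataStorageSearchPayload, FilterLogic

    # The search payload now carries filter_logic; OR remains the default.
    payload = DataStorageSearchPayload(
        criteria=[],  # fill with SearchCriterion(field=..., operator=..., value=...) as needed
        size=5,
        filter_logic=FilterLogic.AND,
    )
    print(payload.model_dump())

    # get_dataset / aget_dataset now return a validated GetDatasetAndEntriesResponse
    # rather than a raw JSON dict, e.g. (assuming `client` is an already-constructed client):
    # dataset = client.get_dataset(dataset_id)
    # print(dataset.dataset.name, len(dataset.data_storage_entries))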
futurehouse_client/clients/rest_client.py
CHANGED
@@ -52,6 +52,11 @@ from futurehouse_client.models.app import (
     TaskResponseVerbose,
     TrajectoryQueryParams,
 )
+from futurehouse_client.models.job_event import (
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from futurehouse_client.models.rest import (
     DiscoveryResponse,
     ExecutionStatus,
@@ -160,6 +165,18 @@ class FileUploadError(RestClientError):
     """Raised when there's an error uploading a file."""


+class JobEventClientError(RestClientError):
+    """Raised when there's an error with job event operations."""
+
+
+class JobEventCreationError(JobEventClientError):
+    """Raised when there's an error creating a job event."""
+
+
+class JobEventUpdateError(JobEventClientError):
+    """Raised when there's an error updating a job event."""
+
+
 retry_if_connection_error = create_retry_if_connection_error(FileUploadError)

 DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds
@@ -2609,6 +2626,176 @@ class RestClient(DataStorageMethods):
             f"Error fetching discoveries for project: {e!r}"
         ) from e

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def create_job_event(
+        self, request: JobEventCreateRequest
+    ) -> JobEventCreateResponse:
+        """Create a new job event.
+
+        Args:
+            request: Job event creation request
+
+        Returns:
+            Job event creation response
+
+        Raises:
+            JobEventCreationError: If the API call fails
+        """
+        try:
+            response = self.client.post(
+                "/v0.1/job-events",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+            return JobEventCreateResponse(**response.json())
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventCreationError(
+                    f"Invalid job event creation request: {e.response.text}."
+                ) from e
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventCreationError(
+                    f"Execution not found for job event creation: {e.response.text}."
+                ) from e
+            raise JobEventCreationError(
+                f"Error creating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventCreationError(
+                f"An unexpected error occurred during job event creation: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def acreate_job_event(
+        self, request: JobEventCreateRequest
+    ) -> JobEventCreateResponse:
+        """Asynchronously create a new job event.
+
+        Args:
+            request: Job event creation request
+
+        Returns:
+            Job event creation response
+
+        Raises:
+            JobEventCreationError: If the API call fails
+        """
+        try:
+            response = await self.async_client.post(
+                "/v0.1/job-events",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+            return JobEventCreateResponse(**response.json())
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventCreationError(
+                    f"Invalid job event creation request: {e.response.text}."
+                ) from e
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventCreationError(
+                    f"Execution not found for job event creation: {e.response.text}."
+                ) from e
+            raise JobEventCreationError(
+                f"Error creating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventCreationError(
+                f"An unexpected error occurred during job event creation: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    def update_job_event(
+        self, job_event_id: UUID, request: JobEventUpdateRequest
+    ) -> None:
+        """Update an existing job event.
+
+        Args:
+            job_event_id: ID of the job event to update
+            request: Job event update request
+
+        Raises:
+            JobEventUpdateError: If the API call fails
+        """
+        try:
+            response = self.client.patch(
+                f"/v0.1/job-events/{job_event_id}",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventUpdateError(
+                    f"Job event with ID {job_event_id} not found."
+                ) from e
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventUpdateError(
+                    f"Invalid job event update request: {e.response.text}."
+                ) from e
+            raise JobEventUpdateError(
+                f"Error updating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventUpdateError(
+                f"An unexpected error occurred during job event update: {e!r}."
+            ) from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
+    async def aupdate_job_event(
+        self, job_event_id: UUID, request: JobEventUpdateRequest
+    ) -> None:
+        """Asynchronously update an existing job event.
+
+        Args:
+            job_event_id: ID of the job event to update
+            request: Job event update request
+
+        Raises:
+            JobEventUpdateError: If the API call fails
+        """
+        try:
+            response = await self.async_client.patch(
+                f"/v0.1/job-events/{job_event_id}",
+                json=request.model_dump(exclude_none=True, mode="json"),
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if e.response.status_code == codes.NOT_FOUND:
+                raise JobEventUpdateError(
+                    f"Job event with ID {job_event_id} not found."
+                ) from e
+            if e.response.status_code == codes.BAD_REQUEST:
+                raise JobEventUpdateError(
+                    f"Invalid job event update request: {e.response.text}."
+                ) from e
+            raise JobEventUpdateError(
+                f"Error updating job event: {e.response.status_code} - {e.response.text}."
+            ) from e
+        except Exception as e:
+            raise JobEventUpdateError(
+                f"An unexpected error occurred during job event update: {e!r}."
+            ) from e
+

 def get_installed_packages() -> dict[str, str]:
     """Returns a dictionary of installed packages and their versions."""
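A minimal usage sketch for the new job-event methods above. The request fields and method names are taken from this diff; the client construction and the numeric values are illustrative assumptions:

    from datetime import datetime, timezone
    from uuid import uuid4

    from futurehouse_client.models.job_event import (
        CostComponent,
        ExecutionType,
        JobEventCreateRequest,
        JobEventUpdateRequest,
    )

    request = JobEventCreateRequest(
        execution_id=uuid4(),  # illustrative; normally the real trajectory or session UUID
        execution_type=ExecutionType.TRAJECTORY,
        cost_component=CostComponent.LLM_USAGE,
        started_at=datetime.now(timezone.utc),
        ended_at=datetime.now(timezone.utc),
        input_token_count=1200,
        completion_token_count=340,
    )

    # Assuming `client` is an authenticated FutureHouseClient / RestClient instance:
    # created = client.create_job_event(request)  # returns JobEventCreateResponse with an `id`
    # client.update_job_event(created.id, JobEventUpdateRequest(amount_acu=0.5))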
futurehouse_client/models/__init__.py
CHANGED
@@ -13,13 +13,25 @@ from .app import (
     TaskResponse,
     TaskResponseVerbose,
 )
+from .job_event import (
+    CostComponent,
+    ExecutionType,
+    JobEventCreateRequest,
+    JobEventCreateResponse,
+    JobEventUpdateRequest,
+)
 from .rest import TrajectoryPatchRequest, WorldModel, WorldModelResponse

 __all__ = [
     "AuthType",
+    "CostComponent",
     "DockerContainerConfiguration",
+    "ExecutionType",
     "FramePath",
     "JobDeploymentConfig",
+    "JobEventCreateRequest",
+    "JobEventCreateResponse",
+    "JobEventUpdateRequest",
     "PQATaskResponse",
     "RuntimeConfig",
     "Stage",
futurehouse_client/models/data_storage_methods.py
CHANGED
@@ -3,10 +3,32 @@ from datetime import datetime
 from enum import StrEnum, auto
 from os import PathLike
 from pathlib import Path
-from typing import Any
+from typing import Annotated, Any
 from uuid import UUID

-from pydantic import
+from pydantic import (
+    BaseModel,
+    Field,
+    JsonValue,
+    PlainSerializer,
+    PlainValidator,
+    WithJsonSchema,
+)
+from sqlalchemy_utils import Ltree
+
+LtreeField = Annotated[
+    Ltree,
+    PlainValidator(Ltree),
+    PlainSerializer(lambda v: v.path),
+    WithJsonSchema({"type": "string", "examples": ["some.path"]}),
+]
+
+
+class DataStorageEntryStatus(StrEnum):
+    PENDING = auto()
+    ACTIVE = auto()
+    FAILED = auto()
+    DISABLED = auto()


 class DataStorageEntry(BaseModel):
@@ -20,6 +42,9 @@ class DataStorageEntry(BaseModel):
     content: str | None = Field(
         default=None, description="Content of the data storage entry"
     )
+    status: DataStorageEntryStatus = Field(
+        description="Status of the data storage entry"
+    )
     embedding: list[float] | None = Field(
         default=None, description="Embedding vector for the content"
     )
@@ -157,6 +182,22 @@ class DataStorageRequestPayload(BaseModel):
     )


+class DatasetStorage(BaseModel):
+    """Pydantic model representing a DatasetStorage record."""
+
+    id: UUID
+    name: str
+    user_id: str
+    description: str | None = None
+    created_at: datetime
+    modified_at: datetime
+
+
+class GetDatasetAndEntriesResponse(BaseModel):
+    dataset: DatasetStorage
+    data_storage_entries: list[DataStorageEntry]
+
+
 class CreateDatasetPayload(BaseModel):
     """Payload for creating a dataset."""

futurehouse_client/models/job_event.py
ADDED
@@ -0,0 +1,75 @@
+"""Job event models for cost and usage tracking."""
+
+from datetime import datetime
+from enum import StrEnum, auto
+from typing import Any
+from uuid import UUID
+
+from pydantic import BaseModel, Field
+
+
+class ExecutionType(StrEnum):
+    """Type of execution for job events."""
+
+    TRAJECTORY = auto()
+    SESSION = auto()
+
+
+class CostComponent(StrEnum):
+    """Cost component types for job events."""
+
+    LLM_USAGE = auto()
+    EXTERNAL_SERVICE = auto()
+    STEP = auto()
+
+
+class JobEventCreateRequest(BaseModel):
+    """Request model for creating a job event matching crow-service schema."""
+
+    execution_id: UUID = Field(description="UUID for trajectory_id or session_id")
+    execution_type: ExecutionType = Field(
+        description="Either 'TRAJECTORY' or 'SESSION'"
+    )
+    cost_component: CostComponent = Field(
+        description="Cost component: 'LLM_USAGE', 'EXTERNAL_SERVICE', or 'STEP'"
+    )
+    started_at: datetime = Field(description="Start time of the job event")
+    ended_at: datetime = Field(description="End time of the job event")
+    crow: str | None = Field(default=None, description="unique identifier for the crow")
+    amount_acu: float | None = Field(default=None, description="Cost amount in ACUs")
+    amount_usd: float | None = Field(default=None, description="Cost amount in USD")
+    rate: float | None = Field(default=None, description="Rate per token/call in USD")
+    input_token_count: int | None = Field(
+        default=None, description="Input token count for LLM calls"
+    )
+    completion_token_count: int | None = Field(
+        default=None, description="Completion token count for LLM calls"
+    )
+    metadata: dict[str, Any] | None = Field(default=None)
+
+
+class JobEventUpdateRequest(BaseModel):
+    """Request model for updating a job event matching crow-service schema."""
+
+    amount_acu: float | None = Field(default=None, description="Cost amount in ACUs")
+    amount_usd: float | None = Field(default=None, description="Cost amount in USD")
+    rate: float | None = Field(default=None, description="Rate per token/call in USD")
+    input_token_count: int | None = Field(
+        default=None, description="Input token count for LLM calls"
+    )
+    completion_token_count: int | None = Field(
+        default=None, description="Completion token count for LLM calls"
+    )
+    metadata: dict[str, Any] | None = Field(default=None)
+    started_at: datetime | None = Field(
+        default=None, description="Start time of the job event"
+    )
+    ended_at: datetime | None = Field(
+        default=None, description="End time of the job event"
+    )
+
+
+class JobEventCreateResponse(BaseModel):
+    """Response model for job event creation."""
+
+    id: UUID = Field(description="UUID of the created job event")
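A small sketch of how these request models serialize; the exclude_none / mode="json" call mirrors what the client methods in rest_client.py do before POSTing or PATCHing, while the field values here are purely illustrative:

    from futurehouse_client.models.job_event import JobEventUpdateRequest

    update = JobEventUpdateRequest(amount_usd=0.0125, completion_token_count=512)
    # Only fields that were actually set are included in the request body.
    print(update.model_dump(exclude_none=True, mode="json"))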
futurehouse_client/models/rest.py
CHANGED
@@ -85,6 +85,11 @@ class SearchCriterion(BaseModel):
     value: str | list[str] | bool


+class FilterLogic(StrEnum):
+    AND = "AND"
+    OR = "OR"
+
+
 class WorldModelSearchPayload(BaseModel):
     """Payload for structured world model search."""

@@ -174,3 +179,4 @@ class DataStorageSearchPayload(BaseModel):

     criteria: list[SearchCriterion]
     size: int = 10
+    filter_logic: FilterLogic = FilterLogic.OR
futurehouse_client/version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.4.5.dev49'
-__version_tuple__ = version_tuple = (0, 4, 5, 'dev49')
+__version__ = version = '0.4.5.dev160'
+__version_tuple__ = version_tuple = (0, 4, 5, 'dev160')

 __commit_id__ = commit_id = None
{futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.4.5.dev49
+Version: 0.4.5.dev160
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 License: Apache License
@@ -224,6 +224,7 @@ Requires-Dist: openai<1.100.0,>=1
 Requires-Dist: pydantic
 Requires-Dist: python-dotenv
 Requires-Dist: requests
+Requires-Dist: sqlalchemy-utils>=0.41.2
 Requires-Dist: tenacity
 Requires-Dist: tqdm>=4.62
 Provides-Extra: dev
futurehouse_client-0.4.5.dev160.dist-info/RECORD
ADDED
@@ -0,0 +1,24 @@
+futurehouse_client/__init__.py,sha256=q5cpcuPkhTaueXsySsgWpH0F-2EsRxcdJfP91ze6khU,991
+futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/version.py,sha256=6BA6oRbUzdnpPhiNHEHGYWEa8NjSzYLRAwSlZ3RVS6Y,721
+futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
+futurehouse_client/clients/data_storage_methods.py,sha256=f8ZsVicEtO50pRXoPzEB2GpiyqosNofyoW8vJeYvFnM,119266
+futurehouse_client/clients/job_client.py,sha256=b5gpzulZpxpv9R337r3UKItnMdtd6CGlI1sV3_VQJso,13985
+futurehouse_client/clients/rest_client.py,sha256=kLCR4dYduwX_16jaOZ26RGCOR2A_6nk2gpBKUqQ-KVI,110247
+futurehouse_client/models/__init__.py,sha256=N1MwDUYonsMN9NdaShsYcJspyL7H756MYj7VWFeD3fk,978
+futurehouse_client/models/app.py,sha256=UUg17I3zk6cH_7mrdojHGYvQfm_SeDkuUxsPlRyIYz0,31895
+futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+futurehouse_client/models/data_storage_methods.py,sha256=cpF2g4y_REECaz--WhaJeLqXA_3m3keRP5XOXiL8GOI,13811
+futurehouse_client/models/job_event.py,sha256=lMrx-lV7BQkKl419ErWZ6Q1EjurmhBFSns0z6zwGaVo,2766
+futurehouse_client/models/rest.py,sha256=SbeXZSPUCM0lQ_gVUPa64vKzMxuUVgqmJ5YThfDWs8g,4726
+futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
+futurehouse_client/utils/general.py,sha256=PIkGLCSA3kUvc6mwR-prEB7YnMdKILOIm6cPowSZzzs,2532
+futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+futurehouse_client/utils/world_model_tools.py,sha256=v2krZGrco0ur2a_pcRMtnQL05SxlIoBXuJ5R1JkQNws,2921
+futurehouse_client-0.4.5.dev160.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
+futurehouse_client-0.4.5.dev160.dist-info/METADATA,sha256=ulzDMOtoPKkLAJxL6JPcqSzmuTqOmP5wxiB7l3bm_qM,27101
+futurehouse_client-0.4.5.dev160.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.4.5.dev160.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.4.5.dev160.dist-info/RECORD,,
futurehouse_client-0.4.5.dev49.dist-info/RECORD
REMOVED
@@ -1,23 +0,0 @@
-futurehouse_client/__init__.py,sha256=PvFTkocA-hobsWoDEBEdrUgLIbuVbDs_0nvMdImJmHk,707
-futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/version.py,sha256=0fTIuRsiUoxr-NFJaezh7gevHU-qV_oY3hDg8BKD-i8,719
-futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
-futurehouse_client/clients/data_storage_methods.py,sha256=yxtrZUFaVw6nwjAoRtOlyqr67M3tDJiFRjFtiFpJXHs,99811
-futurehouse_client/clients/job_client.py,sha256=b5gpzulZpxpv9R337r3UKItnMdtd6CGlI1sV3_VQJso,13985
-futurehouse_client/clients/rest_client.py,sha256=RdyFEipvADDCHyY5XFy565IoL9-N1myJjF0G8x2wlK8,103183
-futurehouse_client/models/__init__.py,sha256=0YlzKGymbY1g4cXxnUc0BUnthTkVBf12bCZlGUcMQqk,701
-futurehouse_client/models/app.py,sha256=UUg17I3zk6cH_7mrdojHGYvQfm_SeDkuUxsPlRyIYz0,31895
-futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
-futurehouse_client/models/data_storage_methods.py,sha256=9L1C-BDaGJiWhr8Ps4P5kS4f0IuzXowCVeU2hYqore8,12932
-futurehouse_client/models/rest.py,sha256=Ze7Jwllkfsvu32ekqYqqBzLqv9LOmWIsjYlxamofM2s,4619
-futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
-futurehouse_client/utils/general.py,sha256=PIkGLCSA3kUvc6mwR-prEB7YnMdKILOIm6cPowSZzzs,2532
-futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
-futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
-futurehouse_client/utils/world_model_tools.py,sha256=v2krZGrco0ur2a_pcRMtnQL05SxlIoBXuJ5R1JkQNws,2921
-futurehouse_client-0.4.5.dev49.dist-info/licenses/LICENSE,sha256=oQ9ZHjUi-_6GfP3gs14FlPb0OlGwE1QCCKFGnJ4LD2I,11341
-futurehouse_client-0.4.5.dev49.dist-info/METADATA,sha256=XmAENG6NzYv8fxDYEiilIst3WyNPfS-HawsJjfC3TWU,27060
-futurehouse_client-0.4.5.dev49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-futurehouse_client-0.4.5.dev49.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
-futurehouse_client-0.4.5.dev49.dist-info/RECORD,,
{futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/WHEEL
RENAMED
File without changes
{futurehouse_client-0.4.5.dev49.dist-info → futurehouse_client-0.4.5.dev160.dist-info}/top_level.txt
RENAMED
File without changes