PyPI - sf-vector-sdk - Versions diffs - 0.2.0__py3-none-any.whl - Mend

sf-vector-sdk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

sf_vector_sdk-0.2.0.dist-info/METADATA +476 -0
sf_vector_sdk-0.2.0.dist-info/RECORD +27 -0
sf_vector_sdk-0.2.0.dist-info/WHEEL +4 -0
vector_sdk/__init__.py +262 -0
vector_sdk/client.py +538 -0
vector_sdk/content_types.py +233 -0
vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py +57 -0
vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi +141 -0
vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py +58 -0
vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.pyi +145 -0
vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.py +58 -0
vector_sdk/generated/embedding_pipeline/query/v1/query_pb2.pyi +109 -0
vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.py +39 -0
vector_sdk/generated/embedding_pipeline/tools/v1/tools_pb2.pyi +31 -0
vector_sdk/hash/__init__.py +31 -0
vector_sdk/hash/hasher.py +259 -0
vector_sdk/hash/types.py +67 -0
vector_sdk/namespaces/__init__.py +13 -0
vector_sdk/namespaces/base.py +45 -0
vector_sdk/namespaces/db.py +230 -0
vector_sdk/namespaces/embeddings.py +268 -0
vector_sdk/namespaces/search.py +258 -0
vector_sdk/structured/__init__.py +60 -0
vector_sdk/structured/router.py +190 -0
vector_sdk/structured/structured_embeddings.py +431 -0
vector_sdk/structured/tool_config.py +254 -0
vector_sdk/types.py +864 -0

vector_sdk/client.py ADDED Viewed

@@ -0,0 +1,538 @@
+"""
+Vector SDK Client.
+This module provides the main client classes for interacting with the
+Vector Gateway service via Redis Streams and HTTP APIs.
+"""
+import warnings
+from typing import Any, Optional
+from redis import Redis
+from vector_sdk.namespaces.db import DBNamespace
+from vector_sdk.namespaces.embeddings import EmbeddingsNamespace
+from vector_sdk.namespaces.search import SearchNamespace
+from vector_sdk.structured import StructuredEmbeddingsNamespace
+from vector_sdk.types import (
+    CloneResult,
+    DeleteFromNamespaceResult,
+    EmbeddingResult,
+    LookupResult,
+    QueryResult,
+    StorageConfig,
+)
+class VectorClient:
+    """
+    Main client for the Vector SDK.
+    Provides access to embedding, search, and database operations through
+    namespaced sub-clients for improved discoverability.
+    Example:
+        ```python
+        from vector_sdk import VectorClient
+        client = VectorClient(
+            redis_url="redis://localhost:6379",
+            http_url="http://localhost:8080",
+        )
+        # Create embeddings
+        result = client.embeddings.create_and_wait(
+            texts=[{"id": "doc1", "text": "Hello world"}],
+            content_type="document",
+        )
+        # Vector search
+        search_result = client.search.query_and_wait(
+            query_text="machine learning",
+            database="turbopuffer",
+            namespace="topics",
+            top_k=10,
+        )
+        # Direct database lookup (no embedding)
+        docs = client.db.get_by_ids(
+            ids=["doc1", "doc2"],
+            database="turbopuffer",
+            namespace="topics",
+        )
+        client.close()
+        ```
+    """
+    def __init__(self, redis_url: str, http_url: Optional[str] = None):
+        """
+        Initialize the client.
+        Args:
+            redis_url: Redis connection URL (e.g., "redis://localhost:6379")
+            http_url: Optional HTTP URL for query-gateway API (required for db operations)
+        """
+        self._redis = Redis.from_url(redis_url, decode_responses=True)
+        self._http_url = http_url
+        self._embeddings = EmbeddingsNamespace(self._redis, self._http_url)
+        self._search = SearchNamespace(self._redis, self._http_url)
+        self._db = DBNamespace(self._redis, self._http_url)
+        self._structured_embeddings = StructuredEmbeddingsNamespace(
+            self._redis, self._embeddings, self._http_url
+        )
+    @property
+    def embeddings(self) -> EmbeddingsNamespace:
+        """
+        Embedding operations namespace.
+        Use this to create embeddings, wait for results, and check queue depth.
+        Example:
+            ```python
+            # Create embeddings asynchronously
+            request_id = client.embeddings.create(texts, content_type)
+            result = client.embeddings.wait_for(request_id)
+            # Or create and wait in one call
+            result = client.embeddings.create_and_wait(texts, content_type)
+            # Check queue depth
+            depths = client.embeddings.get_queue_depth()
+            ```
+        """
+        return self._embeddings
+    @property
+    def search(self) -> SearchNamespace:
+        """
+        Vector search operations namespace.
+        Use this to perform semantic similarity searches.
+        Example:
+            ```python
+            # Search asynchronously
+            request_id = client.search.query("machine learning", database="turbopuffer")
+            result = client.search.wait_for(request_id)
+            # Or search and wait in one call
+            result = client.search.query_and_wait("machine learning", database="turbopuffer")
+            ```
+        """
+        return self._search
+    @property
+    def db(self) -> DBNamespace:
+        """
+        Direct database operations namespace.
+        Use this for operations that don't require embedding (lookup by ID,
+        search by metadata, clone, delete).
+        Requires `http_url` to be set in VectorClient constructor.
+        Example:
+            ```python
+            # Lookup by ID
+            result = client.db.get_by_ids(["id1", "id2"], database="turbopuffer")
+            # Find by metadata
+            result = client.db.find_by_metadata({"userId": "user1"}, database="mongodb")
+            # Clone between namespaces
+            client.db.clone("doc1", "ns1", "ns2")
+            # Delete
+            client.db.delete("doc1", "ns1")
+            ```
+        """
+        return self._db
+    @property
+    def structured_embeddings(self) -> StructuredEmbeddingsNamespace:
+        """
+        Structured embeddings operations namespace.
+        Use this for embedding known tool types (FlashCard, TestQuestion, etc.)
+        with automatic text extraction, content hash computation, and database routing.
+        Example:
+            ```python
+            # Embed a flashcard - SDK handles text extraction, hashing, and routing
+            result = client.structured_embeddings.embed_flashcard_and_wait(
+                data={"type": "BASIC", "term": "Mitochondria", "definition": "..."},
+                metadata=ToolMetadata(tool_id="tool123", user_id="user456"),
+            )
+            # Embed a test question
+            result = client.structured_embeddings.embed_test_question_and_wait(
+                data=TestQuestionInput(
+                    question="What is the capital?",
+                    answers=[...],
+                    question_type="multiplechoice",
+                ),
+                metadata=ToolMetadata(tool_id="tool456"),
+            )
+            ```
+        """
+        return self._structured_embeddings
+    def close(self) -> None:
+        """Close the Redis connection."""
+        self._redis.close()
+    def __enter__(self) -> "VectorClient":
+        """Context manager entry."""
+        return self
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Context manager exit."""
+        self.close()
+class EmbeddingClient(VectorClient):
+    """
+    Backward-compatible client alias.
+    This class provides the original EmbeddingClient API that delegates to
+    the new namespace-based VectorClient. Use VectorClient directly for new code.
+    .. deprecated::
+        Use VectorClient instead for cleaner, namespaced API.
+    Example:
+        ```python
+        # Legacy usage (still works)
+        client = EmbeddingClient("redis://localhost:6379")
+        request_id = client.submit(texts, content_type)
+        result = client.wait_for_result(request_id)
+        # Recommended: Use VectorClient instead
+        client = VectorClient("redis://localhost:6379")
+        result = client.embeddings.create_and_wait(texts, content_type)
+        ```
+    """
+    # ========================================================================
+    # Embedding Methods (delegate to embeddings namespace)
+    # ========================================================================
+    def submit(
+        self,
+        texts: list[dict[str, Any]],
+        content_type: str,
+        priority: str = "normal",
+        storage: Optional[StorageConfig] = None,
+        metadata: Optional[dict[str, str]] = None,
+        request_id: Optional[str] = None,
+        embedding_model: Optional[str] = None,
+        embedding_dimensions: Optional[int] = None,
+    ) -> str:
+        """
+        Submit an embedding request to the gateway.
+        .. deprecated::
+            Use `client.embeddings.create()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.submit() is deprecated. Use VectorClient.embeddings.create() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.embeddings.create(
+            texts=texts,
+            content_type=content_type,
+            priority=priority,
+            storage=storage,
+            metadata=metadata,
+            request_id=request_id,
+            embedding_model=embedding_model,
+            embedding_dimensions=embedding_dimensions,
+        )
+    def wait_for_result(
+        self,
+        request_id: str,
+        timeout: int = 60,
+    ) -> EmbeddingResult:
+        """
+        Wait for an embedding request to complete.
+        .. deprecated::
+            Use `client.embeddings.wait_for()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.wait_for_result() is deprecated. Use VectorClient.embeddings.wait_for() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.embeddings.wait_for(request_id, timeout)
+    def submit_and_wait(
+        self,
+        texts: list[dict[str, Any]],
+        content_type: str,
+        priority: str = "normal",
+        storage: Optional[StorageConfig] = None,
+        metadata: Optional[dict[str, str]] = None,
+        timeout: int = 60,
+    ) -> EmbeddingResult:
+        """
+        Submit a request and wait for the result.
+        .. deprecated::
+            Use `client.embeddings.create_and_wait()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.submit_and_wait() is deprecated. Use VectorClient.embeddings.create_and_wait() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.embeddings.create_and_wait(
+            texts=texts,
+            content_type=content_type,
+            priority=priority,
+            storage=storage,
+            metadata=metadata,
+            timeout=timeout,
+        )
+    def get_queue_depth(self) -> dict[str, int]:
+        """
+        Get the current queue depth for each priority.
+        .. deprecated::
+            Use `client.embeddings.get_queue_depth()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.get_queue_depth() is deprecated. Use VectorClient.embeddings.get_queue_depth() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.embeddings.get_queue_depth()
+    # ========================================================================
+    # Query Methods (delegate to search namespace)
+    # ========================================================================
+    def query(
+        self,
+        query_text: str,
+        database: str,
+        top_k: int = 10,
+        min_score: Optional[float] = None,
+        filters: Optional[dict[str, str]] = None,
+        namespace: Optional[str] = None,
+        collection: Optional[str] = None,
+        database_name: Optional[str] = None,
+        include_vectors: bool = False,
+        include_metadata: bool = True,
+        embedding_model: Optional[str] = None,
+        embedding_dimensions: Optional[int] = None,
+        priority: str = "normal",
+        metadata: Optional[dict[str, str]] = None,
+        request_id: Optional[str] = None,
+    ) -> str:
+        """
+        Submit a query request to the query-gateway.
+        .. deprecated::
+            Use `client.search.query()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.query() is deprecated. Use VectorClient.search.query() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.search.query(
+            query_text=query_text,
+            database=database,
+            top_k=top_k,
+            min_score=min_score,
+            filters=filters,
+            namespace=namespace,
+            collection=collection,
+            database_name=database_name,
+            include_vectors=include_vectors,
+            include_metadata=include_metadata,
+            embedding_model=embedding_model,
+            embedding_dimensions=embedding_dimensions,
+            priority=priority,
+            metadata=metadata,
+            request_id=request_id,
+        )
+    def wait_for_query_result(
+        self,
+        request_id: str,
+        timeout: int = 30,
+    ) -> QueryResult:
+        """
+        Wait for a query request to complete.
+        .. deprecated::
+            Use `client.search.wait_for()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.wait_for_query_result() is deprecated. Use VectorClient.search.wait_for() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.search.wait_for(request_id, timeout)
+    def query_and_wait(
+        self,
+        query_text: str,
+        database: str,
+        top_k: int = 10,
+        min_score: Optional[float] = None,
+        filters: Optional[dict[str, str]] = None,
+        namespace: Optional[str] = None,
+        collection: Optional[str] = None,
+        database_name: Optional[str] = None,
+        include_vectors: bool = False,
+        include_metadata: bool = True,
+        embedding_model: Optional[str] = None,
+        embedding_dimensions: Optional[int] = None,
+        priority: str = "normal",
+        metadata: Optional[dict[str, str]] = None,
+        timeout: int = 30,
+    ) -> QueryResult:
+        """
+        Submit a query and wait for the result.
+        .. deprecated::
+            Use `client.search.query_and_wait()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.query_and_wait() is deprecated. Use VectorClient.search.query_and_wait() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.search.query_and_wait(
+            query_text=query_text,
+            database=database,
+            top_k=top_k,
+            min_score=min_score,
+            filters=filters,
+            namespace=namespace,
+            collection=collection,
+            database_name=database_name,
+            include_vectors=include_vectors,
+            include_metadata=include_metadata,
+            embedding_model=embedding_model,
+            embedding_dimensions=embedding_dimensions,
+            priority=priority,
+            metadata=metadata,
+            timeout=timeout,
+        )
+    # ========================================================================
+    # Database Lookup Methods (delegate to db namespace)
+    # ========================================================================
+    def lookup_by_ids(
+        self,
+        ids: list[str],
+        database: str,
+        namespace: Optional[str] = None,
+        collection: Optional[str] = None,
+        database_name: Optional[str] = None,
+        include_vectors: bool = False,
+        include_metadata: bool = True,
+    ) -> LookupResult:
+        """
+        Look up documents by their IDs.
+        .. deprecated::
+            Use `client.db.get_by_ids()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.lookup_by_ids() is deprecated. Use VectorClient.db.get_by_ids() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.get_by_ids(
+            ids=ids,
+            database=database,
+            namespace=namespace,
+            collection=collection,
+            database_name=database_name,
+            include_vectors=include_vectors,
+            include_metadata=include_metadata,
+        )
+    def search_by_metadata(
+        self,
+        filters: dict[str, Any],
+        database: str,
+        namespace: Optional[str] = None,
+        collection: Optional[str] = None,
+        database_name: Optional[str] = None,
+        limit: int = 100,
+        include_vectors: bool = False,
+    ) -> LookupResult:
+        """
+        Search for documents by metadata filters.
+        .. deprecated::
+            Use `client.db.find_by_metadata()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.search_by_metadata() is deprecated. Use VectorClient.db.find_by_metadata() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.find_by_metadata(
+            filters=filters,
+            database=database,
+            namespace=namespace,
+            collection=collection,
+            database_name=database_name,
+            limit=limit,
+            include_vectors=include_vectors,
+        )
+    def clone_from_namespace(
+        self,
+        id: str,
+        source_namespace: str,
+        destination_namespace: str,
+    ) -> CloneResult:
+        """
+        Clone a document from one TurboPuffer namespace to another.
+        .. deprecated::
+            Use `client.db.clone()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.clone_from_namespace() is deprecated. Use VectorClient.db.clone() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.clone(
+            id=id,
+            source_namespace=source_namespace,
+            destination_namespace=destination_namespace,
+        )
+    def delete_from_namespace(
+        self,
+        id: str,
+        namespace: str,
+    ) -> DeleteFromNamespaceResult:
+        """
+        Delete a document from a TurboPuffer namespace.
+        .. deprecated::
+            Use `client.db.delete()` instead.
+        """
+        warnings.warn(
+            "EmbeddingClient.delete_from_namespace() is deprecated. Use VectorClient.db.delete() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.delete(id=id, namespace=namespace)