sf-vector-sdk 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sf-vector-sdk
-Version: 0.2.4
+Version: 0.3.0
 Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
 Requires-Python: >=3.11
 Requires-Dist: redis>=5.0.0
@@ -316,6 +316,13 @@ result = client.db.clone("doc1", "ns1", "ns2")
 
 # Delete
 result = client.db.delete("doc1", "ns1")
+
+# Export entire namespace
+export_result = client.db.get_vectors_in_namespace(
+    namespace="tool_vectors",
+    include_vectors=True,
+)
+print(f"Exported {len(export_result.documents)} documents")
 ```
 
 ### Types
@@ -1,7 +1,7 @@
-vector_sdk/__init__.py,sha256=VoljCrab1syIU3NWthWI9ks2s2QDIroixzFGkYamJSY,6979
+vector_sdk/__init__.py,sha256=tzeggXDFYGrkc6_08Ta5EwCypWc7kTMLSWsv0DA-Vyo,7162
 vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
 vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
-vector_sdk/types.py,sha256=rQgA2z3ls21vY-DRPZgfmm8gYFkWJk1dQaJI-nbc0no,25514
+vector_sdk/types.py,sha256=h_nPB-UjHVgl_qTRf-2bGKlXiPphoNi3alM8BcJmJro,28207
 vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py,sha256=5dW14j_DyIPKCaFI2cxCKKtQoLMGtRqV3aiRZ8Utxw4,5962
 vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi,sha256=fOw6liHkiXSEyvEZ_QKexDUgFNhbemuGuk52hwQ5pnQ,6738
 vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py,sha256=nFmjLnJJh5H-t25FJ8oP7jLH-mAcuEw-EK0U-dYlgDI,7057
@@ -15,13 +15,13 @@ vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,861
 vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
 vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
 vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
-vector_sdk/namespaces/db.py,sha256=a5sEHrfy1xAjRjyM9qfZxr3IznZVA8BnY5W1Hq5jr4I,7230
+vector_sdk/namespaces/db.py,sha256=eh7k0gpZcZSIML67YPsTbqqeoS-c6ZC_CmlptpBeNFI,10182
 vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsYxiXtJo,7704
 vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
 vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
 vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
-vector_sdk/structured/structured_embeddings.py,sha256=e-EOYgpx7SXOo1xQV6-5ZgB6W3ZH1HS2Tx1m7O_1VNE,36869
+vector_sdk/structured/structured_embeddings.py,sha256=GiIrdAUi8ImsakASTS2Vtda7MZQYwyyhr3alJB-fJnM,36889
 vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
-sf_vector_sdk-0.2.4.dist-info/METADATA,sha256=kvP3u9ZJ3RUsLMcKz5yMRfkUworAcqJ-pZoLtXaYVoc,16069
-sf_vector_sdk-0.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sf_vector_sdk-0.2.4.dist-info/RECORD,,
+sf_vector_sdk-0.3.0.dist-info/METADATA,sha256=HOHWG2quw9q65CMw1rAC27IEyU8gJGP-xjVj_RynDt4,16266
+sf_vector_sdk-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sf_vector_sdk-0.3.0.dist-info/RECORD,,
vector_sdk/__init__.py CHANGED
@@ -141,11 +141,14 @@ from vector_sdk.types import (
     EmbeddingProvider,
     EmbeddingRequest,
     EmbeddingResult,
+    ExportTiming,
+    GetVectorsInNamespaceResult,
     LookupResult,
     LookupTiming,
     ModelConfig,
     ModelValidationError,
     MongoDBStorage,
+    NamespaceMetadata,
     PineconeStorageConfig,
     # Query types
     QueryConfig,
@@ -166,7 +169,7 @@ from vector_sdk.types import (
     validate_model,
 )
 
-__version__ = "0.2.4"
+__version__ = "0.3.0"
 
 __all__ = [
     # Clients (New API)
@@ -203,6 +206,10 @@ __all__ = [
     # Clone and Delete types
     "CloneResult",
     "DeleteFromNamespaceResult",
+    # Export namespace types
+    "GetVectorsInNamespaceResult",
+    "NamespaceMetadata",
+    "ExportTiming",
     # Query constants
     "QUERY_STREAM_CRITICAL",
     "QUERY_STREAM_HIGH",
vector_sdk/namespaces/db.py CHANGED
@@ -2,6 +2,8 @@
 Database namespace for direct database operations (no embedding required).
 """
 
+import json
+import time
 from typing import Any, Optional
 
 import requests
@@ -10,6 +12,7 @@ from vector_sdk.namespaces.base import BaseNamespace
 from vector_sdk.types import (
     CloneResult,
     DeleteFromNamespaceResult,
+    GetVectorsInNamespaceResult,
     LookupResult,
 )
 
@@ -228,3 +231,84 @@ class DBNamespace(BaseNamespace):
         response.raise_for_status()
 
         return DeleteFromNamespaceResult.from_dict(response.json())
+
+    def get_vectors_in_namespace(
+        self,
+        namespace: str,
+        include_vectors: bool = True,
+        include_metadata: bool = True,
+        timeout_ms: int = 300000,
+    ) -> GetVectorsInNamespaceResult:
+        """
+        Export all vectors from a TurboPuffer namespace.
+
+        This method submits an export job to the query gateway and waits for completion.
+        The gateway handles pagination automatically and returns all results at once.
+
+        Args:
+            namespace: TurboPuffer namespace to export from
+            include_vectors: Whether to include vectors in response (default: True)
+            include_metadata: Whether to include metadata in response (default: True)
+            timeout_ms: Maximum time to wait for export completion in milliseconds
+                (default: 300000ms = 5 minutes)
+
+        Returns:
+            GetVectorsInNamespaceResult containing all documents and namespace metadata
+
+        Raises:
+            ValueError: If http_url is not configured or namespace is missing
+            TimeoutError: If the export times out
+            requests.HTTPError: If the request fails
+            Exception: If the export fails on the server side
+
+        Example:
+            ```python
+            result = client.db.get_vectors_in_namespace(
+                namespace="tool_vectors",
+                include_vectors=True,
+                include_metadata=True,
+            )
+
+            print(f"Exported {len(result.documents)} documents")
+            print(f"Namespace has ~{result.metadata.approx_row_count} total rows")
+            ```
+        """
+        http_url = self._require_http_url("get_vectors_in_namespace")
+
+        if not namespace:
+            raise ValueError("namespace is required")
+
+        # 1. Submit export job to gateway
+        url = f"{http_url}/v1/export/turbopuffer"
+        body = {
+            "namespace": namespace,
+            "includeVectors": include_vectors,
+            "includeMetadata": include_metadata,
+        }
+
+        response = requests.post(url, json=body, timeout=30)
+        response.raise_for_status()
+
+        job_id = response.json()["jobId"]
+
+        # 2. Poll Redis for the result
+        redis_key = f"namespace-export:{job_id}"
+        start_time = time.time()
+        poll_interval = 1.0  # Poll every 1 second
+
+        while (time.time() - start_time) * 1000 < timeout_ms:
+            result_str = self.redis.get(redis_key)
+
+            if result_str:
+                result_dict = json.loads(result_str)
+                result = GetVectorsInNamespaceResult.from_dict(result_dict)
+
+                if result.status == "failed":
+                    raise Exception(f"Export failed: {result.error}")
+
+                return result
+
+            # Wait before the next poll
+            time.sleep(poll_interval)
+
+        raise TimeoutError(f"Export timeout after {timeout_ms}ms")
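For quick reference, a minimal usage sketch of the method added above. It assumes a client constructed as in the README examples (any object exposing `client.db.get_vectors_in_namespace`); the `export_namespace` helper name and the 2-minute timeout are illustrative, while `timeout_ms`, `TimeoutError`, and the `timing`/`metadata` fields come directly from the code in this diff.

```python
# Illustrative helper (not part of the SDK): export a namespace and report timing.
# `client` is assumed to be an already-configured SDK client with `client.db` available.
def export_namespace(client, namespace: str) -> list:
    try:
        result = client.db.get_vectors_in_namespace(
            namespace=namespace,
            include_vectors=True,
            include_metadata=True,
            timeout_ms=120_000,  # fail faster than the 5-minute default
        )
    except TimeoutError:
        # Raised when the export result never appears in Redis within timeout_ms.
        print(f"Export of {namespace!r} timed out")
        return []

    print(
        f"Exported {len(result.documents)} of ~{result.metadata.approx_row_count} rows "
        f"in {result.timing.total_ms} ms"
    )
    return result.documents
```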
vector_sdk/structured/structured_embeddings.py CHANGED
@@ -844,7 +844,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -913,7 +913,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -1119,7 +1119,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -1198,7 +1198,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
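All four hunks above make the same change: the batch size is stringified before it is placed in the request metadata. A minimal sketch of the resulting shape; the constraint that metadata values travel as strings is an assumption inferred from the change, not something the diff states.

```python
# Assumption (inferred, not stated in the diff): request metadata is carried as
# string-to-string pairs, so numeric values must be stringified before sending.
items = ["tool_a", "tool_b", "tool_c"]
metadata: dict[str, str] = {
    "toolCollection": "tool_vectors",
    "batchSize": str(len(items)),  # "3" rather than the int 3
}
```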
vector_sdk/types.py CHANGED
@@ -862,3 +862,96 @@ class DeleteFromNamespaceResult:
             success=data["success"],
             timing=timing,
         )
+
+
+@dataclass
+class NamespaceMetadata:
+    """
+    Namespace metadata from TurboPuffer.
+
+    Attributes:
+        schema: Schema information (dimensions, attributes)
+        approx_row_count: Approximate number of rows in namespace
+        approx_logical_bytes: Approximate logical bytes used
+        created_at: When the namespace was created
+        updated_at: When the namespace was last updated
+    """
+    schema: dict[str, Any]
+    approx_row_count: int
+    approx_logical_bytes: int
+    created_at: str
+    updated_at: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "NamespaceMetadata":
+        """Create from dictionary."""
+        return cls(
+            schema=data["schema"],
+            approx_row_count=data["approx_row_count"],
+            approx_logical_bytes=data["approx_logical_bytes"],
+            created_at=data["created_at"],
+            updated_at=data["updated_at"],
+        )
+
+
+@dataclass
+class ExportTiming:
+    """
+    Timing breakdown for export operations.
+
+    Attributes:
+        metadata_ms: Time to fetch namespace metadata (ms)
+        query_ms: Time to fetch all documents (ms)
+        total_ms: Total export time (ms)
+    """
+    metadata_ms: int
+    query_ms: int
+    total_ms: int
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ExportTiming":
+        """Create from dictionary."""
+        return cls(
+            metadata_ms=data["metadataMs"],
+            query_ms=data["queryMs"],
+            total_ms=data["totalMs"],
+        )
+
+
+@dataclass
+class GetVectorsInNamespaceResult:
+    """
+    Result of a namespace export operation.
+
+    Attributes:
+        job_id: Job ID for the export
+        status: Export status ("success" or "failed")
+        documents: All exported documents
+        metadata: Namespace metadata
+        error: Error message if failed
+        timing: Timing breakdown
+        completed_at: When the export completed
+    """
+    job_id: str
+    status: str
+    documents: list[Document]
+    metadata: NamespaceMetadata
+    error: Optional[str]
+    timing: ExportTiming
+    completed_at: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "GetVectorsInNamespaceResult":
+        """Create from dictionary."""
+        documents = [Document.from_dict(d) for d in data["documents"]]
+        metadata = NamespaceMetadata.from_dict(data["metadata"])
+        timing = ExportTiming.from_dict(data["timing"])
+        return cls(
+            job_id=data["jobId"],
+            status=data["status"],
+            documents=documents,
+            metadata=metadata,
+            error=data.get("error"),
+            timing=timing,
+            completed_at=data["completedAt"],
+        )
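To make the new result types concrete, here is a sketch of a payload that round-trips through `GetVectorsInNamespaceResult.from_dict`. Only the key names are taken from the diff (camelCase at the top level and in `timing`, snake_case inside `metadata`); every value, including the schema dict, is invented for illustration, and `documents` is left empty so nothing has to be assumed about the `Document` schema.

```python
# Shape sketch of the Redis export payload, based only on the keys read in the diff
# above. Concrete values are made up; "documents" entries would be parsed with
# Document.from_dict, so the list is left empty here.
payload = {
    "jobId": "job-123",
    "status": "success",
    "documents": [],
    "metadata": {                       # snake_case keys for NamespaceMetadata
        "schema": {"vector": {"dims": 1536}},   # illustrative schema contents
        "approx_row_count": 42,
        "approx_logical_bytes": 1048576,
        "created_at": "2024-01-01T00:00:00Z",
        "updated_at": "2024-06-01T00:00:00Z",
    },
    "timing": {"metadataMs": 12, "queryMs": 480, "totalMs": 492},  # camelCase keys
    "completedAt": "2024-06-01T00:00:05Z",
}

result = GetVectorsInNamespaceResult.from_dict(payload)
assert result.status == "success"
assert result.timing.total_ms == 492
assert result.error is None  # "error" is optional and read with data.get("error")
```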