sf-vector-sdk 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sf-vector-sdk
-Version: 0.2.4
+Version: 0.3.0
 Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
 Requires-Python: >=3.11
 Requires-Dist: redis>=5.0.0
@@ -316,6 +316,13 @@ result = client.db.clone("doc1", "ns1", "ns2")
 
 # Delete
 result = client.db.delete("doc1", "ns1")
+
+# Export entire namespace
+export_result = client.db.get_vectors_in_namespace(
+    namespace="tool_vectors",
+    include_vectors=True,
+)
+print(f"Exported {len(export_result.documents)} documents")
 ```
 
 ### Types
@@ -1,7 +1,7 @@
-vector_sdk/__init__.py,sha256=VoljCrab1syIU3NWthWI9ks2s2QDIroixzFGkYamJSY,6979
+vector_sdk/__init__.py,sha256=tzeggXDFYGrkc6_08Ta5EwCypWc7kTMLSWsv0DA-Vyo,7162
 vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
 vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
-vector_sdk/types.py,sha256=rQgA2z3ls21vY-DRPZgfmm8gYFkWJk1dQaJI-nbc0no,25514
+vector_sdk/types.py,sha256=h_nPB-UjHVgl_qTRf-2bGKlXiPphoNi3alM8BcJmJro,28207
 vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py,sha256=5dW14j_DyIPKCaFI2cxCKKtQoLMGtRqV3aiRZ8Utxw4,5962
 vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi,sha256=fOw6liHkiXSEyvEZ_QKexDUgFNhbemuGuk52hwQ5pnQ,6738
 vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py,sha256=nFmjLnJJh5H-t25FJ8oP7jLH-mAcuEw-EK0U-dYlgDI,7057
@@ -15,13 +15,13 @@ vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,861
 vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
 vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
 vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
-vector_sdk/namespaces/db.py,sha256=a5sEHrfy1xAjRjyM9qfZxr3IznZVA8BnY5W1Hq5jr4I,7230
+vector_sdk/namespaces/db.py,sha256=eh7k0gpZcZSIML67YPsTbqqeoS-c6ZC_CmlptpBeNFI,10182
 vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsYxiXtJo,7704
 vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
 vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
 vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
-vector_sdk/structured/structured_embeddings.py,sha256=e-EOYgpx7SXOo1xQV6-5ZgB6W3ZH1HS2Tx1m7O_1VNE,36869
+vector_sdk/structured/structured_embeddings.py,sha256=GiIrdAUi8ImsakASTS2Vtda7MZQYwyyhr3alJB-fJnM,36889
 vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
-sf_vector_sdk-0.2.4.dist-info/METADATA,sha256=kvP3u9ZJ3RUsLMcKz5yMRfkUworAcqJ-pZoLtXaYVoc,16069
-sf_vector_sdk-0.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sf_vector_sdk-0.2.4.dist-info/RECORD,,
+sf_vector_sdk-0.3.0.dist-info/METADATA,sha256=HOHWG2quw9q65CMw1rAC27IEyU8gJGP-xjVj_RynDt4,16266
+sf_vector_sdk-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sf_vector_sdk-0.3.0.dist-info/RECORD,,
vector_sdk/__init__.py CHANGED
@@ -141,11 +141,14 @@ from vector_sdk.types import (
     EmbeddingProvider,
     EmbeddingRequest,
     EmbeddingResult,
+    ExportTiming,
+    GetVectorsInNamespaceResult,
     LookupResult,
     LookupTiming,
     ModelConfig,
     ModelValidationError,
     MongoDBStorage,
+    NamespaceMetadata,
     PineconeStorageConfig,
     # Query types
     QueryConfig,
@@ -166,7 +169,7 @@ from vector_sdk.types import (
     validate_model,
 )
 
-__version__ = "0.2.4"
+__version__ = "0.3.0"
 
 __all__ = [
     # Clients (New API)
@@ -203,6 +206,10 @@ __all__ = [
     # Clone and Delete types
     "CloneResult",
     "DeleteFromNamespaceResult",
+    # Export namespace types
+    "GetVectorsInNamespaceResult",
+    "NamespaceMetadata",
+    "ExportTiming",
     # Query constants
     "QUERY_STREAM_CRITICAL",
     "QUERY_STREAM_HIGH",
vector_sdk/namespaces/db.py CHANGED
@@ -2,6 +2,8 @@
 Database namespace for direct database operations (no embedding required).
 """
 
+import json
+import time
 from typing import Any, Optional
 
 import requests
@@ -10,6 +12,7 @@ from vector_sdk.namespaces.base import BaseNamespace
 from vector_sdk.types import (
     CloneResult,
     DeleteFromNamespaceResult,
+    GetVectorsInNamespaceResult,
     LookupResult,
 )
 
@@ -228,3 +231,84 @@ class DBNamespace(BaseNamespace):
         response.raise_for_status()
 
         return DeleteFromNamespaceResult.from_dict(response.json())
+
+    def get_vectors_in_namespace(
+        self,
+        namespace: str,
+        include_vectors: bool = True,
+        include_metadata: bool = True,
+        timeout_ms: int = 300000,
+    ) -> GetVectorsInNamespaceResult:
+        """
+        Export all vectors from a TurboPuffer namespace.
+
+        This method submits an export job to the query gateway and waits for completion.
+        The gateway handles pagination automatically and returns all results at once.
+
+        Args:
+            namespace: TurboPuffer namespace to export from
+            include_vectors: Whether to include vectors in response (default: True)
+            include_metadata: Whether to include metadata in response (default: True)
+            timeout_ms: Maximum time to wait for export completion in milliseconds
+                (default: 300000ms = 5 minutes)
+
+        Returns:
+            GetVectorsInNamespaceResult containing all documents and namespace metadata
+
+        Raises:
+            ValueError: If http_url is not configured or namespace is missing
+            TimeoutError: If the export times out
+            requests.HTTPError: If the request fails
+            Exception: If the export fails on the server side
+
+        Example:
+            ```python
+            result = client.db.get_vectors_in_namespace(
+                namespace="tool_vectors",
+                include_vectors=True,
+                include_metadata=True,
+            )
+
+            print(f"Exported {len(result.documents)} documents")
+            print(f"Namespace has ~{result.metadata.approx_row_count} total rows")
+            ```
+        """
+        http_url = self._require_http_url("get_vectors_in_namespace")
+
+        if not namespace:
+            raise ValueError("namespace is required")
+
+        # 1. Submit export job to gateway
+        url = f"{http_url}/v1/export/turbopuffer"
+        body = {
+            "namespace": namespace,
+            "includeVectors": include_vectors,
+            "includeMetadata": include_metadata,
+        }
+
+        response = requests.post(url, json=body, timeout=30)
+        response.raise_for_status()
+
+        job_id = response.json()["jobId"]
+
+        # 2. Poll Redis for the result
+        redis_key = f"namespace-export:{job_id}"
+        start_time = time.time()
+        poll_interval = 1.0  # Poll every 1 second
+
+        while (time.time() - start_time) * 1000 < timeout_ms:
+            result_str = self.redis.get(redis_key)
+
+            if result_str:
+                result_dict = json.loads(result_str)
+                result = GetVectorsInNamespaceResult.from_dict(result_dict)
+
+                if result.status == "failed":
+                    raise Exception(f"Export failed: {result.error}")
+
+                return result
+
+            # Wait before the next poll
+            time.sleep(poll_interval)
+
+        raise TimeoutError(f"Export timeout after {timeout_ms}ms")
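For quick reference, a minimal usage sketch of the method added above. It assumes a client constructed as in the README examples (any object exposing `client.db.get_vectors_in_namespace`); the `export_namespace` helper name and the 2-minute timeout are illustrative, while `timeout_ms`, `TimeoutError`, and the `timing`/`metadata` fields come directly from the code in this diff.

```python
# Illustrative helper (not part of the SDK): export a namespace and report timing.
# `client` is assumed to be an already-configured SDK client with `client.db` available.
def export_namespace(client, namespace: str) -> list:
    try:
        result = client.db.get_vectors_in_namespace(
            namespace=namespace,
            include_vectors=True,
            include_metadata=True,
            timeout_ms=120_000,  # fail faster than the 5-minute default
        )
    except TimeoutError:
        # Raised when the export result never appears in Redis within timeout_ms.
        print(f"Export of {namespace!r} timed out")
        return []

    print(
        f"Exported {len(result.documents)} of ~{result.metadata.approx_row_count} rows "
        f"in {result.timing.total_ms} ms"
    )
    return result.documents
```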
vector_sdk/structured/structured_embeddings.py CHANGED
@@ -844,7 +844,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -913,7 +913,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -1119,7 +1119,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
@@ -1198,7 +1198,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
             storage=storage_config,
             metadata={
                 "toolCollection": tool_collection,
-                "batchSize": len(items),
+                "batchSize": str(len(items)),
             },
             embedding_model=tool_config.model,
             embedding_dimensions=tool_config.dimensions,
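All four hunks above make the same change: the batch size is stringified before it is placed in the request metadata. A minimal sketch of the resulting shape; the constraint that metadata values travel as strings is an assumption inferred from the change, not something the diff states.

```python
# Assumption (inferred, not stated in the diff): request metadata is carried as
# string-to-string pairs, so numeric values must be stringified before sending.
items = ["tool_a", "tool_b", "tool_c"]
metadata: dict[str, str] = {
    "toolCollection": "tool_vectors",
    "batchSize": str(len(items)),  # "3" rather than the int 3
}
```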
vector_sdk/types.py CHANGED
@@ -862,3 +862,96 @@ class DeleteFromNamespaceResult:
             success=data["success"],
             timing=timing,
         )
+
+
+@dataclass
+class NamespaceMetadata:
+    """
+    Namespace metadata from TurboPuffer.
+
+    Attributes:
+        schema: Schema information (dimensions, attributes)
+        approx_row_count: Approximate number of rows in namespace
+        approx_logical_bytes: Approximate logical bytes used
+        created_at: When the namespace was created
+        updated_at: When the namespace was last updated
+    """
+    schema: dict[str, Any]
+    approx_row_count: int
+    approx_logical_bytes: int
+    created_at: str
+    updated_at: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "NamespaceMetadata":
+        """Create from dictionary."""
+        return cls(
+            schema=data["schema"],
+            approx_row_count=data["approx_row_count"],
+            approx_logical_bytes=data["approx_logical_bytes"],
+            created_at=data["created_at"],
+            updated_at=data["updated_at"],
+        )
+
+
+@dataclass
+class ExportTiming:
+    """
+    Timing breakdown for export operations.
+
+    Attributes:
+        metadata_ms: Time to fetch namespace metadata (ms)
+        query_ms: Time to fetch all documents (ms)
+        total_ms: Total export time (ms)
+    """
+    metadata_ms: int
+    query_ms: int
+    total_ms: int
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ExportTiming":
+        """Create from dictionary."""
+        return cls(
+            metadata_ms=data["metadataMs"],
+            query_ms=data["queryMs"],
+            total_ms=data["totalMs"],
+        )
+
+
+@dataclass
+class GetVectorsInNamespaceResult:
+    """
+    Result of a namespace export operation.
+
+    Attributes:
+        job_id: Job ID for the export
+        status: Export status ("success" or "failed")
+        documents: All exported documents
+        metadata: Namespace metadata
+        error: Error message if failed
+        timing: Timing breakdown
+        completed_at: When the export completed
+    """
+    job_id: str
+    status: str
+    documents: list[Document]
+    metadata: NamespaceMetadata
+    error: Optional[str]
+    timing: ExportTiming
+    completed_at: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "GetVectorsInNamespaceResult":
+        """Create from dictionary."""
+        documents = [Document.from_dict(d) for d in data["documents"]]
+        metadata = NamespaceMetadata.from_dict(data["metadata"])
+        timing = ExportTiming.from_dict(data["timing"])
+        return cls(
+            job_id=data["jobId"],
+            status=data["status"],
+            documents=documents,
+            metadata=metadata,
+            error=data.get("error"),
+            timing=timing,
+            completed_at=data["completedAt"],
+        )
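To make the new result types concrete, here is a sketch of a payload that round-trips through `GetVectorsInNamespaceResult.from_dict`. Only the key names are taken from the diff (camelCase at the top level and in `timing`, snake_case inside `metadata`); every value, including the schema dict, is invented for illustration, and `documents` is left empty so nothing has to be assumed about the `Document` schema.

```python
# Shape sketch of the Redis export payload, based only on the keys read in the diff
# above. Concrete values are made up; "documents" entries would be parsed with
# Document.from_dict, so the list is left empty here.
payload = {
    "jobId": "job-123",
    "status": "success",
    "documents": [],
    "metadata": {                       # snake_case keys for NamespaceMetadata
        "schema": {"vector": {"dims": 1536}},   # illustrative schema contents
        "approx_row_count": 42,
        "approx_logical_bytes": 1048576,
        "created_at": "2024-01-01T00:00:00Z",
        "updated_at": "2024-06-01T00:00:00Z",
    },
    "timing": {"metadataMs": 12, "queryMs": 480, "totalMs": 492},  # camelCase keys
    "completedAt": "2024-06-01T00:00:05Z",
}

result = GetVectorsInNamespaceResult.from_dict(payload)
assert result.status == "success"
assert result.timing.total_ms == 492
assert result.error is None  # "error" is optional and read with data.get("error")
```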