sf-vector-sdk 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sf_vector_sdk-0.2.4.dist-info → sf_vector_sdk-0.3.0.dist-info}/METADATA +8 -1
- {sf_vector_sdk-0.2.4.dist-info → sf_vector_sdk-0.3.0.dist-info}/RECORD +7 -7
- vector_sdk/__init__.py +8 -1
- vector_sdk/namespaces/db.py +84 -0
- vector_sdk/structured/structured_embeddings.py +4 -4
- vector_sdk/types.py +93 -0
- {sf_vector_sdk-0.2.4.dist-info → sf_vector_sdk-0.3.0.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sf-vector-sdk
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Python SDK for the Vector Gateway service (embeddings and vector search)
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: redis>=5.0.0
|
|
@@ -316,6 +316,13 @@ result = client.db.clone("doc1", "ns1", "ns2")
|
|
|
316
316
|
|
|
317
317
|
# Delete
|
|
318
318
|
result = client.db.delete("doc1", "ns1")
|
|
319
|
+
|
|
320
|
+
# Export entire namespace
|
|
321
|
+
export_result = client.db.get_vectors_in_namespace(
|
|
322
|
+
namespace="tool_vectors",
|
|
323
|
+
include_vectors=True,
|
|
324
|
+
)
|
|
325
|
+
print(f"Exported {len(export_result.documents)} documents")
|
|
319
326
|
```
|
|
320
327
|
|
|
321
328
|
### Types
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
vector_sdk/__init__.py,sha256=
|
|
1
|
+
vector_sdk/__init__.py,sha256=tzeggXDFYGrkc6_08Ta5EwCypWc7kTMLSWsv0DA-Vyo,7162
|
|
2
2
|
vector_sdk/client.py,sha256=NQFGHyR1aM0UToRFy6e9Xm_v6mk0opqzKN8UlHu97n0,17186
|
|
3
3
|
vector_sdk/content_types.py,sha256=krvFOR58iUZPfYlEVsk0sXD6_ANAFbxEBQGNpt1YPDU,7381
|
|
4
|
-
vector_sdk/types.py,sha256=
|
|
4
|
+
vector_sdk/types.py,sha256=h_nPB-UjHVgl_qTRf-2bGKlXiPphoNi3alM8BcJmJro,28207
|
|
5
5
|
vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.py,sha256=5dW14j_DyIPKCaFI2cxCKKtQoLMGtRqV3aiRZ8Utxw4,5962
|
|
6
6
|
vector_sdk/generated/embedding_pipeline/content_types/v1/content_types_pb2.pyi,sha256=fOw6liHkiXSEyvEZ_QKexDUgFNhbemuGuk52hwQ5pnQ,6738
|
|
7
7
|
vector_sdk/generated/embedding_pipeline/db/vectors/v1/vectors_pb2.py,sha256=nFmjLnJJh5H-t25FJ8oP7jLH-mAcuEw-EK0U-dYlgDI,7057
|
|
@@ -15,13 +15,13 @@ vector_sdk/hash/hasher.py,sha256=k5VSQB-T0TtBM5ipaVE_TQu_uiaiWNjOWSbByxjriwQ,861
|
|
|
15
15
|
vector_sdk/hash/types.py,sha256=clBRk_D5SrXWU19K3Jg8COecz9--WZh9Ws4f70T3BXg,2044
|
|
16
16
|
vector_sdk/namespaces/__init__.py,sha256=S9dJfB39s2zjYOpFn9Fvf8bk7mLKcXk5aPatKOA-xO0,374
|
|
17
17
|
vector_sdk/namespaces/base.py,sha256=lioZBcd43mijnN0JwTMMEpQ6whiAjaueTDAAIZS1JM0,1156
|
|
18
|
-
vector_sdk/namespaces/db.py,sha256=
|
|
18
|
+
vector_sdk/namespaces/db.py,sha256=eh7k0gpZcZSIML67YPsTbqqeoS-c6ZC_CmlptpBeNFI,10182
|
|
19
19
|
vector_sdk/namespaces/embeddings.py,sha256=r0cbCZjj0jZ9oyBpm8lA2BjUYzi8bmunWwFsYxiXtJo,7704
|
|
20
20
|
vector_sdk/namespaces/search.py,sha256=8ruX0xp5vXD9tS8oXAu1vmF4aC25fNg4gDOtiR8aQ_0,7874
|
|
21
21
|
vector_sdk/structured/__init__.py,sha256=ZUhrH_l7bX5vA78DSKqDucWhfhYmkDX-W_MPzo5J9JU,1758
|
|
22
22
|
vector_sdk/structured/router.py,sha256=F3O1TYtbVFCPqVWCCYCt5QcRffX5WPlPQ7K3KlayooQ,5792
|
|
23
|
-
vector_sdk/structured/structured_embeddings.py,sha256=
|
|
23
|
+
vector_sdk/structured/structured_embeddings.py,sha256=GiIrdAUi8ImsakASTS2Vtda7MZQYwyyhr3alJB-fJnM,36889
|
|
24
24
|
vector_sdk/structured/tool_config.py,sha256=qMwP8UWQTt8mkTYFVgvNXd9Dh_WztJSsqcgAjvQ_YoY,8212
|
|
25
|
-
sf_vector_sdk-0.
|
|
26
|
-
sf_vector_sdk-0.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
27
|
-
sf_vector_sdk-0.2.4.dist-info/RECORD,,
|
|
25
|
+
sf_vector_sdk-0.3.0.dist-info/METADATA,sha256=HOHWG2quw9q65CMw1rAC27IEyU8gJGP-xjVj_RynDt4,16266
|
|
26
|
+
sf_vector_sdk-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
27
|
+
sf_vector_sdk-0.3.0.dist-info/RECORD,,
|
vector_sdk/__init__.py
CHANGED
|
@@ -141,11 +141,14 @@ from vector_sdk.types import (
|
|
|
141
141
|
EmbeddingProvider,
|
|
142
142
|
EmbeddingRequest,
|
|
143
143
|
EmbeddingResult,
|
|
144
|
+
ExportTiming,
|
|
145
|
+
GetVectorsInNamespaceResult,
|
|
144
146
|
LookupResult,
|
|
145
147
|
LookupTiming,
|
|
146
148
|
ModelConfig,
|
|
147
149
|
ModelValidationError,
|
|
148
150
|
MongoDBStorage,
|
|
151
|
+
NamespaceMetadata,
|
|
149
152
|
PineconeStorageConfig,
|
|
150
153
|
# Query types
|
|
151
154
|
QueryConfig,
|
|
@@ -166,7 +169,7 @@ from vector_sdk.types import (
|
|
|
166
169
|
validate_model,
|
|
167
170
|
)
|
|
168
171
|
|
|
169
|
-
__version__ = "0.2.4"
|
|
172
|
+
__version__ = "0.3.0"
|
|
170
173
|
|
|
171
174
|
__all__ = [
|
|
172
175
|
# Clients (New API)
|
|
@@ -203,6 +206,10 @@ __all__ = [
|
|
|
203
206
|
# Clone and Delete types
|
|
204
207
|
"CloneResult",
|
|
205
208
|
"DeleteFromNamespaceResult",
|
|
209
|
+
# Export namespace types
|
|
210
|
+
"GetVectorsInNamespaceResult",
|
|
211
|
+
"NamespaceMetadata",
|
|
212
|
+
"ExportTiming",
|
|
206
213
|
# Query constants
|
|
207
214
|
"QUERY_STREAM_CRITICAL",
|
|
208
215
|
"QUERY_STREAM_HIGH",
|
vector_sdk/namespaces/db.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
Database namespace for direct database operations (no embedding required).
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
5
7
|
from typing import Any, Optional
|
|
6
8
|
|
|
7
9
|
import requests
|
|
@@ -10,6 +12,7 @@ from vector_sdk.namespaces.base import BaseNamespace
|
|
|
10
12
|
from vector_sdk.types import (
|
|
11
13
|
CloneResult,
|
|
12
14
|
DeleteFromNamespaceResult,
|
|
15
|
+
GetVectorsInNamespaceResult,
|
|
13
16
|
LookupResult,
|
|
14
17
|
)
|
|
15
18
|
|
|
@@ -228,3 +231,84 @@ class DBNamespace(BaseNamespace):
|
|
|
228
231
|
response.raise_for_status()
|
|
229
232
|
|
|
230
233
|
return DeleteFromNamespaceResult.from_dict(response.json())
|
|
234
|
+
|
|
235
|
+
def get_vectors_in_namespace(
|
|
236
|
+
self,
|
|
237
|
+
namespace: str,
|
|
238
|
+
include_vectors: bool = True,
|
|
239
|
+
include_metadata: bool = True,
|
|
240
|
+
timeout_ms: int = 300000,
|
|
241
|
+
) -> GetVectorsInNamespaceResult:
|
|
242
|
+
"""
|
|
243
|
+
Export all vectors from a TurboPuffer namespace.
|
|
244
|
+
|
|
245
|
+
This method submits an export job to the query gateway and waits for completion.
|
|
246
|
+
The gateway handles pagination automatically and returns all results at once.
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
namespace: TurboPuffer namespace to export from
|
|
250
|
+
include_vectors: Whether to include vectors in response (default: True)
|
|
251
|
+
include_metadata: Whether to include metadata in response (default: True)
|
|
252
|
+
timeout_ms: Maximum time to wait for export completion in milliseconds
|
|
253
|
+
(default: 300000ms = 5 minutes)
|
|
254
|
+
|
|
255
|
+
Returns:
|
|
256
|
+
GetVectorsInNamespaceResult containing all documents and namespace metadata
|
|
257
|
+
|
|
258
|
+
Raises:
|
|
259
|
+
ValueError: If http_url is not configured or namespace is missing
|
|
260
|
+
TimeoutError: If export times out
|
|
261
|
+
requests.HTTPError: If the request fails
|
|
262
|
+
Exception: If the export fails on the server side
|
|
263
|
+
|
|
264
|
+
Example:
|
|
265
|
+
```python
|
|
266
|
+
result = client.db.get_vectors_in_namespace(
|
|
267
|
+
namespace="tool_vectors",
|
|
268
|
+
include_vectors=True,
|
|
269
|
+
include_metadata=True,
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
print(f"Exported {len(result.documents)} documents")
|
|
273
|
+
print(f"Namespace has ~{result.metadata.approx_row_count} total rows")
|
|
274
|
+
```
|
|
275
|
+
"""
|
|
276
|
+
http_url = self._require_http_url("get_vectors_in_namespace")
|
|
277
|
+
|
|
278
|
+
if not namespace:
|
|
279
|
+
raise ValueError("namespace is required")
|
|
280
|
+
|
|
281
|
+
# 1. Submit export job to gateway
|
|
282
|
+
url = f"{http_url}/v1/export/turbopuffer"
|
|
283
|
+
body = {
|
|
284
|
+
"namespace": namespace,
|
|
285
|
+
"includeVectors": include_vectors,
|
|
286
|
+
"includeMetadata": include_metadata,
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
response = requests.post(url, json=body, timeout=30)
|
|
290
|
+
response.raise_for_status()
|
|
291
|
+
|
|
292
|
+
job_id = response.json()["jobId"]
|
|
293
|
+
|
|
294
|
+
# 2. Poll Redis for result
|
|
295
|
+
redis_key = f"namespace-export:{job_id}"
|
|
296
|
+
start_time = time.time()
|
|
297
|
+
poll_interval = 1.0 # Poll every 1 second
|
|
298
|
+
|
|
299
|
+
while (time.time() - start_time) * 1000 < timeout_ms:
|
|
300
|
+
result_str = self.redis.get(redis_key)
|
|
301
|
+
|
|
302
|
+
if result_str:
|
|
303
|
+
result_dict = json.loads(result_str)
|
|
304
|
+
result = GetVectorsInNamespaceResult.from_dict(result_dict)
|
|
305
|
+
|
|
306
|
+
if result.status == "failed":
|
|
307
|
+
raise Exception(f"Export failed: {result.error}")
|
|
308
|
+
|
|
309
|
+
return result
|
|
310
|
+
|
|
311
|
+
# Wait before next poll
|
|
312
|
+
time.sleep(poll_interval)
|
|
313
|
+
|
|
314
|
+
raise TimeoutError(f"Export timeout after {timeout_ms}ms")
|
|
@@ -844,7 +844,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
844
844
|
storage=storage_config,
|
|
845
845
|
metadata={
|
|
846
846
|
"toolCollection": tool_collection,
|
|
847
|
-
"batchSize": len(items),
|
|
847
|
+
"batchSize": str(len(items)),
|
|
848
848
|
},
|
|
849
849
|
embedding_model=tool_config.model,
|
|
850
850
|
embedding_dimensions=tool_config.dimensions,
|
|
@@ -913,7 +913,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
913
913
|
storage=storage_config,
|
|
914
914
|
metadata={
|
|
915
915
|
"toolCollection": tool_collection,
|
|
916
|
-
"batchSize": len(items),
|
|
916
|
+
"batchSize": str(len(items)),
|
|
917
917
|
},
|
|
918
918
|
embedding_model=tool_config.model,
|
|
919
919
|
embedding_dimensions=tool_config.dimensions,
|
|
@@ -1119,7 +1119,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
1119
1119
|
storage=storage_config,
|
|
1120
1120
|
metadata={
|
|
1121
1121
|
"toolCollection": tool_collection,
|
|
1122
|
-
"batchSize": len(items),
|
|
1122
|
+
"batchSize": str(len(items)),
|
|
1123
1123
|
},
|
|
1124
1124
|
embedding_model=tool_config.model,
|
|
1125
1125
|
embedding_dimensions=tool_config.dimensions,
|
|
@@ -1198,7 +1198,7 @@ class StructuredEmbeddingsNamespace(BaseNamespace):
|
|
|
1198
1198
|
storage=storage_config,
|
|
1199
1199
|
metadata={
|
|
1200
1200
|
"toolCollection": tool_collection,
|
|
1201
|
-
"batchSize": len(items),
|
|
1201
|
+
"batchSize": str(len(items)),
|
|
1202
1202
|
},
|
|
1203
1203
|
embedding_model=tool_config.model,
|
|
1204
1204
|
embedding_dimensions=tool_config.dimensions,
|
vector_sdk/types.py
CHANGED
|
@@ -862,3 +862,96 @@ class DeleteFromNamespaceResult:
|
|
|
862
862
|
success=data["success"],
|
|
863
863
|
timing=timing,
|
|
864
864
|
)
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
@dataclass
|
|
868
|
+
class NamespaceMetadata:
|
|
869
|
+
"""
|
|
870
|
+
Namespace metadata from TurboPuffer.
|
|
871
|
+
|
|
872
|
+
Attributes:
|
|
873
|
+
schema: Schema information (dimensions, attributes)
|
|
874
|
+
approx_row_count: Approximate number of rows in namespace
|
|
875
|
+
approx_logical_bytes: Approximate logical bytes used
|
|
876
|
+
created_at: When the namespace was created
|
|
877
|
+
updated_at: When the namespace was last updated
|
|
878
|
+
"""
|
|
879
|
+
schema: dict[str, Any]
|
|
880
|
+
approx_row_count: int
|
|
881
|
+
approx_logical_bytes: int
|
|
882
|
+
created_at: str
|
|
883
|
+
updated_at: str
|
|
884
|
+
|
|
885
|
+
@classmethod
|
|
886
|
+
def from_dict(cls, data: dict[str, Any]) -> "NamespaceMetadata":
|
|
887
|
+
"""Create from dictionary."""
|
|
888
|
+
return cls(
|
|
889
|
+
schema=data["schema"],
|
|
890
|
+
approx_row_count=data["approx_row_count"],
|
|
891
|
+
approx_logical_bytes=data["approx_logical_bytes"],
|
|
892
|
+
created_at=data["created_at"],
|
|
893
|
+
updated_at=data["updated_at"],
|
|
894
|
+
)
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
@dataclass
|
|
898
|
+
class ExportTiming:
|
|
899
|
+
"""
|
|
900
|
+
Timing breakdown for export operations.
|
|
901
|
+
|
|
902
|
+
Attributes:
|
|
903
|
+
metadata_ms: Time to fetch namespace metadata (ms)
|
|
904
|
+
query_ms: Time to fetch all documents (ms)
|
|
905
|
+
total_ms: Total export time (ms)
|
|
906
|
+
"""
|
|
907
|
+
metadata_ms: int
|
|
908
|
+
query_ms: int
|
|
909
|
+
total_ms: int
|
|
910
|
+
|
|
911
|
+
@classmethod
|
|
912
|
+
def from_dict(cls, data: dict[str, Any]) -> "ExportTiming":
|
|
913
|
+
"""Create from dictionary."""
|
|
914
|
+
return cls(
|
|
915
|
+
metadata_ms=data["metadataMs"],
|
|
916
|
+
query_ms=data["queryMs"],
|
|
917
|
+
total_ms=data["totalMs"],
|
|
918
|
+
)
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
@dataclass
|
|
922
|
+
class GetVectorsInNamespaceResult:
|
|
923
|
+
"""
|
|
924
|
+
Result of a namespace export operation.
|
|
925
|
+
|
|
926
|
+
Attributes:
|
|
927
|
+
job_id: Job ID for the export
|
|
928
|
+
status: Export status ("success" or "failed")
|
|
929
|
+
documents: All exported documents
|
|
930
|
+
metadata: Namespace metadata
|
|
931
|
+
error: Error message if failed
|
|
932
|
+
timing: Timing breakdown
|
|
933
|
+
completed_at: When the export completed
|
|
934
|
+
"""
|
|
935
|
+
job_id: str
|
|
936
|
+
status: str
|
|
937
|
+
documents: list[Document]
|
|
938
|
+
metadata: NamespaceMetadata
|
|
939
|
+
error: Optional[str]
|
|
940
|
+
timing: ExportTiming
|
|
941
|
+
completed_at: str
|
|
942
|
+
|
|
943
|
+
@classmethod
|
|
944
|
+
def from_dict(cls, data: dict[str, Any]) -> "GetVectorsInNamespaceResult":
|
|
945
|
+
"""Create from dictionary."""
|
|
946
|
+
documents = [Document.from_dict(d) for d in data["documents"]]
|
|
947
|
+
metadata = NamespaceMetadata.from_dict(data["metadata"])
|
|
948
|
+
timing = ExportTiming.from_dict(data["timing"])
|
|
949
|
+
return cls(
|
|
950
|
+
job_id=data["jobId"],
|
|
951
|
+
status=data["status"],
|
|
952
|
+
documents=documents,
|
|
953
|
+
metadata=metadata,
|
|
954
|
+
error=data.get("error"),
|
|
955
|
+
timing=timing,
|
|
956
|
+
completed_at=data["completedAt"],
|
|
957
|
+
)
|
|
File without changes
|