sf-vector-sdk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vector_sdk/client.py ADDED
@@ -0,0 +1,538 @@
1
+ """
2
+ Vector SDK Client.
3
+
4
+ This module provides the main client classes for interacting with the
5
+ Vector Gateway service via Redis Streams and HTTP APIs.
6
+ """
7
+
8
+ import warnings
9
+ from typing import Any, Optional
10
+
11
+ from redis import Redis
12
+
13
+ from vector_sdk.namespaces.db import DBNamespace
14
+ from vector_sdk.namespaces.embeddings import EmbeddingsNamespace
15
+ from vector_sdk.namespaces.search import SearchNamespace
16
+ from vector_sdk.structured import StructuredEmbeddingsNamespace
17
+ from vector_sdk.types import (
18
+ CloneResult,
19
+ DeleteFromNamespaceResult,
20
+ EmbeddingResult,
21
+ LookupResult,
22
+ QueryResult,
23
+ StorageConfig,
24
+ )
25
+
26
+
27
class VectorClient:
    """
    Entry point of the Vector SDK.

    Operations are grouped into namespaced sub-clients that are exposed as
    read-only properties: ``embeddings``, ``search``, ``db`` and
    ``structured_embeddings``. Every sub-client is handed the one Redis
    connection opened by the constructor together with the optional HTTP URL.

    The client can be used as a context manager; leaving the ``with`` block
    calls ``close()``.

    Example:
        ```python
        from vector_sdk import VectorClient

        client = VectorClient(
            redis_url="redis://localhost:6379",
            http_url="http://localhost:8080",
        )

        # Create embeddings
        result = client.embeddings.create_and_wait(
            texts=[{"id": "doc1", "text": "Hello world"}],
            content_type="document",
        )

        # Vector search
        search_result = client.search.query_and_wait(
            query_text="machine learning",
            database="turbopuffer",
            namespace="topics",
            top_k=10,
        )

        # Direct database lookup (no embedding)
        docs = client.db.get_by_ids(
            ids=["doc1", "doc2"],
            database="turbopuffer",
            namespace="topics",
        )

        client.close()
        ```
    """

    def __init__(self, redis_url: str, http_url: Optional[str] = None):
        """
        Open the Redis connection and build the namespace sub-clients.

        Args:
            redis_url: Redis connection URL (e.g., "redis://localhost:6379")
            http_url: Optional HTTP URL for query-gateway API (required for db operations)
        """
        connection = Redis.from_url(redis_url, decode_responses=True)
        self._redis = connection
        self._http_url = http_url

        # The embeddings namespace is created first because the structured
        # namespace below is constructed on top of it.
        embeddings_ns = EmbeddingsNamespace(connection, http_url)
        self._embeddings = embeddings_ns
        self._search = SearchNamespace(connection, http_url)
        self._db = DBNamespace(connection, http_url)
        self._structured_embeddings = StructuredEmbeddingsNamespace(
            connection, embeddings_ns, http_url
        )

    @property
    def embeddings(self) -> EmbeddingsNamespace:
        """
        Namespace for embedding operations.

        Covers creating embeddings, waiting for their results, and checking
        queue depth.

        Example:
            ```python
            # Create embeddings asynchronously
            request_id = client.embeddings.create(texts, content_type)
            result = client.embeddings.wait_for(request_id)

            # Or create and wait in one call
            result = client.embeddings.create_and_wait(texts, content_type)

            # Check queue depth
            depths = client.embeddings.get_queue_depth()
            ```
        """
        return self._embeddings

    @property
    def search(self) -> SearchNamespace:
        """
        Namespace for vector search operations.

        Covers semantic similarity searches.

        Example:
            ```python
            # Search asynchronously
            request_id = client.search.query("machine learning", database="turbopuffer")
            result = client.search.wait_for(request_id)

            # Or search and wait in one call
            result = client.search.query_and_wait("machine learning", database="turbopuffer")
            ```
        """
        return self._search

    @property
    def db(self) -> DBNamespace:
        """
        Namespace for direct database operations.

        Covers operations that need no embedding step (lookup by ID, search
        by metadata, clone, delete).

        Requires `http_url` to be set in VectorClient constructor.

        Example:
            ```python
            # Lookup by ID
            result = client.db.get_by_ids(["id1", "id2"], database="turbopuffer")

            # Find by metadata
            result = client.db.find_by_metadata({"userId": "user1"}, database="mongodb")

            # Clone between namespaces
            client.db.clone("doc1", "ns1", "ns2")

            # Delete
            client.db.delete("doc1", "ns1")
            ```
        """
        return self._db

    @property
    def structured_embeddings(self) -> StructuredEmbeddingsNamespace:
        """
        Namespace for structured embedding operations.

        Covers embedding known tool types (FlashCard, TestQuestion, etc.)
        with automatic text extraction, content hash computation, and
        database routing.

        Example:
            ```python
            # Embed a flashcard - SDK handles text extraction, hashing, and routing
            result = client.structured_embeddings.embed_flashcard_and_wait(
                data={"type": "BASIC", "term": "Mitochondria", "definition": "..."},
                metadata=ToolMetadata(tool_id="tool123", user_id="user456"),
            )

            # Embed a test question
            result = client.structured_embeddings.embed_test_question_and_wait(
                data=TestQuestionInput(
                    question="What is the capital?",
                    answers=[...],
                    question_type="multiplechoice",
                ),
                metadata=ToolMetadata(tool_id="tool456"),
            )
            ```
        """
        return self._structured_embeddings

    def close(self) -> None:
        """Close the underlying Redis connection."""
        self._redis.close()

    def __enter__(self) -> "VectorClient":
        """Enter the ``with`` block, yielding this client."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Leave the ``with`` block, closing the client; exceptions are not suppressed."""
        self.close()
194
+
195
+
196
class EmbeddingClient(VectorClient):
    """
    Deprecated, backward-compatible flavour of the client.

    Keeps the original flat EmbeddingClient method names alive. Each legacy
    method emits a ``DeprecationWarning`` and then forwards to the matching
    method on one of the VectorClient namespaces. New code should use
    VectorClient directly.

    .. deprecated::
        Use VectorClient instead for cleaner, namespaced API.

    Example:
        ```python
        # Legacy usage (still works)
        client = EmbeddingClient("redis://localhost:6379")
        request_id = client.submit(texts, content_type)
        result = client.wait_for_result(request_id)

        # Recommended: Use VectorClient instead
        client = VectorClient("redis://localhost:6379")
        result = client.embeddings.create_and_wait(texts, content_type)
        ```
    """

    @staticmethod
    def _warn_deprecated(message: str) -> None:
        """Emit ``message`` as a DeprecationWarning on behalf of a legacy method."""
        # stacklevel=3 skips this helper and the legacy method that called it,
        # so the warning points at the code that invoked the legacy method.
        warnings.warn(message, DeprecationWarning, stacklevel=3)

    # ========================================================================
    # Embedding Methods (delegate to embeddings namespace)
    # ========================================================================

    def submit(
        self,
        texts: list[dict[str, Any]],
        content_type: str,
        priority: str = "normal",
        storage: Optional[StorageConfig] = None,
        metadata: Optional[dict[str, str]] = None,
        request_id: Optional[str] = None,
        embedding_model: Optional[str] = None,
        embedding_dimensions: Optional[int] = None,
    ) -> str:
        """
        Legacy entry point for submitting an embedding request.

        Forwards all arguments, by keyword, to ``self.embeddings.create()``
        and returns its result.

        .. deprecated::
            Use `client.embeddings.create()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.submit() is deprecated. Use VectorClient.embeddings.create() instead."
        )
        return self.embeddings.create(
            texts=texts,
            content_type=content_type,
            priority=priority,
            storage=storage,
            metadata=metadata,
            request_id=request_id,
            embedding_model=embedding_model,
            embedding_dimensions=embedding_dimensions,
        )

    def wait_for_result(
        self,
        request_id: str,
        timeout: int = 60,
    ) -> EmbeddingResult:
        """
        Legacy entry point for waiting on an embedding request.

        Forwards ``request_id`` and ``timeout`` positionally to
        ``self.embeddings.wait_for()`` and returns its result.

        .. deprecated::
            Use `client.embeddings.wait_for()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.wait_for_result() is deprecated. Use VectorClient.embeddings.wait_for() instead."
        )
        return self.embeddings.wait_for(request_id, timeout)

    def submit_and_wait(
        self,
        texts: list[dict[str, Any]],
        content_type: str,
        priority: str = "normal",
        storage: Optional[StorageConfig] = None,
        metadata: Optional[dict[str, str]] = None,
        timeout: int = 60,
    ) -> EmbeddingResult:
        """
        Legacy entry point for submitting a request and waiting for its result.

        Forwards all arguments, by keyword, to
        ``self.embeddings.create_and_wait()`` and returns its result.

        .. deprecated::
            Use `client.embeddings.create_and_wait()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.submit_and_wait() is deprecated. Use VectorClient.embeddings.create_and_wait() instead."
        )
        return self.embeddings.create_and_wait(
            texts=texts,
            content_type=content_type,
            priority=priority,
            storage=storage,
            metadata=metadata,
            timeout=timeout,
        )

    def get_queue_depth(self) -> dict[str, int]:
        """
        Legacy entry point for reading the queue depth of each priority.

        Returns whatever ``self.embeddings.get_queue_depth()`` returns.

        .. deprecated::
            Use `client.embeddings.get_queue_depth()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.get_queue_depth() is deprecated. Use VectorClient.embeddings.get_queue_depth() instead."
        )
        return self.embeddings.get_queue_depth()

    # ========================================================================
    # Query Methods (delegate to search namespace)
    # ========================================================================

    def query(
        self,
        query_text: str,
        database: str,
        top_k: int = 10,
        min_score: Optional[float] = None,
        filters: Optional[dict[str, str]] = None,
        namespace: Optional[str] = None,
        collection: Optional[str] = None,
        database_name: Optional[str] = None,
        include_vectors: bool = False,
        include_metadata: bool = True,
        embedding_model: Optional[str] = None,
        embedding_dimensions: Optional[int] = None,
        priority: str = "normal",
        metadata: Optional[dict[str, str]] = None,
        request_id: Optional[str] = None,
    ) -> str:
        """
        Legacy entry point for submitting a query request to the query-gateway.

        Forwards all arguments, by keyword, to ``self.search.query()`` and
        returns its result.

        .. deprecated::
            Use `client.search.query()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.query() is deprecated. Use VectorClient.search.query() instead."
        )
        return self.search.query(
            query_text=query_text,
            database=database,
            top_k=top_k,
            min_score=min_score,
            filters=filters,
            namespace=namespace,
            collection=collection,
            database_name=database_name,
            include_vectors=include_vectors,
            include_metadata=include_metadata,
            embedding_model=embedding_model,
            embedding_dimensions=embedding_dimensions,
            priority=priority,
            metadata=metadata,
            request_id=request_id,
        )

    def wait_for_query_result(
        self,
        request_id: str,
        timeout: int = 30,
    ) -> QueryResult:
        """
        Legacy entry point for waiting on a query request.

        Forwards ``request_id`` and ``timeout`` positionally to
        ``self.search.wait_for()`` and returns its result.

        .. deprecated::
            Use `client.search.wait_for()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.wait_for_query_result() is deprecated. Use VectorClient.search.wait_for() instead."
        )
        return self.search.wait_for(request_id, timeout)

    def query_and_wait(
        self,
        query_text: str,
        database: str,
        top_k: int = 10,
        min_score: Optional[float] = None,
        filters: Optional[dict[str, str]] = None,
        namespace: Optional[str] = None,
        collection: Optional[str] = None,
        database_name: Optional[str] = None,
        include_vectors: bool = False,
        include_metadata: bool = True,
        embedding_model: Optional[str] = None,
        embedding_dimensions: Optional[int] = None,
        priority: str = "normal",
        metadata: Optional[dict[str, str]] = None,
        timeout: int = 30,
    ) -> QueryResult:
        """
        Legacy entry point for submitting a query and waiting for its result.

        Forwards all arguments, by keyword, to
        ``self.search.query_and_wait()`` and returns its result.

        .. deprecated::
            Use `client.search.query_and_wait()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.query_and_wait() is deprecated. Use VectorClient.search.query_and_wait() instead."
        )
        return self.search.query_and_wait(
            query_text=query_text,
            database=database,
            top_k=top_k,
            min_score=min_score,
            filters=filters,
            namespace=namespace,
            collection=collection,
            database_name=database_name,
            include_vectors=include_vectors,
            include_metadata=include_metadata,
            embedding_model=embedding_model,
            embedding_dimensions=embedding_dimensions,
            priority=priority,
            metadata=metadata,
            timeout=timeout,
        )

    # ========================================================================
    # Database Lookup Methods (delegate to db namespace)
    # ========================================================================

    def lookup_by_ids(
        self,
        ids: list[str],
        database: str,
        namespace: Optional[str] = None,
        collection: Optional[str] = None,
        database_name: Optional[str] = None,
        include_vectors: bool = False,
        include_metadata: bool = True,
    ) -> LookupResult:
        """
        Legacy entry point for looking up documents by their IDs.

        Forwards all arguments, by keyword, to ``self.db.get_by_ids()`` and
        returns its result.

        .. deprecated::
            Use `client.db.get_by_ids()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.lookup_by_ids() is deprecated. Use VectorClient.db.get_by_ids() instead."
        )
        return self.db.get_by_ids(
            ids=ids,
            database=database,
            namespace=namespace,
            collection=collection,
            database_name=database_name,
            include_vectors=include_vectors,
            include_metadata=include_metadata,
        )

    def search_by_metadata(
        self,
        filters: dict[str, Any],
        database: str,
        namespace: Optional[str] = None,
        collection: Optional[str] = None,
        database_name: Optional[str] = None,
        limit: int = 100,
        include_vectors: bool = False,
    ) -> LookupResult:
        """
        Legacy entry point for searching documents by metadata filters.

        Forwards all arguments, by keyword, to ``self.db.find_by_metadata()``
        and returns its result.

        .. deprecated::
            Use `client.db.find_by_metadata()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.search_by_metadata() is deprecated. Use VectorClient.db.find_by_metadata() instead."
        )
        return self.db.find_by_metadata(
            filters=filters,
            database=database,
            namespace=namespace,
            collection=collection,
            database_name=database_name,
            limit=limit,
            include_vectors=include_vectors,
        )

    def clone_from_namespace(
        self,
        id: str,
        source_namespace: str,
        destination_namespace: str,
    ) -> CloneResult:
        """
        Legacy entry point for cloning a document between TurboPuffer namespaces.

        Forwards all arguments, by keyword, to ``self.db.clone()`` and returns
        its result.

        .. deprecated::
            Use `client.db.clone()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.clone_from_namespace() is deprecated. Use VectorClient.db.clone() instead."
        )
        return self.db.clone(
            id=id,
            source_namespace=source_namespace,
            destination_namespace=destination_namespace,
        )

    def delete_from_namespace(
        self,
        id: str,
        namespace: str,
    ) -> DeleteFromNamespaceResult:
        """
        Legacy entry point for deleting a document from a TurboPuffer namespace.

        Forwards both arguments, by keyword, to ``self.db.delete()`` and
        returns its result.

        .. deprecated::
            Use `client.db.delete()` instead.
        """
        self._warn_deprecated(
            "EmbeddingClient.delete_from_namespace() is deprecated. Use VectorClient.db.delete() instead."
        )
        return self.db.delete(id=id, namespace=namespace)