pyseekdb 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
1
+ """
2
+ Collection class - represents a collection and provides unified data operation interface
3
+
4
+ Design Pattern:
5
+ 1. Collection itself contains no business logic
6
+ 2. All operations are delegated to the client that created it
7
+ 3. Different clients can have completely different underlying implementations
8
+ 4. User-facing interface is completely consistent
9
+ """
10
+ from typing import Any, List, Dict, Optional, Union
11
+ from .query_result import QueryResult
12
+
13
+
14
class Collection:
    """
    Collection unified interface class.

    Design Principles:
    - Collection is a lightweight wrapper that only holds metadata
    - All operations delegate to the client via self._client._collection_*() methods
    - Different clients (OceanBase, Seekdb, Milvus, etc.) provide different implementations
    - Users see identical interface regardless of which client created the collection

    Every data method below forwards its arguments unchanged (plus this
    collection's id and name) to the matching ``_collection_*`` method of the
    client and returns whatever the client returns. Validation, embedding and
    storage behaviour are therefore determined by the client, not by this class.
    """

    def __init__(
        self,
        client: Any,  # BaseClient instance
        name: str,
        collection_id: Optional[str] = None,
        dimension: Optional[int] = None,
        **metadata: Any
    ) -> None:
        """
        Initialize collection object.

        Args:
            client: The client instance that created this collection
            name: Collection name
            collection_id: Collection unique identifier (some databases may need this)
            dimension: Vector dimension
            **metadata: Other metadata, stored as a dict and exposed via ``metadata``
        """
        self._client = client  # Core: hold reference to the client
        self._name = name
        self._id = collection_id
        self._dimension = dimension
        self._metadata = metadata

    # ==================== Properties ====================

    @property
    def name(self) -> str:
        """Collection name"""
        return self._name

    @property
    def id(self) -> Optional[str]:
        """Collection ID"""
        return self._id

    @property
    def dimension(self) -> Optional[int]:
        """Vector dimension"""
        return self._dimension

    @property
    def client(self) -> Any:
        """Associated client"""
        return self._client

    @property
    def metadata(self) -> Dict[str, Any]:
        """Collection metadata (the same dict object captured at construction)"""
        return self._metadata

    def __repr__(self) -> str:
        """
        Return a short debug representation of the collection.

        The client is shown by its ``mode`` attribute when it has one. Because
        ``client`` may be any object, fall back to the client's class name so
        that ``repr()`` never raises while debugging or logging.
        """
        client_label = getattr(self._client, "mode", type(self._client).__name__)
        return f"Collection(name='{self._name}', dimension={self._dimension}, client={client_label})"

    # ==================== DML Operations ====================
    # All methods delegate to client's internal implementation

    def add(
        self,
        ids: Union[str, List[str]],
        vectors: Optional[Union[List[float], List[List[float]]]] = None,
        metadatas: Optional[Union[Dict, List[Dict]]] = None,
        documents: Optional[Union[str, List[str]]] = None,
        **kwargs: Any
    ) -> None:
        """
        Add data to collection.

        Delegates to ``client._collection_add``.

        Args:
            ids: Single ID or list of IDs
            vectors: Single vector or list of vectors (optional if documents provided)
            metadatas: Single metadata dict or list of metadata dicts (optional)
            documents: Single document or list of documents (optional)
            **kwargs: Additional parameters, forwarded to the client as-is

        Examples:
            # Add single item
            collection.add(ids="1", vectors=[0.1, 0.2, 0.3], metadatas={"tag": "A"})

            # Add multiple items
            collection.add(
                ids=["1", "2", "3"],
                vectors=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                metadatas=[{"tag": "A"}, {"tag": "B"}, {"tag": "C"}]
            )
        """
        return self._client._collection_add(
            collection_id=self._id,
            collection_name=self._name,
            ids=ids,
            vectors=vectors,
            metadatas=metadatas,
            documents=documents,
            **kwargs
        )

    def update(
        self,
        ids: Union[str, List[str]],
        vectors: Optional[Union[List[float], List[List[float]]]] = None,
        metadatas: Optional[Union[Dict, List[Dict]]] = None,
        documents: Optional[Union[str, List[str]]] = None,
        **kwargs: Any
    ) -> None:
        """
        Update existing data in collection.

        Delegates to ``client._collection_update``.

        Args:
            ids: Single ID or list of IDs to update
            vectors: New vectors (optional)
            metadatas: New metadata (optional)
            documents: New documents (optional)
            **kwargs: Additional parameters, forwarded to the client as-is

        Note:
            IDs must exist, otherwise an error will be raised
            (enforced by the client implementation, not by this wrapper)

        Examples:
            # Update single item
            collection.update(ids="1", metadatas={"tag": "B"})

            # Update multiple items
            collection.update(
                ids=["1", "2"],
                vectors=[[0.9, 0.8], [0.7, 0.6]]
            )
        """
        return self._client._collection_update(
            collection_id=self._id,
            collection_name=self._name,
            ids=ids,
            vectors=vectors,
            metadatas=metadatas,
            documents=documents,
            **kwargs
        )

    def upsert(
        self,
        ids: Union[str, List[str]],
        vectors: Optional[Union[List[float], List[List[float]]]] = None,
        metadatas: Optional[Union[Dict, List[Dict]]] = None,
        documents: Optional[Union[str, List[str]]] = None,
        **kwargs: Any
    ) -> None:
        """
        Insert or update data in collection.

        Delegates to ``client._collection_upsert``.

        Args:
            ids: Single ID or list of IDs
            vectors: Vectors (optional if documents provided)
            metadatas: Metadata (optional)
            documents: Documents (optional)
            **kwargs: Additional parameters, forwarded to the client as-is

        Note:
            If ID exists, update it; otherwise, insert new data

        Examples:
            # Upsert single item
            collection.upsert(ids="1", vectors=[0.1, 0.2], metadatas={"tag": "A"})

            # Upsert multiple items
            collection.upsert(
                ids=["1", "2", "3"],
                vectors=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
            )
        """
        return self._client._collection_upsert(
            collection_id=self._id,
            collection_name=self._name,
            ids=ids,
            vectors=vectors,
            metadatas=metadatas,
            documents=documents,
            **kwargs
        )

    def delete(
        self,
        ids: Optional[Union[str, List[str]]] = None,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> None:
        """
        Delete data from collection.

        Delegates to ``client._collection_delete``.

        Args:
            ids: Single ID or list of IDs to delete (optional)
            where: Filter condition on metadata (optional)
            where_document: Filter condition on documents (optional)
            **kwargs: Additional parameters, forwarded to the client as-is

        Note:
            At least one of ids, where, or where_document must be provided.
            This wrapper does not check that itself; the arguments are passed
            to the client unchanged.

        Examples:
            # Delete by IDs
            collection.delete(ids=["1", "2", "3"])

            # Delete by metadata filter
            collection.delete(where={"tag": "A"})

            # Delete by document filter
            collection.delete(where_document={"$contains": "keyword"})
        """
        return self._client._collection_delete(
            collection_id=self._id,
            collection_name=self._name,
            ids=ids,
            where=where,
            where_document=where_document,
            **kwargs
        )

    # ==================== DQL Operations ====================

    def query(
        self,
        query_embeddings: Optional[Union[List[float], List[List[float]]]] = None,
        query_texts: Optional[Union[str, List[str]]] = None,
        n_results: int = 10,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
        include: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Union["QueryResult", List["QueryResult"]]:
        """
        Query collection by vector similarity.

        Delegates to ``client._collection_query``.

        Args:
            query_embeddings: Query vector(s) (optional if query_texts provided)
            query_texts: Query text(s) to be embedded (optional if query_embeddings provided)
            n_results: Number of results to return (default: 10)
            where: Filter condition on metadata supporting:
                - Comparison operators: $eq, $lt, $gt, $lte, $gte, $ne, $in, $nin
                - Logical operators: $or, $and, $not
            where_document: Filter condition on documents supporting:
                - $contains: full-text search
                - $regex: regular expression matching
                - Logical operators: $or, $and
            include: Fields to include in results, e.g., ["documents", "metadatas", "embeddings"] (optional)
                By default, returns "documents" and "metadatas". Always includes "_id".
            **kwargs: Additional parameters, forwarded to the client as-is

        Returns:
            - If single vector/text provided: QueryResult object containing query results
            - If multiple vectors/texts provided: List of QueryResult objects, one for each query vector
            Each QueryResult item contains:
                - _id: record ID (always included)
                - document: document text (if included)
                - embedding: vector embedding (if included)
                - metadata: metadata dictionary (if included)
                - distance: similarity distance (always included for query)

        Examples:
            # Query by single embedding (returns QueryResult)
            results = collection.query(
                query_embeddings=[0.1, 0.2, 0.3],
                n_results=5
            )

            # Query by multiple embeddings (returns List[QueryResult])
            results = collection.query(
                query_embeddings=[[11.1, 12.1, 13.1], [1.1, 2.3, 3.2]],
                n_results=5
            )
            # results[0] is QueryResult for first vector, results[1] for second vector

            # Query with filters
            results = collection.query(
                query_embeddings=[[0.1, 0.2, 0.3]],
                where={"chapter": {"$gte": 3}},
                where_document={"$contains": "machine learning"},
                include=["documents", "metadatas", "embeddings"]
            )

            # Query by texts (will be embedded automatically)
            results = collection.query(
                query_texts=["my query text"],
                n_results=10
            )

            # Query by multiple texts (returns List[QueryResult])
            results = collection.query(
                query_texts=["text1", "text2"],
                n_results=10
            )
        """
        return self._client._collection_query(
            collection_id=self._id,
            collection_name=self._name,
            query_embeddings=query_embeddings,
            query_texts=query_texts,
            n_results=n_results,
            where=where,
            where_document=where_document,
            include=include,
            **kwargs
        )

    def get(
        self,
        ids: Optional[Union[str, List[str]]] = None,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        include: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Union["QueryResult", List["QueryResult"]]:
        """
        Get data from collection by IDs or filters.

        Delegates to ``client._collection_get``.

        Args:
            ids: Single ID or list of IDs to retrieve (optional)
            where: Filter condition on metadata (optional)
            where_document: Filter condition on documents (optional)
            limit: Maximum number of results to return (optional)
            offset: Number of results to skip (optional)
            include: Fields to include in results, e.g., ["metadatas", "documents", "embeddings"] (optional)
            **kwargs: Additional parameters, forwarded to the client as-is

        Returns:
            - If single ID provided: QueryResult object containing get results for that ID
            - If multiple IDs provided: List of QueryResult objects, one for each ID
            - If filters provided (no IDs): QueryResult object containing all matching results

        Note:
            If no parameters provided, returns all data (up to limit)

        Examples:
            # Get by single ID (returns QueryResult)
            results = collection.get(ids="1")

            # Get by multiple IDs (returns List[QueryResult])
            results = collection.get(ids=["1", "2", "3"])
            # results[0] is QueryResult for ID "1", results[1] for ID "2", etc.

            # Get by filter (returns QueryResult)
            results = collection.get(
                where={"tag": "A"},
                limit=10
            )

            # Get all data
            results = collection.get(limit=100)
        """
        return self._client._collection_get(
            collection_id=self._id,
            collection_name=self._name,
            ids=ids,
            where=where,
            where_document=where_document,
            limit=limit,
            offset=offset,
            include=include,
            **kwargs
        )

    def hybrid_search(
        self,
        query: Optional[Dict[str, Any]] = None,
        knn: Optional[Dict[str, Any]] = None,
        rank: Optional[Dict[str, Any]] = None,
        n_results: int = 10,
        include: Optional[List[str]] = None,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Hybrid search combining full-text search and vector similarity search.

        Delegates to ``client._collection_hybrid_search``.

        Args:
            query: Full-text search configuration dict with:
                - where_document: Document filter conditions (e.g., {"$contains": "text"})
                - where: Metadata filter conditions (e.g., {"page": {"$gte": 5}})
                - n_results: Number of results for full-text search (optional)
            knn: Vector search configuration dict with:
                - query_texts: Query text(s) to be embedded (optional if query_embeddings provided)
                - query_embeddings: Query vector(s) (optional if query_texts provided)
                - where: Metadata filter conditions (optional)
                - n_results: Number of results for vector search (optional)
            rank: Ranking configuration dict (e.g., {"rrf": {"rank_window_size": 60, "rank_constant": 60}})
            n_results: Final number of results to return after ranking (default: 10)
            include: Fields to include in results (e.g., ["documents", "metadatas", "embeddings"])
            **kwargs: Additional parameters, forwarded to the client as-is

        Returns:
            Search results dictionary containing ids, distances, metadatas, documents, embeddings, etc.

        Examples:
            # Hybrid search with both full-text and vector search
            results = collection.hybrid_search(
                query={
                    "where_document": {"$contains": "machine learning"},
                    "where": {"category": {"$eq": "science"}},
                    "n_results": 10
                },
                knn={
                    "query_texts": ["AI research"],
                    "where": {"year": {"$gte": 2020}},
                    "n_results": 10
                },
                rank={"rrf": {}},
                n_results=5,
                include=["documents", "metadatas", "embeddings"]
            )
        """
        return self._client._collection_hybrid_search(
            collection_id=self._id,
            collection_name=self._name,
            query=query,
            knn=knn,
            rank=rank,
            n_results=n_results,
            include=include,
            **kwargs
        )

    # ==================== Collection Info ====================

    def count(self) -> int:
        """
        Get the number of items in collection.

        Delegates to ``client._collection_count``.

        Returns:
            Item count

        Examples:
            count = collection.count()
            print(f"Collection has {count} items")
        """
        return self._client._collection_count(
            collection_id=self._id,
            collection_name=self._name
        )

    def peek(self, limit: int = 10) -> "QueryResult":
        """
        Quickly preview the first few items in the collection.

        Implemented as a ``client._collection_get`` call with ``offset=0`` and
        documents, metadatas and embeddings all included.

        Args:
            limit: Number of items to preview (default: 10)

        Returns:
            QueryResult object containing the first limit items

        Examples:
            # Preview first 5 items
            preview = collection.peek(limit=5)
            for item in preview:
                print(f"ID: {item._id}, Document: {item.document}")
        """
        return self._client._collection_get(
            collection_id=self._id,
            collection_name=self._name,
            limit=limit,
            offset=0,
            include=["documents", "metadatas", "embeddings"]
        )
@@ -0,0 +1,55 @@
1
+ """
2
+ Database model definition
3
+ """
4
+ from typing import Optional
5
+
6
+
7
class Database:
    """
    Database object representing a database instance.

    Two Database objects compare equal (and hash equally) when both their
    ``name`` and ``tenant`` match; charset, collation and extra metadata are
    not part of the identity.

    Note:
    - tenant is None for embedded/server mode (no tenant concept)
    - tenant is set for OceanBase mode (multi-tenant architecture)
    """

    def __init__(
        self,
        name: str,
        tenant: Optional[str] = None,
        charset: Optional[str] = None,
        collation: Optional[str] = None,
        **kwargs
    ) -> None:
        """
        Initialize Database object.

        Args:
            name: database name
            tenant: tenant name (only for OceanBase, None for embedded/server mode)
            charset: character set
            collation: collation
            **kwargs: other metadata, stored as a dict in ``self.metadata``
        """
        self.name = name
        self.tenant = tenant
        self.charset = charset
        self.collation = collation
        self.metadata = kwargs

    def __repr__(self) -> str:
        """Return a debug representation; the tenant is shown only when truthy."""
        if self.tenant:
            return f"<Database name={self.name} tenant={self.tenant}>"
        return f"<Database name={self.name}>"

    def __str__(self) -> str:
        """Return the database name."""
        return self.name

    def __eq__(self, other: object) -> bool:
        """
        Compare by (name, tenant).

        Returns NotImplemented for non-Database operands so Python can try the
        other operand's reflected comparison before falling back to identity;
        ``db == "text"`` still evaluates to False.
        """
        if not isinstance(other, Database):
            return NotImplemented
        return self.name == other.name and self.tenant == other.tenant

    def __hash__(self) -> int:
        """Hash on the same (name, tenant) pair used by ``__eq__``."""
        return hash((self.name, self.tenant))
55
+