pyseekdb 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,412 @@
"""
Comprehensive Example: guided tour of the SeekDBClient features

This example demonstrates the following operations:
1. Client connection (all modes)
2. Collection management
3. DML operations (add, update, upsert, delete)
4. DQL operations (query, get, hybrid_search)
5. Filter operators
6. Collection information methods

The script runs top to bottom against a live server, creates two test
collections, and deletes both of them again in the cleanup section.
"""
import random
import uuid

import seekdbclient

# ============================================================================
# PART 1: CLIENT CONNECTION
# ============================================================================

# Option 1: Embedded mode (local SeekDB)
# client = seekdbclient.Client(
#     path="./seekdb",
#     database="test"
# )

# Option 2: Server mode (remote SeekDB server)
client = seekdbclient.Client(
    host="127.0.0.1",
    port=2881,
    database="test",
    user="root",
    password=""
)

# Option 3: OceanBase mode
# ob_client = seekdbclient.OBClient(
#     host="127.0.0.1",
#     port=11402,
#     tenant="mysql",
#     database="test",
#     user="root",
#     password=""
# )

# ============================================================================
# PART 2: COLLECTION MANAGEMENT
# ============================================================================

collection_name = "comprehensive_example"
dimension = 128

# 2.1 Get the main collection, creating it if it does not exist yet
collection = client.get_or_create_collection(
    name=collection_name,
    dimension=dimension
)

# 2.2 Check if collection exists
exists = client.has_collection(collection_name)

# 2.3 Get collection object
retrieved_collection = client.get_collection(collection_name)

# 2.4 List all collections
all_collections = client.list_collections()

# 2.5 Get or create a second collection (creates if doesn't exist)
collection2 = client.get_or_create_collection(
    name="another_collection",
    dimension=64
)

# ============================================================================
# PART 3: DML OPERATIONS - ADD DATA
# ============================================================================

# Generate sample data (fixed seed so the random vectors are reproducible)
random.seed(42)
documents = [
    "Machine learning is transforming the way we solve problems",
    "Python programming language is widely used in data science",
    "Vector databases enable efficient similarity search",
    "Neural networks mimic the structure of the human brain",
    "Natural language processing helps computers understand human language",
    "Deep learning requires large amounts of training data",
    "Reinforcement learning agents learn through trial and error",
    "Computer vision enables machines to interpret visual information"
]

# One random vector per document (in real usage, use an embedding model)
vectors = [[random.random() for _ in range(dimension)] for _ in documents]

ids = [str(uuid.uuid4()) for _ in documents]

# 3.1 Add single item (scalar id/document/metadata, one flat vector)
single_id = str(uuid.uuid4())
collection.add(
    ids=single_id,
    documents="This is a single document",
    vectors=[random.random() for _ in range(dimension)],
    metadatas={"type": "single", "category": "test"}
)

# 3.2 Add multiple items (parallel lists, one entry per document)
collection.add(
    ids=ids,
    documents=documents,
    vectors=vectors,
    metadatas=[
        {"category": "AI", "score": 95, "tag": "ml", "year": 2023},
        {"category": "Programming", "score": 88, "tag": "python", "year": 2022},
        {"category": "Database", "score": 92, "tag": "vector", "year": 2023},
        {"category": "AI", "score": 90, "tag": "neural", "year": 2022},
        {"category": "NLP", "score": 87, "tag": "language", "year": 2023},
        {"category": "AI", "score": 93, "tag": "deep", "year": 2023},
        {"category": "AI", "score": 85, "tag": "reinforcement", "year": 2022},
        {"category": "CV", "score": 91, "tag": "vision", "year": 2023}
    ]
)

# 3.3 Add with only vectors (no documents)
vector_only_ids = [str(uuid.uuid4()) for _ in range(2)]
collection.add(
    ids=vector_only_ids,
    vectors=[[random.random() for _ in range(dimension)] for _ in range(2)],
    metadatas=[{"type": "vector_only"}, {"type": "vector_only"}]
)

# ============================================================================
# PART 4: DML OPERATIONS - UPDATE DATA
# ============================================================================

# 4.1 Update single item
collection.update(
    ids=ids[0],
    metadatas={"category": "AI", "score": 98, "tag": "ml", "year": 2024, "updated": True}
)

# 4.2 Update multiple items
collection.update(
    ids=ids[1:3],
    documents=["Updated document 1", "Updated document 2"],
    metadatas=[
        {"category": "Programming", "score": 95, "updated": True},
        {"category": "Database", "score": 97, "updated": True}
    ]
)

# 4.3 Update vectors
new_vectors = [[random.random() for _ in range(dimension)] for _ in range(2)]
collection.update(
    ids=ids[2:4],
    vectors=new_vectors
)

# ============================================================================
# PART 5: DML OPERATIONS - UPSERT DATA
# ============================================================================

# 5.1 Upsert existing item (will update)
collection.upsert(
    ids=ids[0],
    documents="Upserted document (was updated)",
    vectors=[random.random() for _ in range(dimension)],
    metadatas={"category": "AI", "upserted": True}
)

# 5.2 Upsert new item (will insert)
new_id = str(uuid.uuid4())
collection.upsert(
    ids=new_id,
    documents="This is a new document from upsert",
    vectors=[random.random() for _ in range(dimension)],
    metadatas={"category": "New", "upserted": True}
)

# 5.3 Upsert multiple items
upsert_ids = [ids[4], str(uuid.uuid4())]  # One existing, one new
collection.upsert(
    ids=upsert_ids,
    documents=["Upserted doc 1", "Upserted doc 2"],
    vectors=[[random.random() for _ in range(dimension)] for _ in range(2)],
    metadatas=[{"upserted": True}, {"upserted": True}]
)

# ============================================================================
# PART 6: DQL OPERATIONS - QUERY (VECTOR SIMILARITY SEARCH)
# ============================================================================

# 6.1 Basic vector similarity query
# NOTE: vectors[0] is the vector originally added for ids[0]; the stored
# vector for that id was replaced by the upsert in 5.1.
query_vector = vectors[0]
results = collection.query(
    query_embeddings=query_vector,
    n_results=3
)
print(f"Query results: {len(results)} items")

# 6.2 Query with metadata filter
results = collection.query(
    query_embeddings=query_vector,
    where={"category": {"$eq": "AI"}},
    n_results=5
)

# 6.3 Query with comparison operators
results = collection.query(
    query_embeddings=query_vector,
    where={"score": {"$gte": 90}},
    n_results=5
)

# 6.4 Query with $in operator
results = collection.query(
    query_embeddings=query_vector,
    where={"tag": {"$in": ["ml", "python", "neural"]}},
    n_results=5
)

# 6.5 Query with logical operators ($or)
results = collection.query(
    query_embeddings=query_vector,
    where={
        "$or": [
            {"category": {"$eq": "AI"}},
            {"tag": {"$eq": "python"}}
        ]
    },
    n_results=5
)

# 6.6 Query with logical operators ($and)
results = collection.query(
    query_embeddings=query_vector,
    where={
        "$and": [
            {"category": {"$eq": "AI"}},
            {"score": {"$gte": 90}}
        ]
    },
    n_results=5
)

# 6.7 Query with document filter
results = collection.query(
    query_embeddings=query_vector,
    where_document={"$contains": "machine learning"},
    n_results=5
)

# 6.8 Query with combined filters (metadata + document)
results = collection.query(
    query_embeddings=query_vector,
    where={"category": {"$eq": "AI"}, "year": {"$gte": 2023}},
    where_document={"$contains": "learning"},
    n_results=5
)

# 6.9 Query with multiple vectors (batch query)
batch_vectors = [vectors[0], vectors[1]]
batch_results = collection.query(
    query_embeddings=batch_vectors,
    n_results=2
)

# 6.10 Query with specific fields
results = collection.query(
    query_embeddings=query_vector,
    include=["documents", "metadatas", "embeddings"],
    n_results=2
)

# ============================================================================
# PART 7: DQL OPERATIONS - GET (RETRIEVE BY IDS OR FILTERS)
# ============================================================================

# 7.1 Get by single ID
result = collection.get(ids=ids[0])

# 7.2 Get by multiple IDs
results = collection.get(ids=ids[:3])

# 7.3 Get by metadata filter
results = collection.get(
    where={"category": {"$eq": "AI"}},
    limit=5
)

# 7.4 Get with comparison operators
results = collection.get(
    where={"score": {"$gte": 90}},
    limit=5
)

# 7.5 Get with $in operator
results = collection.get(
    where={"tag": {"$in": ["ml", "python"]}},
    limit=5
)

# 7.6 Get with logical operators
results = collection.get(
    where={
        "$or": [
            {"category": {"$eq": "AI"}},
            {"category": {"$eq": "Programming"}}
        ]
    },
    limit=5
)

# 7.7 Get by document filter
results = collection.get(
    where_document={"$contains": "Python"},
    limit=5
)

# 7.8 Get with pagination
results_page1 = collection.get(limit=2, offset=0)
results_page2 = collection.get(limit=2, offset=2)

# 7.9 Get with specific fields
results = collection.get(
    ids=ids[:2],
    include=["documents", "metadatas", "embeddings"]
)

# 7.10 Get up to 100 items (enough to cover everything added above)
all_results = collection.get(limit=100)

# ============================================================================
# PART 8: DQL OPERATIONS - HYBRID SEARCH
# ============================================================================

# 8.1 Hybrid search with full-text and vector search
# Note: This requires query_embeddings to be provided directly
# In real usage, you might have an embedding function
hybrid_results = collection.hybrid_search(
    query={
        "where_document": {"$contains": "machine learning"},
        "where": {"category": {"$eq": "AI"}},
        "n_results": 10
    },
    knn={
        "query_embeddings": [vectors[0]],
        "where": {"year": {"$gte": 2022}},
        "n_results": 10
    },
    rank={"rrf": {}},  # Reciprocal Rank Fusion
    n_results=5,
    include=["documents", "metadatas"]
)
print(f"Hybrid search: {len(hybrid_results.get('ids', []))} results")

# ============================================================================
# PART 9: DML OPERATIONS - DELETE DATA
# ============================================================================

# 9.1 Delete by IDs
delete_ids = [vector_only_ids[0], new_id]
collection.delete(ids=delete_ids)

# 9.2 Delete by metadata filter
collection.delete(where={"type": {"$eq": "vector_only"}})

# 9.3 Delete by document filter
collection.delete(where_document={"$contains": "Updated document"})

# 9.4 Delete with combined filters
collection.delete(
    where={"category": {"$eq": "CV"}},
    where_document={"$contains": "vision"}
)

# ============================================================================
# PART 10: COLLECTION INFORMATION
# ============================================================================

# 10.1 Get collection count
count = collection.count()
print(f"Collection count: {count} items")

# 10.2 Get collection description
info = collection.describe()
print(f"Collection info: {info}")

# 10.3 Preview first few items in collection
preview = collection.peek(limit=5)
print(f"Preview: {len(preview)} items")
for item in preview:
    print(f" ID: {item._id}, Document: {item.document}")

# 10.4 Count collections in database
collection_count = client.count_collection()
print(f"Database has {collection_count} collections")

# ============================================================================
# PART 11: CLEANUP
# ============================================================================

# Delete the secondary test collection (best effort: report and continue)
try:
    client.delete_collection("another_collection")
except Exception as e:
    print(f"Could not delete 'another_collection': {e}")

# Delete the main collection (comment this line out to keep the data)
client.delete_collection(collection_name)
@@ -0,0 +1,113 @@
"""
Simple Example: Basic usage of SeekDBClient

This example demonstrates the most common operations:
1. Create a client connection
2. Create a collection
3. Add data to the collection
4. Query the collection
5. Print query results
6. Delete the collection

This is a minimal example to get you started quickly.
"""
import random
import uuid

import seekdbclient

# ==================== Step 1: Create Client Connection ====================
# You can use embedded mode, server mode, or OceanBase mode
# For this example, we'll use server mode (you can change to embedded or OceanBase)

# Server mode (connecting to remote SeekDB server)
client = seekdbclient.Client(
    host="127.0.0.1",
    port=2881,
    database="test",
    user="root",
    password=""
)

# Alternative: Embedded mode (local SeekDB)
# client = seekdbclient.Client(
#     path="./seekdb",
#     database="test"
# )

# Alternative: OceanBase mode
# client = seekdbclient.OBClient(
#     host="127.0.0.1",
#     port=11402,
#     tenant="mysql",
#     database="test",
#     user="root",
#     password=""
# )

# ==================== Step 2: Create a Collection ====================
# A collection is like a table that stores documents with vector embeddings
collection_name = "my_simple_collection"
dimension = 128  # Vector dimension (must match your embedding model)

# Create collection
collection = client.create_collection(
    name=collection_name,
    dimension=dimension
)

# ==================== Step 3: Add Data to Collection ====================
# Generate some sample data
documents = [
    "Machine learning is a subset of artificial intelligence",
    "Python is a popular programming language",
    "Vector databases enable semantic search",
    "Neural networks are inspired by the human brain",
    "Natural language processing helps computers understand text"
]

# Generate simple vectors (in real usage, you would use an embedding model)
# For demonstration, we'll create one random vector of length `dimension`
# per document.
random.seed(42)  # For reproducibility
vectors = [[random.random() for _ in range(dimension)] for _ in documents]

# Fixed, human-readable IDs keep the output stable between runs.
# For unique IDs in real usage, generate one str(uuid.uuid4()) per document.
ids = ["id1", "id2", "id3", "id4", "id5"]

# Add data to collection
collection.add(
    ids=ids,
    documents=documents,
    vectors=vectors,
    metadatas=[
        {"category": "AI", "index": 0},
        {"category": "Programming", "index": 1},
        {"category": "Database", "index": 2},
        {"category": "AI", "index": 3},
        {"category": "NLP", "index": 4}
    ]
)

# ==================== Step 4: Query the Collection ====================
# Create a query vector (in real usage, you would embed your query text)
# For demonstration, we reuse the exact vector stored for the first document
query_vector = vectors[0]

# Perform vector similarity search
results = collection.query(
    query_embeddings=query_vector,
    n_results=3  # Return top 3 most similar documents
)


# ==================== Step 5: Print Query Results ====================
print(f"Query results: {len(results)} items found")
for i, item in enumerate(results, 1):
    print(f"Result {i}: ID={item._id}, Distance={item.distance:.4f}, Document={item.document[:50]}...")

# ==================== Step 6: Delete the Collection ====================
client.delete_collection(collection_name)
File without changes