cognee-community-vector-adapter-qdrant 0.1.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognee-community-vector-adapter-qdrant
3
- Version: 0.1.0
3
+ Version: 0.2.1
4
4
  Summary: Qdrant vector database adapter for cognee
5
5
  Requires-Python: >=3.11,<=3.13
6
6
  Classifier: Programming Language :: Python :: 3
7
7
  Classifier: Programming Language :: Python :: 3.11
8
8
  Classifier: Programming Language :: Python :: 3.12
9
9
  Classifier: Programming Language :: Python :: 3.13
10
- Requires-Dist: cognee (>=0.4.0)
10
+ Requires-Dist: cognee (==0.5.2)
11
11
  Requires-Dist: instructor (>=1.11)
12
12
  Requires-Dist: qdrant-client (>=1.16.0)
13
13
  Requires-Dist: starlette (>=0.48.0)
@@ -49,6 +49,11 @@ Import and register the adapter in your code:
49
49
  from cognee_community_vector_adapter_qdrant import register
50
50
  ```
51
51
 
52
+ Also, specify the dataset handler in the .env file:
53
+ ```dotenv
54
+ VECTOR_DATASET_DATABASE_HANDLER="qdrant"
55
+ ```
56
+
52
57
  ## Example
53
58
  See example in `example.py` file.
54
59
 
@@ -34,5 +34,10 @@ Import and register the adapter in your code:
34
34
  from cognee_community_vector_adapter_qdrant import register
35
35
  ```
36
36
 
37
+ Also, specify the dataset handler in the .env file:
38
+ ```dotenv
39
+ VECTOR_DATASET_DATABASE_HANDLER="qdrant"
40
+ ```
41
+
37
42
  ## Example
38
43
  See example in `example.py` file.
@@ -0,0 +1,39 @@
1
+ from typing import Optional
2
+ from uuid import UUID
3
+
4
+ from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
5
+ from cognee.infrastructure.databases.vector import get_vectordb_config
6
+ from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
7
+ from cognee.modules.users.models import DatasetDatabase, User
8
+
9
+
10
class QdrantDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Dataset database handler for the Qdrant vector database provider.

    ``create_dataset`` derives the connection settings for a dataset from the
    global vector DB configuration, and ``delete_dataset`` prunes the vector
    engine built from a stored ``DatasetDatabase`` record.
    """

    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        """Return the vector database settings to store for ``dataset_id``.

        Args:
            dataset_id: Identifier of the dataset; its string form becomes the
                vector database name.
            user: Not used by this handler.

        Returns:
            A dict holding the provider, URL and key read from the vector DB
            configuration, the per-dataset database name, and the handler
            name ``"qdrant"``.

        Raises:
            ValueError: If the configured vector DB provider is not "qdrant".
        """
        vector_config = get_vectordb_config()

        if vector_config.vector_db_provider != "qdrant":
            # Adjacent string literals are concatenated verbatim, so the
            # separating space has to live inside one of them.
            raise ValueError(
                "QdrantDatasetDatabaseHandler can only be used with the "
                "Qdrant vector database provider."
            )

        # NOTE(review): dataset_id is Optional; passing None produces the
        # literal name "None" -- confirm callers always supply a UUID.
        vector_db_name = f"{dataset_id}"

        return {
            "vector_database_provider": vector_config.vector_db_provider,
            "vector_database_url": vector_config.vector_db_url,
            "vector_database_key": vector_config.vector_db_key,
            "vector_database_name": vector_db_name,
            "vector_dataset_database_handler": "qdrant",
        }

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
        """Prune the vector data belonging to ``dataset_database``.

        Builds a vector engine from the provider, URL, key and database name
        stored on the record and awaits its ``prune()`` method.

        Args:
            dataset_database: Stored record carrying the vector database
                connection settings of the dataset to delete.
        """
        vector_engine = create_vector_engine(
            vector_db_provider=dataset_database.vector_database_provider,
            vector_db_url=dataset_database.vector_database_url,
            vector_db_key=dataset_database.vector_database_key,
            vector_db_name=dataset_database.vector_database_name,
        )
        await vector_engine.prune()
@@ -45,8 +45,16 @@ class QDrantAdapter(VectorDBInterface):
45
45
  api_key: str = None
46
46
  qdrant_path: str = None
47
47
 
48
- def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
48
+ def __init__(
49
+ self,
50
+ url,
51
+ api_key,
52
+ embedding_engine: EmbeddingEngine,
53
+ qdrant_path=None,
54
+ database_name: str = "cognee_db",
55
+ ):
49
56
  self.embedding_engine = embedding_engine
57
+ self.database_name = database_name
50
58
 
51
59
  if qdrant_path is not None:
52
60
  self.qdrant_path = qdrant_path
@@ -86,9 +94,24 @@ class QDrantAdapter(VectorDBInterface):
86
94
  vectors_config={
87
95
  "text": models.VectorParams(
88
96
  size=self.embedding_engine.get_vector_size(),
89
- distance="Cosine",
97
+ distance=models.Distance.COSINE,
90
98
  )
91
99
  },
100
+ # With this config definition, we avoid creating a global index
101
+ hnsw_config=models.HnswConfigDiff(
102
+ payload_m=16,
103
+ m=0,
104
+ ),
105
+ )
106
+ # This index co-locates vectors from the same dataset together,
107
+ # which can improve performance
108
+ await client.create_payload_index(
109
+ collection_name=collection_name,
110
+ field_name="database_name",
111
+ field_schema=models.KeywordIndexParams(
112
+ type=models.KeywordIndexType.KEYWORD,
113
+ is_tenant=True,
114
+ ),
92
115
  )
93
116
 
94
117
  await client.close()
@@ -105,7 +128,7 @@ class QDrantAdapter(VectorDBInterface):
105
128
  def convert_to_qdrant_point(data_point: DataPoint):
106
129
  return models.PointStruct(
107
130
  id=str(data_point.id),
108
- payload=data_point.model_dump(),
131
+ payload={**data_point.model_dump(), "database_name": self.database_name},
109
132
  vector={"text": data_vectors[data_points.index(data_point)]},
110
133
  )
111
134
 
@@ -157,6 +180,7 @@ class QDrantAdapter(VectorDBInterface):
157
180
  query_vector: list[float] | None = None,
158
181
  limit: int | None = 15,
159
182
  with_vector: bool = False,
183
+ include_payload: bool = False,
160
184
  ) -> list[ScoredResult]:
161
185
  if query_text is None and query_vector is None:
162
186
  raise MissingQueryParameterError()
@@ -182,9 +206,20 @@ class QDrantAdapter(VectorDBInterface):
182
206
  query_result = await client.query_points(
183
207
  collection_name=collection_name,
184
208
  query=query_vector,
209
+ query_filter=models.Filter(
210
+ must=[
211
+ models.FieldCondition(
212
+ key="database_name",
213
+ match=models.MatchValue(
214
+ value=self.database_name,
215
+ ),
216
+ )
217
+ ]
218
+ ),
185
219
  using="text",
186
220
  limit=limit,
187
221
  with_vectors=with_vector,
222
+ with_payload=include_payload,
188
223
  )
189
224
 
190
225
  await client.close()
@@ -195,11 +230,13 @@ class QDrantAdapter(VectorDBInterface):
195
230
  return [
196
231
  ScoredResult(
197
232
  id=parse_id(str(result.id)),
198
- payload={
233
+ payload=None
234
+ if not result.payload
235
+ else {
199
236
  **result.payload,
200
237
  "id": parse_id(str(result.id)),
201
238
  },
202
- score=1 - result.score if hasattr(result, 'score') else 1.0,
239
+ score=1 - result.score if hasattr(result, "score") else 1.0,
203
240
  )
204
241
  for result in results
205
242
  ]
@@ -215,6 +252,7 @@ class QDrantAdapter(VectorDBInterface):
215
252
  query_texts: list[str],
216
253
  limit: int | None = None,
217
254
  with_vectors: bool = False,
255
+ include_payload: bool = False,
218
256
  ):
219
257
  """
220
258
  Perform batch search in a Qdrant collection with dynamic search requests.
@@ -225,6 +263,7 @@ class QDrantAdapter(VectorDBInterface):
225
263
  - limit (int | None): Result limit passed to the search requests.
226
264
  - with_vectors (bool, optional): Bool indicating whether to return
227
265
  vectors for search requests.
266
+ - include_payload (bool, optional): Bool indicating whether to return payload in results.
228
267
 
229
268
  Returns:
230
269
  - results: The search results from Qdrant.
@@ -246,8 +285,19 @@ class QDrantAdapter(VectorDBInterface):
246
285
  query_results = await client.query_batch(
247
286
  collection_name=collection_name,
248
287
  query_texts=query_texts,
288
+ query_filter=models.Filter(
289
+ must=[
290
+ models.FieldCondition(
291
+ key="database_name",
292
+ match=models.MatchValue(
293
+ value=self.database_name,
294
+ ),
295
+ )
296
+ ]
297
+ ),
249
298
  limit=limit,
250
299
  with_vectors=with_vectors,
300
+ with_payload=include_payload,
251
301
  )
252
302
 
253
303
  await client.close()
@@ -255,10 +305,9 @@ class QDrantAdapter(VectorDBInterface):
255
305
  # Extract points from each query result and filter by score
256
306
  filtered_results = []
257
307
  for query_result in query_results:
258
- points = query_result.points if hasattr(query_result, 'points') else []
308
+ points = query_result.points if hasattr(query_result, "points") else []
259
309
  filtered_points = [
260
- result for result in points
261
- if hasattr(result, 'score') and result.score > 0.9
310
+ result for result in points if hasattr(result, "score") and result.score > 0.9
262
311
  ]
263
312
  filtered_results.append(filtered_points)
264
313
 
@@ -279,7 +328,22 @@ class QDrantAdapter(VectorDBInterface):
279
328
  response = await client.get_collections()
280
329
 
281
330
  for collection in response.collections:
282
- await client.delete_collection(collection.name)
331
+ await client.delete(
332
+ collection.name,
333
+ points_selector=models.FilterSelector(
334
+ filter=models.Filter(
335
+ must=[
336
+ models.FieldCondition(
337
+ key="database_name",
338
+ match=models.MatchValue(value=self.database_name),
339
+ )
340
+ ]
341
+ )
342
+ ),
343
+ )
344
+ remaining_points = await client.count(collection_name=collection.name)
345
+ if remaining_points.count == 0:
346
+ await client.delete_collection(collection_name=collection.name)
283
347
 
284
348
  await client.close()
285
349
 
@@ -295,7 +359,23 @@ class QDrantAdapter(VectorDBInterface):
295
359
 
296
360
  response = await client.get_collections()
297
361
 
298
- result = [collection.name for collection in response.collections]
362
+ # We do this filtering because one user could see another user's collections otherwise
363
+ result = []
364
+ for collection in response.collections:
365
+ relevant_count = await client.count(
366
+ collection_name=collection.name,
367
+ count_filter=models.Filter(
368
+ must=[
369
+ models.FieldCondition(
370
+ key="database_name", match=models.MatchValue(value=self.database_name)
371
+ )
372
+ ]
373
+ ),
374
+ exact=True,
375
+ )
376
+
377
+ if relevant_count.count > 0:
378
+ result.append(collection.name)
299
379
 
300
380
  await client.close()
301
381
 
@@ -0,0 +1,8 @@
1
+ from cognee.infrastructure.databases.dataset_database_handler import use_dataset_database_handler
2
+ from cognee.infrastructure.databases.vector import use_vector_adapter
3
+
4
+ from .qdrant_adapter import QDrantAdapter
5
+ from .QdrantDatasetDatabaseHandler import QdrantDatasetDatabaseHandler
6
+
7
+ use_vector_adapter("qdrant", QDrantAdapter)
8
+ use_dataset_database_handler("qdrant", QdrantDatasetDatabaseHandler, "qdrant")
@@ -1,12 +1,12 @@
1
1
  [project]
2
2
  name = "cognee-community-vector-adapter-qdrant"
3
- version = "0.1.0"
3
+ version = "0.2.1"
4
4
  description = "Qdrant vector database adapter for cognee"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11,<=3.13"
7
7
  dependencies = [
8
8
  "qdrant-client>=1.16.0",
9
- "cognee>=0.4.0",
9
+ "cognee==0.5.2",
10
10
  "starlette>=0.48.0",
11
11
  "instructor>=1.11"
12
12
  ]
@@ -1,5 +0,0 @@
1
- from cognee.infrastructure.databases.vector import use_vector_adapter
2
-
3
- from .qdrant_adapter import QDrantAdapter
4
-
5
- use_vector_adapter("qdrant", QDrantAdapter)