cognee-community-vector-adapter-qdrant 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognee-community-vector-adapter-qdrant
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Qdrant vector database adapter for cognee
5
5
  Requires-Python: >=3.11,<=3.13
6
6
  Classifier: Programming Language :: Python :: 3
7
7
  Classifier: Programming Language :: Python :: 3.11
8
8
  Classifier: Programming Language :: Python :: 3.12
9
9
  Classifier: Programming Language :: Python :: 3.13
10
- Requires-Dist: cognee (>=0.4.0)
10
+ Requires-Dist: cognee (==0.5.1)
11
11
  Requires-Dist: instructor (>=1.11)
12
12
  Requires-Dist: qdrant-client (>=1.16.0)
13
13
  Requires-Dist: starlette (>=0.48.0)
@@ -49,6 +49,11 @@ Import and register the adapter in your code:
49
49
  from cognee_community_vector_adapter_qdrant import register
50
50
  ```
51
51
 
52
+ Also, specify the dataset handler in the .env file:
53
+ ```dotenv
54
+ VECTOR_DATASET_DATABASE_HANDLER="qdrant"
55
+ ```
56
+
52
57
  ## Example
53
58
  See example in `example.py` file.
54
59
 
@@ -34,5 +34,10 @@ Import and register the adapter in your code:
34
34
  from cognee_community_vector_adapter_qdrant import register
35
35
  ```
36
36
 
37
+ Also, specify the dataset handler in the .env file:
38
+ ```dotenv
39
+ VECTOR_DATASET_DATABASE_HANDLER="qdrant"
40
+ ```
41
+
37
42
  ## Example
38
43
  See example in `example.py` file.
@@ -0,0 +1,39 @@
1
+ from typing import Optional
2
+ from uuid import UUID
3
+
4
+ from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
5
+ from cognee.infrastructure.databases.vector import get_vectordb_config
6
+ from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
7
+ from cognee.modules.users.models import DatasetDatabase, User
8
+
9
+
class QdrantDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Dataset-database handler for the Qdrant vector database provider.

    ``create_dataset`` builds the connection record for one dataset from the
    globally configured vector database settings; ``delete_dataset`` builds a
    vector engine from such a record and prunes it.
    """

    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        """Return the vector-database connection details for a dataset.

        Args:
            dataset_id: Identifier of the dataset; its string form becomes the
                vector database name.
            user: Accepted for interface compatibility; not used here.

        Returns:
            A dict with the provider, URL and key taken from the current
            vector DB config, the per-dataset database name, and the handler
            name ``"qdrant"``.

        Raises:
            ValueError: If the configured vector DB provider is not ``"qdrant"``.
        """
        vector_config = get_vectordb_config()

        if vector_config.vector_db_provider != "qdrant":
            # Adjacent string literals are concatenated verbatim, so the
            # separating space must be part of the first literal.
            raise ValueError(
                "QdrantDatasetDatabaseHandler can only be used with the "
                "Qdrant vector database provider."
            )

        # NOTE(review): a ``None`` dataset_id yields the literal name "None";
        # confirm callers always pass a real UUID.
        vector_db_name = f"{dataset_id}"

        return {
            "vector_database_provider": vector_config.vector_db_provider,
            "vector_database_url": vector_config.vector_db_url,
            "vector_database_key": vector_config.vector_db_key,
            "vector_database_name": vector_db_name,
            "vector_dataset_database_handler": "qdrant",
        }

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
        """Prune the vector data that belongs to a dataset.

        A vector engine is created from the provider, URL, key and database
        name stored on ``dataset_database`` and its ``prune()`` coroutine is
        awaited.

        Args:
            dataset_database: Record holding the dataset's vector database
                connection details.
        """
        # NOTE(review): presumably this resolves to the Qdrant adapter bound
        # to this dataset's database name — verify against create_vector_engine.
        vector_engine = create_vector_engine(
            vector_db_provider=dataset_database.vector_database_provider,
            vector_db_url=dataset_database.vector_database_url,
            vector_db_key=dataset_database.vector_database_key,
            vector_db_name=dataset_database.vector_database_name,
        )
        await vector_engine.prune()
@@ -45,8 +45,16 @@ class QDrantAdapter(VectorDBInterface):
45
45
  api_key: str = None
46
46
  qdrant_path: str = None
47
47
 
48
- def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
48
+ def __init__(
49
+ self,
50
+ url,
51
+ api_key,
52
+ embedding_engine: EmbeddingEngine,
53
+ qdrant_path=None,
54
+ database_name: str = "cognee_db",
55
+ ):
49
56
  self.embedding_engine = embedding_engine
57
+ self.database_name = database_name
50
58
 
51
59
  if qdrant_path is not None:
52
60
  self.qdrant_path = qdrant_path
@@ -86,9 +94,24 @@ class QDrantAdapter(VectorDBInterface):
86
94
  vectors_config={
87
95
  "text": models.VectorParams(
88
96
  size=self.embedding_engine.get_vector_size(),
89
- distance="Cosine",
97
+ distance=models.Distance.COSINE,
90
98
  )
91
99
  },
100
+ # With this config definition, we avoid creating a global index
101
+ hnsw_config=models.HnswConfigDiff(
102
+ payload_m=16,
103
+ m=0,
104
+ ),
105
+ )
106
+ # This index co-locates vectors from the same dataset together,
107
+ # which can improve performance
108
+ await client.create_payload_index(
109
+ collection_name=collection_name,
110
+ field_name="database_name",
111
+ field_schema=models.KeywordIndexParams(
112
+ type=models.KeywordIndexType.KEYWORD,
113
+ is_tenant=True,
114
+ ),
92
115
  )
93
116
 
94
117
  await client.close()
@@ -105,7 +128,7 @@ class QDrantAdapter(VectorDBInterface):
105
128
  def convert_to_qdrant_point(data_point: DataPoint):
106
129
  return models.PointStruct(
107
130
  id=str(data_point.id),
108
- payload=data_point.model_dump(),
131
+ payload={**data_point.model_dump(), "database_name": self.database_name},
109
132
  vector={"text": data_vectors[data_points.index(data_point)]},
110
133
  )
111
134
 
@@ -182,6 +205,16 @@ class QDrantAdapter(VectorDBInterface):
182
205
  query_result = await client.query_points(
183
206
  collection_name=collection_name,
184
207
  query=query_vector,
208
+ query_filter=models.Filter(
209
+ must=[
210
+ models.FieldCondition(
211
+ key="database_name",
212
+ match=models.MatchValue(
213
+ value=self.database_name,
214
+ ),
215
+ )
216
+ ]
217
+ ),
185
218
  using="text",
186
219
  limit=limit,
187
220
  with_vectors=with_vector,
@@ -199,7 +232,7 @@ class QDrantAdapter(VectorDBInterface):
199
232
  **result.payload,
200
233
  "id": parse_id(str(result.id)),
201
234
  },
202
- score=1 - result.score if hasattr(result, 'score') else 1.0,
235
+ score=1 - result.score if hasattr(result, "score") else 1.0,
203
236
  )
204
237
  for result in results
205
238
  ]
@@ -246,6 +279,16 @@ class QDrantAdapter(VectorDBInterface):
246
279
  query_results = await client.query_batch(
247
280
  collection_name=collection_name,
248
281
  query_texts=query_texts,
282
+ query_filter=models.Filter(
283
+ must=[
284
+ models.FieldCondition(
285
+ key="database_name",
286
+ match=models.MatchValue(
287
+ value=self.database_name,
288
+ ),
289
+ )
290
+ ]
291
+ ),
249
292
  limit=limit,
250
293
  with_vectors=with_vectors,
251
294
  )
@@ -255,10 +298,9 @@ class QDrantAdapter(VectorDBInterface):
255
298
  # Extract points from each query result and filter by score
256
299
  filtered_results = []
257
300
  for query_result in query_results:
258
- points = query_result.points if hasattr(query_result, 'points') else []
301
+ points = query_result.points if hasattr(query_result, "points") else []
259
302
  filtered_points = [
260
- result for result in points
261
- if hasattr(result, 'score') and result.score > 0.9
303
+ result for result in points if hasattr(result, "score") and result.score > 0.9
262
304
  ]
263
305
  filtered_results.append(filtered_points)
264
306
 
@@ -279,7 +321,22 @@ class QDrantAdapter(VectorDBInterface):
279
321
  response = await client.get_collections()
280
322
 
281
323
  for collection in response.collections:
282
- await client.delete_collection(collection.name)
324
+ await client.delete(
325
+ collection.name,
326
+ points_selector=models.FilterSelector(
327
+ filter=models.Filter(
328
+ must=[
329
+ models.FieldCondition(
330
+ key="database_name",
331
+ match=models.MatchValue(value=self.database_name),
332
+ )
333
+ ]
334
+ )
335
+ ),
336
+ )
337
+ remaining_points = await client.count(collection_name=collection.name)
338
+ if remaining_points.count == 0:
339
+ await client.delete_collection(collection_name=collection.name)
283
340
 
284
341
  await client.close()
285
342
 
@@ -295,7 +352,23 @@ class QDrantAdapter(VectorDBInterface):
295
352
 
296
353
  response = await client.get_collections()
297
354
 
298
- result = [collection.name for collection in response.collections]
355
+ # We do this filtering because one user could see another user's collections otherwise
356
+ result = []
357
+ for collection in response.collections:
358
+ relevant_count = await client.count(
359
+ collection_name=collection.name,
360
+ count_filter=models.Filter(
361
+ must=[
362
+ models.FieldCondition(
363
+ key="database_name", match=models.MatchValue(value=self.database_name)
364
+ )
365
+ ]
366
+ ),
367
+ exact=True,
368
+ )
369
+
370
+ if relevant_count.count > 0:
371
+ result.append(collection.name)
299
372
 
300
373
  await client.close()
301
374
 
@@ -0,0 +1,8 @@
1
+ from cognee.infrastructure.databases.dataset_database_handler import use_dataset_database_handler
2
+ from cognee.infrastructure.databases.vector import use_vector_adapter
3
+
4
+ from .qdrant_adapter import QDrantAdapter
5
+ from .QdrantDatasetDatabaseHandler import QdrantDatasetDatabaseHandler
6
+
7
+ use_vector_adapter("qdrant", QDrantAdapter)
8
+ use_dataset_database_handler("qdrant", QdrantDatasetDatabaseHandler, "qdrant")
@@ -1,12 +1,12 @@
1
1
  [project]
2
2
  name = "cognee-community-vector-adapter-qdrant"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "Qdrant vector database adapter for cognee"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11,<=3.13"
7
7
  dependencies = [
8
8
  "qdrant-client>=1.16.0",
9
- "cognee>=0.4.0",
9
+ "cognee==0.5.1",
10
10
  "starlette>=0.48.0",
11
11
  "instructor>=1.11"
12
12
  ]
@@ -1,5 +0,0 @@
1
- from cognee.infrastructure.databases.vector import use_vector_adapter
2
-
3
- from .qdrant_adapter import QDrantAdapter
4
-
5
- use_vector_adapter("qdrant", QDrantAdapter)