cognee-community-vector-adapter-qdrant 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee_community_vector_adapter_qdrant/QdrantDatasetDatabaseHandler.py +39 -0
- cognee_community_vector_adapter_qdrant/qdrant_adapter.py +90 -10
- cognee_community_vector_adapter_qdrant/register.py +3 -0
- {cognee_community_vector_adapter_qdrant-0.1.0.dist-info → cognee_community_vector_adapter_qdrant-0.2.1.dist-info}/METADATA +7 -2
- cognee_community_vector_adapter_qdrant-0.2.1.dist-info/RECORD +7 -0
- cognee_community_vector_adapter_qdrant-0.1.0.dist-info/RECORD +0 -6
- {cognee_community_vector_adapter_qdrant-0.1.0.dist-info → cognee_community_vector_adapter_qdrant-0.2.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
|
|
5
|
+
from cognee.infrastructure.databases.vector import get_vectordb_config
|
|
6
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
|
|
7
|
+
from cognee.modules.users.models import DatasetDatabase, User
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class QdrantDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Dataset database handler for the Qdrant vector database provider.

    ``create_dataset`` builds the vector database settings recorded for a
    dataset; ``delete_dataset`` prunes the vector engine built from those
    recorded settings.
    """

    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        """Build the vector database settings for a dataset.

        Args:
            dataset_id: Identifier of the dataset. Its string form becomes the
                vector database name, so ``None`` yields the literal name
                ``"None"``.
            user: Not used by this implementation.

        Returns:
            A dict holding the provider, URL and key returned by
            ``get_vectordb_config()``, the per-dataset database name, and the
            handler name ``"qdrant"``.

        Raises:
            ValueError: If the configured vector database provider is not
                ``"qdrant"``.
        """
        vector_config = get_vectordb_config()

        if vector_config.vector_db_provider != "qdrant":
            # Adjacent string literals are joined verbatim, so the separating
            # space must be part of the first literal.
            raise ValueError(
                "QdrantDatasetDatabaseHandler can only be used with the "
                "Qdrant vector database provider."
            )

        vector_db_name = f"{dataset_id}"

        return {
            "vector_database_provider": vector_config.vector_db_provider,
            "vector_database_url": vector_config.vector_db_url,
            "vector_database_key": vector_config.vector_db_key,
            "vector_database_name": vector_db_name,
            "vector_dataset_database_handler": "qdrant",
        }

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
        """Prune the vector data associated with a dataset database record.

        A vector engine is created from the provider, URL, key and database
        name stored on ``dataset_database`` and its ``prune()`` coroutine is
        awaited.

        Args:
            dataset_database: Record carrying the ``vector_database_*`` fields
                used to build the engine.
        """
        # NOTE(review): what prune() removes is decided by the engine
        # implementation; presumably it is scoped to vector_database_name,
        # confirm against the adapter.
        vector_engine = create_vector_engine(
            vector_db_provider=dataset_database.vector_database_provider,
            vector_db_url=dataset_database.vector_database_url,
            vector_db_key=dataset_database.vector_database_key,
            vector_db_name=dataset_database.vector_database_name,
        )
        await vector_engine.prune()
|
|
@@ -45,8 +45,16 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
45
45
|
api_key: str = None
|
|
46
46
|
qdrant_path: str = None
|
|
47
47
|
|
|
48
|
-
def __init__(
|
|
48
|
+
def __init__(
|
|
49
|
+
self,
|
|
50
|
+
url,
|
|
51
|
+
api_key,
|
|
52
|
+
embedding_engine: EmbeddingEngine,
|
|
53
|
+
qdrant_path=None,
|
|
54
|
+
database_name: str = "cognee_db",
|
|
55
|
+
):
|
|
49
56
|
self.embedding_engine = embedding_engine
|
|
57
|
+
self.database_name = database_name
|
|
50
58
|
|
|
51
59
|
if qdrant_path is not None:
|
|
52
60
|
self.qdrant_path = qdrant_path
|
|
@@ -86,9 +94,24 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
86
94
|
vectors_config={
|
|
87
95
|
"text": models.VectorParams(
|
|
88
96
|
size=self.embedding_engine.get_vector_size(),
|
|
89
|
-
distance=
|
|
97
|
+
distance=models.Distance.COSINE,
|
|
90
98
|
)
|
|
91
99
|
},
|
|
100
|
+
# With this config definition, we avoid creating a global index
|
|
101
|
+
hnsw_config=models.HnswConfigDiff(
|
|
102
|
+
payload_m=16,
|
|
103
|
+
m=0,
|
|
104
|
+
),
|
|
105
|
+
)
|
|
106
|
+
# This index co-locates vectors from the same dataset together,
|
|
107
|
+
# which can improve performance
|
|
108
|
+
await client.create_payload_index(
|
|
109
|
+
collection_name=collection_name,
|
|
110
|
+
field_name="database_name",
|
|
111
|
+
field_schema=models.KeywordIndexParams(
|
|
112
|
+
type=models.KeywordIndexType.KEYWORD,
|
|
113
|
+
is_tenant=True,
|
|
114
|
+
),
|
|
92
115
|
)
|
|
93
116
|
|
|
94
117
|
await client.close()
|
|
@@ -105,7 +128,7 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
105
128
|
def convert_to_qdrant_point(data_point: DataPoint):
|
|
106
129
|
return models.PointStruct(
|
|
107
130
|
id=str(data_point.id),
|
|
108
|
-
payload=data_point.model_dump(),
|
|
131
|
+
payload={**data_point.model_dump(), "database_name": self.database_name},
|
|
109
132
|
vector={"text": data_vectors[data_points.index(data_point)]},
|
|
110
133
|
)
|
|
111
134
|
|
|
@@ -157,6 +180,7 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
157
180
|
query_vector: list[float] | None = None,
|
|
158
181
|
limit: int | None = 15,
|
|
159
182
|
with_vector: bool = False,
|
|
183
|
+
include_payload: bool = False,
|
|
160
184
|
) -> list[ScoredResult]:
|
|
161
185
|
if query_text is None and query_vector is None:
|
|
162
186
|
raise MissingQueryParameterError()
|
|
@@ -182,9 +206,20 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
182
206
|
query_result = await client.query_points(
|
|
183
207
|
collection_name=collection_name,
|
|
184
208
|
query=query_vector,
|
|
209
|
+
query_filter=models.Filter(
|
|
210
|
+
must=[
|
|
211
|
+
models.FieldCondition(
|
|
212
|
+
key="database_name",
|
|
213
|
+
match=models.MatchValue(
|
|
214
|
+
value=self.database_name,
|
|
215
|
+
),
|
|
216
|
+
)
|
|
217
|
+
]
|
|
218
|
+
),
|
|
185
219
|
using="text",
|
|
186
220
|
limit=limit,
|
|
187
221
|
with_vectors=with_vector,
|
|
222
|
+
with_payload=include_payload,
|
|
188
223
|
)
|
|
189
224
|
|
|
190
225
|
await client.close()
|
|
@@ -195,11 +230,13 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
195
230
|
return [
|
|
196
231
|
ScoredResult(
|
|
197
232
|
id=parse_id(str(result.id)),
|
|
198
|
-
payload=
|
|
233
|
+
payload=None
|
|
234
|
+
if not result.payload
|
|
235
|
+
else {
|
|
199
236
|
**result.payload,
|
|
200
237
|
"id": parse_id(str(result.id)),
|
|
201
238
|
},
|
|
202
|
-
score=1 - result.score if hasattr(result,
|
|
239
|
+
score=1 - result.score if hasattr(result, "score") else 1.0,
|
|
203
240
|
)
|
|
204
241
|
for result in results
|
|
205
242
|
]
|
|
@@ -215,6 +252,7 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
215
252
|
query_texts: list[str],
|
|
216
253
|
limit: int | None = None,
|
|
217
254
|
with_vectors: bool = False,
|
|
255
|
+
include_payload: bool = False,
|
|
218
256
|
):
|
|
219
257
|
"""
|
|
220
258
|
Perform batch search in a Qdrant collection with dynamic search requests.
|
|
@@ -225,6 +263,7 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
225
263
|
- limit (int | None): Result limit applied to the search requests.
|
|
226
264
|
- with_vectors (bool, optional): Bool indicating whether to return
|
|
227
265
|
vectors for search requests.
|
|
266
|
+
- include_payload (bool, optional): Bool indicating whether to return payload in results.
|
|
228
267
|
|
|
229
268
|
Returns:
|
|
230
269
|
- results: The search results from Qdrant.
|
|
@@ -246,8 +285,19 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
246
285
|
query_results = await client.query_batch(
|
|
247
286
|
collection_name=collection_name,
|
|
248
287
|
query_texts=query_texts,
|
|
288
|
+
query_filter=models.Filter(
|
|
289
|
+
must=[
|
|
290
|
+
models.FieldCondition(
|
|
291
|
+
key="database_name",
|
|
292
|
+
match=models.MatchValue(
|
|
293
|
+
value=self.database_name,
|
|
294
|
+
),
|
|
295
|
+
)
|
|
296
|
+
]
|
|
297
|
+
),
|
|
249
298
|
limit=limit,
|
|
250
299
|
with_vectors=with_vectors,
|
|
300
|
+
with_payload=include_payload,
|
|
251
301
|
)
|
|
252
302
|
|
|
253
303
|
await client.close()
|
|
@@ -255,10 +305,9 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
255
305
|
# Extract points from each query result and filter by score
|
|
256
306
|
filtered_results = []
|
|
257
307
|
for query_result in query_results:
|
|
258
|
-
points = query_result.points if hasattr(query_result,
|
|
308
|
+
points = query_result.points if hasattr(query_result, "points") else []
|
|
259
309
|
filtered_points = [
|
|
260
|
-
result for result in points
|
|
261
|
-
if hasattr(result, 'score') and result.score > 0.9
|
|
310
|
+
result for result in points if hasattr(result, "score") and result.score > 0.9
|
|
262
311
|
]
|
|
263
312
|
filtered_results.append(filtered_points)
|
|
264
313
|
|
|
@@ -279,7 +328,22 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
279
328
|
response = await client.get_collections()
|
|
280
329
|
|
|
281
330
|
for collection in response.collections:
|
|
282
|
-
await client.
|
|
331
|
+
await client.delete(
|
|
332
|
+
collection.name,
|
|
333
|
+
points_selector=models.FilterSelector(
|
|
334
|
+
filter=models.Filter(
|
|
335
|
+
must=[
|
|
336
|
+
models.FieldCondition(
|
|
337
|
+
key="database_name",
|
|
338
|
+
match=models.MatchValue(value=self.database_name),
|
|
339
|
+
)
|
|
340
|
+
]
|
|
341
|
+
)
|
|
342
|
+
),
|
|
343
|
+
)
|
|
344
|
+
remaining_points = await client.count(collection_name=collection.name)
|
|
345
|
+
if remaining_points.count == 0:
|
|
346
|
+
await client.delete_collection(collection_name=collection.name)
|
|
283
347
|
|
|
284
348
|
await client.close()
|
|
285
349
|
|
|
@@ -295,7 +359,23 @@ class QDrantAdapter(VectorDBInterface):
|
|
|
295
359
|
|
|
296
360
|
response = await client.get_collections()
|
|
297
361
|
|
|
298
|
-
|
|
362
|
+
# We do this filtering because one user could see another user's collections otherwise
|
|
363
|
+
result = []
|
|
364
|
+
for collection in response.collections:
|
|
365
|
+
relevant_count = await client.count(
|
|
366
|
+
collection_name=collection.name,
|
|
367
|
+
count_filter=models.Filter(
|
|
368
|
+
must=[
|
|
369
|
+
models.FieldCondition(
|
|
370
|
+
key="database_name", match=models.MatchValue(value=self.database_name)
|
|
371
|
+
)
|
|
372
|
+
]
|
|
373
|
+
),
|
|
374
|
+
exact=True,
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
if relevant_count.count > 0:
|
|
378
|
+
result.append(collection.name)
|
|
299
379
|
|
|
300
380
|
await client.close()
|
|
301
381
|
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
from cognee.infrastructure.databases.dataset_database_handler import use_dataset_database_handler
|
|
1
2
|
from cognee.infrastructure.databases.vector import use_vector_adapter
|
|
2
3
|
|
|
3
4
|
from .qdrant_adapter import QDrantAdapter
|
|
5
|
+
from .QdrantDatasetDatabaseHandler import QdrantDatasetDatabaseHandler
|
|
4
6
|
|
|
5
7
|
use_vector_adapter("qdrant", QDrantAdapter)
|
|
8
|
+
use_dataset_database_handler("qdrant", QdrantDatasetDatabaseHandler, "qdrant")
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee-community-vector-adapter-qdrant
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Qdrant vector database adapter for cognee
|
|
5
5
|
Requires-Python: >=3.11,<=3.13
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
7
7
|
Classifier: Programming Language :: Python :: 3.11
|
|
8
8
|
Classifier: Programming Language :: Python :: 3.12
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
-
Requires-Dist: cognee (
|
|
10
|
+
Requires-Dist: cognee (==0.5.2)
|
|
11
11
|
Requires-Dist: instructor (>=1.11)
|
|
12
12
|
Requires-Dist: qdrant-client (>=1.16.0)
|
|
13
13
|
Requires-Dist: starlette (>=0.48.0)
|
|
@@ -49,6 +49,11 @@ Import and register the adapter in your code:
|
|
|
49
49
|
from cognee_community_vector_adapter_qdrant import register
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
+
Also, specify the dataset handler in the .env file:
|
|
53
|
+
```dotenv
|
|
54
|
+
VECTOR_DATASET_DATABASE_HANDLER="qdrant"
|
|
55
|
+
```
|
|
56
|
+
|
|
52
57
|
## Example
|
|
53
58
|
See example in `example.py` file.
|
|
54
59
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
cognee_community_vector_adapter_qdrant/QdrantDatasetDatabaseHandler.py,sha256=mQFgATAdZeiBayoo4poLsXnCVrFPTLN3zSJdGzq0brk,1668
|
|
2
|
+
cognee_community_vector_adapter_qdrant/__init__.py,sha256=OYsRMTyBnNuusKB3rkQmwCppfdEf__AuXMw_Ma1k6lQ,71
|
|
3
|
+
cognee_community_vector_adapter_qdrant/qdrant_adapter.py,sha256=ONpLcjtufnSQEJoXLxfk77oOM2idiU3eGkZWSa5avFo,13591
|
|
4
|
+
cognee_community_vector_adapter_qdrant/register.py,sha256=_CUgC1Ton9HfPfsyi6dEQ5H1J47bXHAwFHIceWSo4SI,406
|
|
5
|
+
cognee_community_vector_adapter_qdrant-0.2.1.dist-info/METADATA,sha256=QY87_e1VZ73U0ACxk6ba4YsCJXmsKZRP-YJUD3bbWYI,1645
|
|
6
|
+
cognee_community_vector_adapter_qdrant-0.2.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
7
|
+
cognee_community_vector_adapter_qdrant-0.2.1.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
cognee_community_vector_adapter_qdrant/__init__.py,sha256=OYsRMTyBnNuusKB3rkQmwCppfdEf__AuXMw_Ma1k6lQ,71
|
|
2
|
-
cognee_community_vector_adapter_qdrant/qdrant_adapter.py,sha256=hFMmSaUArnhTZHR8I9VW3fHdmO-1VHw8TD9-fJ4rs-c,10379
|
|
3
|
-
cognee_community_vector_adapter_qdrant/register.py,sha256=K0cIQGN3an79wWCMXAIgwsymkloHGV2_joy7G-4aiB8,158
|
|
4
|
-
cognee_community_vector_adapter_qdrant-0.1.0.dist-info/METADATA,sha256=0dcxsPnOFHs-gDGnrmFJR9up07mdE6AAQ7tkvRhnPbA,1537
|
|
5
|
-
cognee_community_vector_adapter_qdrant-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
6
|
-
cognee_community_vector_adapter_qdrant-0.1.0.dist-info/RECORD,,
|