cognee-community-vector-adapter-qdrant 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee_community_vector_adapter_qdrant-0.0.2/PKG-INFO +28 -0
- cognee_community_vector_adapter_qdrant-0.0.2/README.md +14 -0
- cognee_community_vector_adapter_qdrant-0.0.2/cognee_community_vector_adapter_qdrant/__init__.py +3 -0
- cognee_community_vector_adapter_qdrant-0.0.2/cognee_community_vector_adapter_qdrant/qdrant_adapter.py +258 -0
- cognee_community_vector_adapter_qdrant-0.0.2/cognee_community_vector_adapter_qdrant/register.py +5 -0
- cognee_community_vector_adapter_qdrant-0.0.2/pyproject.toml +10 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: cognee-community-vector-adapter-qdrant
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Qdrant vector database adapter for cognee
|
|
5
|
+
Requires-Python: >=3.11,<=3.13
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
+
Requires-Dist: cognee (>=0.2.1)
|
|
11
|
+
Requires-Dist: qdrant-client (>=1.14.2)
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# Cognee Qdrant Adapter
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
Install [`qdrant-client`](https://pypi.org/project/qdrant-client/) in your project.
|
|
19
|
+
|
|
20
|
+
Add the following import at the very start of your program, before cognee is initialized.
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from cognee_community_vector_adapter_qdrant import register
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Example
|
|
27
|
+
See example in `example.py` file.
|
|
28
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Cognee Qdrant Adapter
|
|
2
|
+
|
|
3
|
+
## Install
|
|
4
|
+
|
|
5
|
+
Install [`qdrant-client`](https://pypi.org/project/qdrant-client/) in your project.
|
|
6
|
+
|
|
7
|
+
Add the following import at the very start of your program, before cognee is initialized.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from cognee_community_vector_adapter_qdrant import register
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Example
|
|
14
|
+
See example in `example.py` file.
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Dict, List, Optional
|
|
3
|
+
from qdrant_client import AsyncQdrantClient, models
|
|
4
|
+
|
|
5
|
+
from cognee.exceptions import InvalidValueError
|
|
6
|
+
from cognee.shared.logging_utils import get_logger
|
|
7
|
+
|
|
8
|
+
from cognee.infrastructure.engine import DataPoint
|
|
9
|
+
from cognee.infrastructure.engine.utils import parse_id
|
|
10
|
+
from cognee.infrastructure.databases.vector import VectorDBInterface
|
|
11
|
+
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
|
|
12
|
+
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
|
13
|
+
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
|
14
|
+
|
|
15
|
+
logger = get_logger("QDrantAdapter")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class IndexSchema(DataPoint):
    """Data point that carries one piece of text destined for a vector index.

    Instances are built by ``QDrantAdapter.index_data_points`` from the id and
    the first index field of each source data point.
    """

    # The text value stored for the indexed data point.
    text: str

    # Default metadata: "text" is the only entry under "index_fields".
    metadata: dict = {"index_fields": ["text"]}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_hnsw_config(hnsw_config: Dict):
    """Build a Qdrant HNSW configuration model from a dict of settings.

    Args:
        - hnsw_config (Dict): Field values for ``models.HnswConfig``, or None.

    Returns:
        - A ``models.HnswConfig`` populated from ``hnsw_config``, or None when
          no settings were supplied.
    """
    if hnsw_config is None:
        return None

    # Forward the caller's settings to the model instead of discarding them.
    return models.HnswConfig(**hnsw_config)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def create_optimizers_config(optimizers_config: Dict):
    """Build a Qdrant optimizers configuration model from a dict of settings.

    Args:
        - optimizers_config (Dict): Field values for ``models.OptimizersConfig``,
          or None.

    Returns:
        - A ``models.OptimizersConfig`` populated from ``optimizers_config``, or
          None when no settings were supplied.
    """
    if optimizers_config is None:
        return None

    # Forward the caller's settings to the model instead of discarding them.
    return models.OptimizersConfig(**optimizers_config)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def create_quantization_config(quantization_config: Dict):
    """Build a Qdrant quantization configuration model from a dict of settings.

    ``models.QuantizationConfig`` is a union of the concrete quantization
    models and cannot be instantiated itself, so the concrete model is chosen
    by the top-level key present in ``quantization_config``.

    Args:
        - quantization_config (Dict): A dict with exactly one of the keys
          "scalar", "product" or "binary", or None.

    Returns:
        - The matching quantization model, or None when no settings were supplied.

    Raises:
        - InvalidValueError: If none of the known keys is present.
    """
    if quantization_config is None:
        return None

    # NOTE(review): key names follow the field names of the qdrant-client
    # models (ScalarQuantization.scalar, ...) - confirm for the pinned version.
    variants = (
        ("scalar", models.ScalarQuantization),
        ("product", models.ProductQuantization),
        ("binary", models.BinaryQuantization),
    )
    for key, model in variants:
        if key in quantization_config:
            return model(**quantization_config)

    raise InvalidValueError(
        message="quantization_config must contain one of: scalar, product, binary"
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class QDrantAdapter(VectorDBInterface):
|
|
44
|
+
name = "Qdrant"
|
|
45
|
+
url: str = None
|
|
46
|
+
api_key: str = None
|
|
47
|
+
qdrant_path: str = None
|
|
48
|
+
|
|
49
|
+
def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
    """Store connection settings and the embedding engine.

    When ``qdrant_path`` is given, only the path is stored and ``url`` /
    ``api_key`` keep their class-level defaults; otherwise ``url`` and
    ``api_key`` are stored and the path keeps its default.

    Args:
        - url: Address of the Qdrant server (ignored when ``qdrant_path`` is set).
        - api_key: API key for the server (ignored when ``qdrant_path`` is set).
        - embedding_engine (EmbeddingEngine): Engine used to embed text.
        - qdrant_path: Optional path passed to the client instead of a URL.
    """
    self.embedding_engine = embedding_engine
    # Lock taken by create_collection around its check-then-create sequence.
    self.VECTOR_DB_LOCK = asyncio.Lock()

    if qdrant_path is None:
        self.url = url
        self.api_key = api_key
    else:
        self.qdrant_path = qdrant_path
|
|
58
|
+
|
|
59
|
+
def get_qdrant_client(self) -> AsyncQdrantClient:
    """Create a new async Qdrant client from the stored settings.

    Precedence: ``qdrant_path`` first, then ``url`` (with ``api_key``), and an
    in-memory instance when neither is set. A new client object is constructed
    on every call.

    Returns:
        - AsyncQdrantClient: The freshly constructed client.
    """
    local_path = self.qdrant_path
    if local_path is not None:
        return AsyncQdrantClient(path=local_path, port=6333)

    if self.url is None:
        # No path and no URL configured: fall back to an in-memory instance.
        return AsyncQdrantClient(location=":memory:")

    return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)
|
|
66
|
+
|
|
67
|
+
async def embed_data(self, data: List[str]) -> List[List[float]]:
    """Embed a batch of texts with the configured embedding engine.

    Args:
        - data (List[str]): Texts to embed.

    Returns:
        - Whatever ``embedding_engine.embed_text`` returns for ``data``; callers
          in this class index it as one vector per input text.
    """
    return await self.embedding_engine.embed_text(data)
|
|
69
|
+
|
|
70
|
+
async def has_collection(self, collection_name: str) -> bool:
    """Report whether a collection exists.

    Args:
        - collection_name (str): Name of the collection to look up.

    Returns:
        - bool: The result of the client's ``collection_exists`` call.
    """
    client = self.get_qdrant_client()
    try:
        return await client.collection_exists(collection_name)
    finally:
        # Close the client even when the lookup raises.
        await client.close()
|
|
75
|
+
|
|
76
|
+
async def create_collection(
    self,
    collection_name: str,
    payload_schema=None,
):
    """Create a collection with a single named "text" vector if it is missing.

    The existence check and the creation run under ``VECTOR_DB_LOCK`` so that
    concurrent calls on this adapter do not both try to create the collection.

    Args:
        - collection_name (str): Name of the collection to create.
        - payload_schema: Accepted for interface compatibility; not used here.
    """
    async with self.VECTOR_DB_LOCK:
        client = self.get_qdrant_client()
        try:
            if not await client.collection_exists(collection_name):
                await client.create_collection(
                    collection_name=collection_name,
                    vectors_config={
                        "text": models.VectorParams(
                            size=self.embedding_engine.get_vector_size(), distance="Cosine"
                        )
                    },
                )
        finally:
            # Close the client even when the check or the creation raises.
            await client.close()
|
|
95
|
+
|
|
96
|
+
async def create_data_points(self, collection_name: str, data_points: List[DataPoint]):
    """Embed data points and upload them to a collection.

    Each point is stored under its stringified id, with the model dump as
    payload and its embedding under the named vector "text".

    Args:
        - collection_name (str): Target collection.
        - data_points (List[DataPoint]): Data points to embed and upload.

    Raises:
        - CollectionNotFoundError: If the upload fails with an
          ``UnexpectedResponse`` whose text contains "Collection not found".
        - ValueError: If the number of embeddings differs from the number of
          data points.
        - Exception: Any other upload error is logged and re-raised.
    """
    from qdrant_client.http.exceptions import UnexpectedResponse

    # Embed before opening a client so an embedding failure cannot leak one.
    data_vectors = await self.embed_data(
        [DataPoint.get_embeddable_data(data_point) for data_point in data_points]
    )

    # Pair points and vectors by position; looking the index up by value is
    # quadratic and returns the first match when two points compare equal.
    points = [
        models.PointStruct(
            id=str(data_point.id),
            payload=data_point.model_dump(),
            vector={"text": vector},
        )
        for data_point, vector in zip(data_points, data_vectors, strict=True)
    ]

    client = self.get_qdrant_client()

    try:
        # Not awaited, as in the original call.
        # NOTE(review): confirm `upload_points` is synchronous on
        # AsyncQdrantClient for the pinned qdrant-client version.
        client.upload_points(collection_name=collection_name, points=points)
    except UnexpectedResponse as error:
        if "Collection not found" in str(error):
            raise CollectionNotFoundError(
                message=f"Collection {collection_name} not found!"
            ) from error
        raise
    except Exception as error:
        logger.error("Error uploading data points to Qdrant: %s", str(error))
        raise
    finally:
        await client.close()
|
|
128
|
+
|
|
129
|
+
async def create_vector_index(self, index_name: str, index_property_name: str):
    """Create the collection that backs a vector index.

    The collection is named ``"{index_name}_{index_property_name}"``.

    Args:
        - index_name (str): First part of the collection name.
        - index_property_name (str): Second part of the collection name.
    """
    collection_name = f"{index_name}_{index_property_name}"
    await self.create_collection(collection_name)
|
|
131
|
+
|
|
132
|
+
async def index_data_points(
    self, index_name: str, index_property_name: str, data_points: list[DataPoint]
):
    """Store index entries for data points in the index's collection.

    Every data point becomes an ``IndexSchema`` with the same id, whose text is
    the value of the point's first ``metadata["index_fields"]`` attribute. The
    entries go to the collection ``"{index_name}_{index_property_name}"``.

    Args:
        - index_name (str): First part of the collection name.
        - index_property_name (str): Second part of the collection name.
        - data_points (list[DataPoint]): Data points to index.
    """
    collection_name = f"{index_name}_{index_property_name}"

    index_entries = []
    for data_point in data_points:
        index_entries.append(
            IndexSchema(
                id=data_point.id,
                text=getattr(data_point, data_point.metadata["index_fields"][0]),
            )
        )

    await self.create_data_points(collection_name, index_entries)
|
|
145
|
+
|
|
146
|
+
async def retrieve(self, collection_name: str, data_point_ids: list[str]):
    """Fetch points by id, including their payloads.

    Args:
        - collection_name (str): Collection to read from.
        - data_point_ids (list[str]): Ids of the points to fetch.

    Returns:
        - The result of the client's ``retrieve`` call.
    """
    client = self.get_qdrant_client()
    try:
        return await client.retrieve(collection_name, data_point_ids, with_payload=True)
    finally:
        # Close the client even when the request raises.
        await client.close()
|
|
151
|
+
|
|
152
|
+
async def search(
    self,
    collection_name: str,
    query_text: Optional[str] = None,
    query_vector: Optional[List[float]] = None,
    limit: int = 15,
    with_vector: bool = False,
) -> List[ScoredResult]:
    """Search a collection by text or by vector.

    Args:
        - collection_name (str): Collection to search.
        - query_text (Optional[str]): Text to embed and search with; used only
          when ``query_vector`` is not given.
        - query_vector (Optional[List[float]]): Vector to search with.
        - limit (int): Maximum number of results. A value of 0 or less asks for
          as many results as the collection has points.
        - with_vector (bool): Whether the client should return stored vectors.

    Returns:
        - List[ScoredResult]: One entry per hit, with the parsed id also copied
          into the payload and ``score`` set to ``1 - hit.score``. An empty list
          when the collection does not exist or holds no points.

    Raises:
        - InvalidValueError: If neither ``query_text`` nor ``query_vector`` is given.
    """
    if query_text is None and query_vector is None:
        raise InvalidValueError(message="One of query_text or query_vector must be provided!")

    if not await self.has_collection(collection_name):
        return []

    if query_vector is None:
        query_vector = (await self.embed_data([query_text]))[0]

    # Create the client outside the try block so `finally` never sees an
    # unbound name when client construction itself fails.
    client = self.get_qdrant_client()
    try:
        if limit <= 0:
            # Non-positive limit: size the query to the whole collection.
            limit = (await client.count(collection_name=collection_name)).count
            if limit == 0:
                return []

        results = await client.search(
            collection_name=collection_name,
            query_vector=models.NamedVector(name="text", vector=query_vector),
            limit=limit,
            with_vectors=with_vector,
        )
    finally:
        # Single close point for both the success and the error path.
        await client.close()

    scored_results = []
    for result in results:
        result_id = parse_id(result.id)
        scored_results.append(
            ScoredResult(
                id=result_id,
                payload={**result.payload, "id": result_id},
                # NOTE(review): presumably turns a cosine similarity into a
                # distance-like score - confirm against ScoredResult's contract.
                score=1 - result.score,
            )
        )
    return scored_results
|
|
203
|
+
|
|
204
|
+
async def batch_search(
    self,
    collection_name: str,
    query_texts: List[str],
    limit: int = None,
    with_vectors: bool = False,
):
    """
    Perform batch search in a Qdrant collection, one request per query text.

    Args:
    - collection_name (str): Name of the collection to search in.
    - query_texts (List[str]): List of query texts to search for.
    - limit (int, optional): Maximum number of results per query. When omitted,
      15 is used (the default of `search`).
    - with_vectors (bool, optional): Bool indicating whether to return vectors for search requests.

    Returns:
    - results: One list per query text, holding only the hits whose score is
      greater than 0.9.
    """
    # Each search request is built with an integer limit, so replace the
    # None default with a concrete value.
    if limit is None:
        limit = 15

    vectors = await self.embed_data(query_texts)

    # Generate dynamic search requests based on the provided embeddings
    requests = [
        models.SearchRequest(
            vector=models.NamedVector(name="text", vector=vector),
            limit=limit,
            with_vector=with_vectors,
        )
        for vector in vectors
    ]

    client = self.get_qdrant_client()
    try:
        # Perform batch search with the dynamically generated requests
        results = await client.search_batch(collection_name=collection_name, requests=requests)
    finally:
        # Close the client even when the batch request raises.
        await client.close()

    # Materialize each group so callers can index or iterate it more than once.
    return [
        [result for result in result_group if result.score > 0.9]
        for result_group in results
    ]
|
|
244
|
+
|
|
245
|
+
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
    """Delete points from a collection by id.

    Args:
        - collection_name (str): Collection to delete from.
        - data_point_ids (list[str]): Ids of the points to delete.

    Returns:
        - The result of the client's ``delete`` call.
    """
    client = self.get_qdrant_client()
    try:
        return await client.delete(collection_name, data_point_ids)
    finally:
        # Every other method closes its client; do the same here, on both
        # the success and the error path.
        await client.close()
|
|
249
|
+
|
|
250
|
+
async def prune(self):
    """Delete every collection the client lists."""
    client = self.get_qdrant_client()
    try:
        response = await client.get_collections()

        for collection in response.collections:
            await client.delete_collection(collection.name)
    finally:
        # Close the client even when listing or deleting raises part-way.
        await client.close()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
[project]
name = "cognee-community-vector-adapter-qdrant"
version = "0.0.2"
description = "Qdrant vector database adapter for cognee"
readme = "README.md"
# The upper bound is written as "<3.14" so that every 3.13.x patch release is
# accepted; "<=3.13" matches 3.13.0 only and rejects 3.13.1 and later.
requires-python = ">=3.11,<3.14"
dependencies = [
    "qdrant-client>=1.14.2",
    "cognee>=0.2.1",
]
|