cognee-community-vector-adapter-qdrant 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ from .qdrant_adapter import QDrantAdapter
2
+
3
+ __all__ = ["QDrantAdapter"]  # Public API of the package: only the adapter class is exported.
@@ -0,0 +1,258 @@
1
+ import asyncio
2
+ from typing import Dict, List, Optional
3
+ from qdrant_client import AsyncQdrantClient, models
4
+
5
+ from cognee.exceptions import InvalidValueError
6
+ from cognee.shared.logging_utils import get_logger
7
+
8
+ from cognee.infrastructure.engine import DataPoint
9
+ from cognee.infrastructure.engine.utils import parse_id
10
+ from cognee.infrastructure.databases.vector import VectorDBInterface
11
+ from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
12
+ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
13
+ from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
14
+
15
+ logger = get_logger("QDrantAdapter")
16
+
17
+
18
class IndexSchema(DataPoint):
    """Data point that carries a single piece of indexable text.

    ``QDrantAdapter.index_data_points`` wraps the indexed field of each
    incoming data point in an ``IndexSchema`` before storing it.
    """

    text: str

    # Names the field(s) of this data point that are indexed: only ``text``.
    metadata: dict = {"index_fields": ["text"]}
22
+
23
+
24
+
25
def create_hnsw_config(hnsw_config: Dict):
    """Build a Qdrant ``HnswConfig`` from a mapping of settings.

    Args:
        hnsw_config: Mapping whose entries are forwarded as keyword
            arguments to ``models.HnswConfig``, or ``None``.
            NOTE(review): assumes the keys match the model's field
            names -- confirm against callers.

    Returns:
        A ``models.HnswConfig`` built from the mapping, or ``None`` when
        ``hnsw_config`` is ``None``.
    """
    if hnsw_config is None:
        return None
    # Forward the caller's settings instead of discarding them.
    return models.HnswConfig(**hnsw_config)
29
+
30
+
31
def create_optimizers_config(optimizers_config: Dict):
    """Build a Qdrant ``OptimizersConfig`` from a mapping of settings.

    Args:
        optimizers_config: Mapping whose entries are forwarded as keyword
            arguments to ``models.OptimizersConfig``, or ``None``.
            NOTE(review): assumes the keys match the model's field
            names -- confirm against callers.

    Returns:
        A ``models.OptimizersConfig`` built from the mapping, or ``None``
        when ``optimizers_config`` is ``None``.
    """
    if optimizers_config is None:
        return None
    # Forward the caller's settings instead of discarding them.
    return models.OptimizersConfig(**optimizers_config)
35
+
36
+
37
def create_quantization_config(quantization_config: Dict):
    """Build a Qdrant quantization config from a mapping of settings.

    The mapping is dispatched on its top-level key (``"scalar"``,
    ``"product"`` or ``"binary"``) to the matching concrete model, and the
    whole mapping is forwarded to that model as keyword arguments.
    NOTE(review): this assumes callers use that top-level-key layout, and
    that ``models.QuantizationConfig`` itself is a union of these three
    models that cannot be instantiated directly -- confirm both.

    Args:
        quantization_config: Mapping describing the quantization, or
            ``None``.

    Returns:
        A ``ScalarQuantization``, ``ProductQuantization`` or
        ``BinaryQuantization`` instance, or ``None`` when
        ``quantization_config`` is ``None``.

    Raises:
        ValueError: If the mapping contains none of the recognised keys.
    """
    if quantization_config is None:
        return None

    variants = {
        "scalar": models.ScalarQuantization,
        "product": models.ProductQuantization,
        "binary": models.BinaryQuantization,
    }
    for key, model in variants.items():
        if key in quantization_config:
            return model(**quantization_config)

    raise ValueError(
        "quantization_config must contain one of: " + ", ".join(variants)
    )
41
+
42
+
43
class QDrantAdapter(VectorDBInterface):
    """Vector database adapter that stores cognee data points in Qdrant.

    Every operation obtains a new ``AsyncQdrantClient`` from
    ``get_qdrant_client`` and closes it when the operation finishes, also
    when the operation raises. Collections are created with one named
    vector, ``"text"``, compared with cosine distance.
    """

    name = "Qdrant"
    url: Optional[str] = None
    api_key: Optional[str] = None
    qdrant_path: Optional[str] = None

    def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
        """Store the connection settings and the embedding engine.

        Args:
            url: Address of a Qdrant server; ignored when ``qdrant_path``
                is given.
            api_key: API key sent to the server at ``url``; ignored when
                ``qdrant_path`` is given.
            embedding_engine: Engine used to embed text and to report the
                vector size for new collections.
            qdrant_path: Path handed to the client as ``path``. When set,
                it takes precedence over ``url``/``api_key``.
        """
        self.embedding_engine = embedding_engine

        if qdrant_path is not None:
            self.qdrant_path = qdrant_path
        else:
            self.url = url
            self.api_key = api_key

        # Held while checking for and creating a collection, so concurrent
        # create_collection calls on this adapter do not interleave.
        self.VECTOR_DB_LOCK = asyncio.Lock()

    def get_qdrant_client(self) -> AsyncQdrantClient:
        """Return a new client for the configured target.

        Preference order: ``qdrant_path``, then ``url`` (with ``api_key``),
        then an in-memory instance when neither is configured.
        """
        if self.qdrant_path is not None:
            return AsyncQdrantClient(path=self.qdrant_path, port=6333)
        elif self.url is not None:
            return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)

        return AsyncQdrantClient(location=":memory:")

    async def embed_data(self, data: List[str]) -> List[List[float]]:
        """Embed ``data`` with the configured embedding engine.

        Returns the engine's result unchanged; callers index it
        positionally, one vector per input text.
        """
        return await self.embedding_engine.embed_text(data)

    async def has_collection(self, collection_name: str) -> bool:
        """Return whether ``collection_name`` exists in Qdrant."""
        client = self.get_qdrant_client()
        try:
            return await client.collection_exists(collection_name)
        finally:
            await client.close()

    async def create_collection(
        self,
        collection_name: str,
        payload_schema=None,
    ):
        """Create ``collection_name`` if it does not exist yet.

        The collection gets a single named vector ``"text"`` sized by
        ``embedding_engine.get_vector_size()`` with cosine distance.

        Args:
            collection_name: Name of the collection to create.
            payload_schema: Currently unused by this adapter.
        """
        async with self.VECTOR_DB_LOCK:
            client = self.get_qdrant_client()
            try:
                if not await client.collection_exists(collection_name):
                    await client.create_collection(
                        collection_name=collection_name,
                        vectors_config={
                            "text": models.VectorParams(
                                size=self.embedding_engine.get_vector_size(), distance="Cosine"
                            )
                        },
                    )
            finally:
                await client.close()

    async def create_data_points(self, collection_name: str, data_points: List[DataPoint]):
        """Embed ``data_points`` and upload them to ``collection_name``.

        Each point is stored under ``str(data_point.id)`` with its
        ``model_dump()`` as payload and its embedding as the ``"text"``
        vector.

        Raises:
            CollectionNotFoundError: If Qdrant reports that the collection
                does not exist.
            ValueError: If the number of embeddings differs from the
                number of data points.
        """
        from qdrant_client.http.exceptions import UnexpectedResponse

        # Embed before opening a client so an embedding failure cannot
        # leave a client unclosed.
        data_vectors = await self.embed_data(
            [DataPoint.get_embeddable_data(data_point) for data_point in data_points]
        )

        # Pair points and vectors positionally. Looking the point up with
        # list.index() is quadratic and returns the first match for points
        # that compare equal.
        points = [
            models.PointStruct(
                id=str(data_point.id),
                payload=data_point.model_dump(),
                vector={"text": vector},
            )
            for data_point, vector in zip(data_points, data_vectors, strict=True)
        ]

        client = self.get_qdrant_client()
        try:
            # Called without await, unchanged from the previous behaviour.
            # NOTE(review): confirm upload_points is synchronous on
            # AsyncQdrantClient.
            client.upload_points(collection_name=collection_name, points=points)
        except UnexpectedResponse as error:
            if "Collection not found" in str(error):
                raise CollectionNotFoundError(
                    message=f"Collection {collection_name} not found!"
                ) from error
            raise
        except Exception as error:
            logger.error("Error uploading data points to Qdrant: %s", str(error))
            raise
        finally:
            await client.close()

    async def create_vector_index(self, index_name: str, index_property_name: str):
        """Create the collection ``"{index_name}_{index_property_name}"``."""
        await self.create_collection(f"{index_name}_{index_property_name}")

    async def index_data_points(
        self, index_name: str, index_property_name: str, data_points: list[DataPoint]
    ):
        """Store the indexed text of ``data_points`` in the index collection.

        For each data point, the value of its first
        ``metadata["index_fields"]`` attribute is wrapped in an
        ``IndexSchema`` (keeping the original id) and written to
        ``"{index_name}_{index_property_name}"``.
        """
        await self.create_data_points(
            f"{index_name}_{index_property_name}",
            [
                IndexSchema(
                    id=data_point.id,
                    text=getattr(data_point, data_point.metadata["index_fields"][0]),
                )
                for data_point in data_points
            ],
        )

    async def retrieve(self, collection_name: str, data_point_ids: list[str]):
        """Fetch the points with ``data_point_ids``, including payloads."""
        client = self.get_qdrant_client()
        try:
            return await client.retrieve(collection_name, data_point_ids, with_payload=True)
        finally:
            await client.close()

    async def search(
        self,
        collection_name: str,
        query_text: Optional[str] = None,
        query_vector: Optional[List[float]] = None,
        limit: int = 15,
        with_vector: bool = False,
    ) -> List[ScoredResult]:
        """Search ``collection_name`` by text or by vector.

        Args:
            collection_name: Collection to search.
            query_text: Text to embed and search for; used only when
                ``query_vector`` is not given.
            query_vector: Ready-made query vector.
            limit: Maximum number of results. ``0`` (and likewise ``None``
                or a negative value) means "as many as the collection
                holds".
            with_vector: Whether Qdrant should return the stored vectors.

        Returns:
            One ``ScoredResult`` per hit, with ``score`` set to
            ``1 - <Qdrant score>``; an empty list when the collection does
            not exist or is empty.

        Raises:
            InvalidValueError: If neither ``query_text`` nor
                ``query_vector`` is provided.
        """
        if query_text is None and query_vector is None:
            raise InvalidValueError(message="One of query_text or query_vector must be provided!")

        if not await self.has_collection(collection_name):
            return []

        if query_vector is None:
            query_vector = (await self.embed_data([query_text]))[0]

        client = self.get_qdrant_client()
        try:
            if limit is None or limit <= 0:
                # "No limit": ask for as many hits as there are points.
                limit = (await client.count(collection_name=collection_name)).count
                if limit == 0:
                    # Empty collection: nothing to search for.
                    return []

            results = await client.search(
                collection_name=collection_name,
                query_vector=models.NamedVector(name="text", vector=query_vector),
                limit=limit,
                with_vectors=with_vector,
            )

            return [
                ScoredResult(
                    id=parse_id(result.id),
                    payload={
                        **result.payload,
                        "id": parse_id(result.id),
                    },
                    # NOTE(review): presumably turns a cosine similarity
                    # into a distance-like score -- confirm with consumers.
                    score=1 - result.score,
                )
                for result in results
            ]
        finally:
            await client.close()

    async def batch_search(
        self,
        collection_name: str,
        query_texts: List[str],
        limit: int = None,
        with_vectors: bool = False,
    ):
        """
        Perform batch search in a Qdrant collection with dynamic search requests.

        Args:
        - collection_name (str): Name of the collection to search in.
        - query_texts (List[str]): List of query texts to search for.
        - limit (int): Result limit applied to every search request. It is
          passed to Qdrant as given. NOTE(review): the ``None`` default is
          presumably rejected by ``SearchRequest`` -- confirm.
        - with_vectors (bool, optional): Bool indicating whether to return vectors for search requests.

        Returns:
        - results: One list per query text, holding only the hits whose
          score is greater than 0.9.
        """
        vectors = await self.embed_data(query_texts)

        # Generate dynamic search requests based on the provided embeddings
        requests = [
            models.SearchRequest(
                vector=models.NamedVector(name="text", vector=vector),
                limit=limit,
                with_vector=with_vectors,
            )
            for vector in vectors
        ]

        client = self.get_qdrant_client()
        try:
            # Perform batch search with the dynamically generated requests
            results = await client.search_batch(
                collection_name=collection_name, requests=requests
            )
        finally:
            await client.close()

        # Materialise the groups as lists; lazy filter objects can only be
        # consumed once.
        return [
            [result for result in result_group if result.score > 0.9]
            for result_group in results
        ]

    async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
        """Delete the points with ``data_point_ids`` and return Qdrant's response."""
        client = self.get_qdrant_client()
        try:
            return await client.delete(collection_name, data_point_ids)
        finally:
            await client.close()

    async def prune(self):
        """Delete every collection the client can list."""
        client = self.get_qdrant_client()
        try:
            response = await client.get_collections()

            for collection in response.collections:
                await client.delete_collection(collection.name)
        finally:
            await client.close()
@@ -0,0 +1,5 @@
1
+ from cognee.infrastructure.databases.vector import use_vector_adapter
2
+
3
+ from .qdrant_adapter import QDrantAdapter
4
+
5
+ use_vector_adapter("qdrant", QDrantAdapter)  # Import side effect: registers QDrantAdapter with cognee under the name "qdrant".
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.3
2
+ Name: cognee-community-vector-adapter-qdrant
3
+ Version: 0.0.2
4
+ Summary: Qdrant vector database adapter for cognee
5
+ Requires-Python: >=3.11,<=3.13
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Programming Language :: Python :: 3.11
8
+ Classifier: Programming Language :: Python :: 3.12
9
+ Classifier: Programming Language :: Python :: 3.13
10
+ Requires-Dist: cognee (>=0.2.1)
11
+ Requires-Dist: qdrant-client (>=1.14.2)
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Cognee Qdrant Adapter
15
+
16
+ ## Install
17
+
18
+ Install [`qdrant-client`](https://pypi.org/project/qdrant-client/) in your project.
19
+
20
+ Put this line of code at the start of your program, before cognee is initialized.
21
+
22
+ ```python
23
+ import cognee_community_vector_adapter_qdrant.register
24
+ ```
25
+
26
+ ## Example
27
+ See example in `example.py` file.
28
+
@@ -0,0 +1,6 @@
1
+ cognee_community_vector_adapter_qdrant/__init__.py,sha256=PVY2CqYr0JnjErQoDSEobMol0M2BwZFnSOppfJq1cHs,71
2
+ cognee_community_vector_adapter_qdrant/qdrant_adapter.py,sha256=TFUSVVrXy-0DZxu-DgjUuPIzvqSaaFvFyt_-pRPKCKQ,8858
3
+ cognee_community_vector_adapter_qdrant/register.py,sha256=K0cIQGN3an79wWCMXAIgwsymkloHGV2_joy7G-4aiB8,158
4
+ cognee_community_vector_adapter_qdrant-0.0.2.dist-info/METADATA,sha256=UZdDQQJITyPWanHHP-N322-aXVxjRxikua-Yg2reVsA,792
5
+ cognee_community_vector_adapter_qdrant-0.0.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
6
+ cognee_community_vector_adapter_qdrant-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.1.2
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any