qdrant-haystack 3.4.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/__init__.py +2 -2
- haystack_integrations/components/retrievers/qdrant/retriever.py +150 -21
- haystack_integrations/document_stores/qdrant/document_store.py +122 -34
- haystack_integrations/document_stores/qdrant/filters.py +3 -2
- {qdrant_haystack-3.4.0.dist-info → qdrant_haystack-3.6.0.dist-info}/METADATA +2 -1
- {qdrant_haystack-3.4.0.dist-info → qdrant_haystack-3.6.0.dist-info}/RECORD +8 -8
- {qdrant_haystack-3.4.0.dist-info → qdrant_haystack-3.6.0.dist-info}/WHEEL +0 -0
- {qdrant_haystack-3.4.0.dist-info → qdrant_haystack-3.6.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -2,6 +2,6 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from .retriever import QdrantEmbeddingRetriever, QdrantSparseEmbeddingRetriever
|
|
5
|
+
from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever")
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from haystack import Document, component, default_from_dict, default_to_dict
|
|
4
4
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
5
5
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
6
|
+
from qdrant_client.http import models
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
@component
|
|
@@ -12,6 +13,7 @@ class QdrantEmbeddingRetriever:
|
|
|
12
13
|
|
|
13
14
|
Usage example:
|
|
14
15
|
```python
|
|
16
|
+
from haystack.dataclasses import Document
|
|
15
17
|
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
|
|
16
18
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
17
19
|
|
|
@@ -19,8 +21,10 @@ class QdrantEmbeddingRetriever:
|
|
|
19
21
|
":memory:",
|
|
20
22
|
recreate_index=True,
|
|
21
23
|
return_embedding=True,
|
|
22
|
-
wait_result_from_api=True,
|
|
23
24
|
)
|
|
25
|
+
|
|
26
|
+
document_store.write_documents([Document(content="test", embedding=[0.5]*768)])
|
|
27
|
+
|
|
24
28
|
retriever = QdrantEmbeddingRetriever(document_store=document_store)
|
|
25
29
|
|
|
26
30
|
# using a fake vector to keep the example simple
|
|
@@ -31,7 +35,7 @@ class QdrantEmbeddingRetriever:
|
|
|
31
35
|
def __init__(
|
|
32
36
|
self,
|
|
33
37
|
document_store: QdrantDocumentStore,
|
|
34
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
38
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
35
39
|
top_k: int = 10,
|
|
36
40
|
scale_score: bool = True,
|
|
37
41
|
return_embedding: bool = False,
|
|
@@ -40,12 +44,12 @@ class QdrantEmbeddingRetriever:
|
|
|
40
44
|
Create a QdrantEmbeddingRetriever component.
|
|
41
45
|
|
|
42
46
|
:param document_store: An instance of QdrantDocumentStore.
|
|
43
|
-
:param filters: A dictionary with filters to narrow down the search space.
|
|
44
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
45
|
-
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
46
|
-
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
47
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
48
|
+
:param top_k: The maximum number of documents to retrieve.
|
|
49
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
50
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
47
51
|
|
|
48
|
-
:raises ValueError: If
|
|
52
|
+
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
49
53
|
"""
|
|
50
54
|
|
|
51
55
|
if not isinstance(document_store, QdrantDocumentStore):
|
|
@@ -95,7 +99,7 @@ class QdrantEmbeddingRetriever:
|
|
|
95
99
|
def run(
|
|
96
100
|
self,
|
|
97
101
|
query_embedding: List[float],
|
|
98
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
102
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
99
103
|
top_k: Optional[int] = None,
|
|
100
104
|
scale_score: Optional[bool] = None,
|
|
101
105
|
return_embedding: Optional[bool] = None,
|
|
@@ -112,7 +116,7 @@ class QdrantEmbeddingRetriever:
|
|
|
112
116
|
The retrieved documents.
|
|
113
117
|
|
|
114
118
|
"""
|
|
115
|
-
docs = self._document_store.
|
|
119
|
+
docs = self._document_store._query_by_embedding(
|
|
116
120
|
query_embedding=query_embedding,
|
|
117
121
|
filters=filters or self._filters,
|
|
118
122
|
top_k=top_k or self._top_k,
|
|
@@ -132,14 +136,18 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
132
136
|
```python
|
|
133
137
|
from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
|
|
134
138
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
135
|
-
from haystack.dataclasses
|
|
139
|
+
from haystack.dataclasses import Document, SparseEmbedding
|
|
136
140
|
|
|
137
141
|
document_store = QdrantDocumentStore(
|
|
138
142
|
":memory:",
|
|
143
|
+
use_sparse_embeddings=True,
|
|
139
144
|
recreate_index=True,
|
|
140
145
|
return_embedding=True,
|
|
141
|
-
wait_result_from_api=True,
|
|
142
146
|
)
|
|
147
|
+
|
|
148
|
+
doc = Document(content="test", sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))
|
|
149
|
+
document_store.write_documents([doc])
|
|
150
|
+
|
|
143
151
|
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
144
152
|
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
145
153
|
retriever.run(query_sparse_embedding=sparse_embedding)
|
|
@@ -149,7 +157,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
149
157
|
def __init__(
|
|
150
158
|
self,
|
|
151
159
|
document_store: QdrantDocumentStore,
|
|
152
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
160
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
153
161
|
top_k: int = 10,
|
|
154
162
|
scale_score: bool = True,
|
|
155
163
|
return_embedding: bool = False,
|
|
@@ -158,12 +166,12 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
158
166
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
159
167
|
|
|
160
168
|
:param document_store: An instance of QdrantDocumentStore.
|
|
161
|
-
:param filters: A dictionary with filters to narrow down the search space.
|
|
162
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
163
|
-
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
164
|
-
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
169
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
170
|
+
:param top_k: The maximum number of documents to retrieve.
|
|
171
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
172
|
+
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
165
173
|
|
|
166
|
-
:raises ValueError: If
|
|
174
|
+
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
167
175
|
"""
|
|
168
176
|
|
|
169
177
|
if not isinstance(document_store, QdrantDocumentStore):
|
|
@@ -196,7 +204,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
196
204
|
return d
|
|
197
205
|
|
|
198
206
|
@classmethod
|
|
199
|
-
def from_dict(cls, data: Dict[str, Any]) -> "
|
|
207
|
+
def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
|
|
200
208
|
"""
|
|
201
209
|
Deserializes the component from a dictionary.
|
|
202
210
|
|
|
@@ -213,7 +221,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
213
221
|
def run(
|
|
214
222
|
self,
|
|
215
223
|
query_sparse_embedding: SparseEmbedding,
|
|
216
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
224
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
217
225
|
top_k: Optional[int] = None,
|
|
218
226
|
scale_score: Optional[bool] = None,
|
|
219
227
|
return_embedding: Optional[bool] = None,
|
|
@@ -230,7 +238,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
230
238
|
The retrieved documents.
|
|
231
239
|
|
|
232
240
|
"""
|
|
233
|
-
docs = self._document_store.
|
|
241
|
+
docs = self._document_store._query_by_sparse(
|
|
234
242
|
query_sparse_embedding=query_sparse_embedding,
|
|
235
243
|
filters=filters or self._filters,
|
|
236
244
|
top_k=top_k or self._top_k,
|
|
@@ -239,3 +247,124 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
239
247
|
)
|
|
240
248
|
|
|
241
249
|
return {"documents": docs}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@component
|
|
253
|
+
class QdrantHybridRetriever:
|
|
254
|
+
"""
|
|
255
|
+
A component for retrieving documents from an QdrantDocumentStore using both dense and sparse vectors
|
|
256
|
+
and fusing the results using Reciprocal Rank Fusion.
|
|
257
|
+
|
|
258
|
+
Usage example:
|
|
259
|
+
```python
|
|
260
|
+
from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
|
|
261
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
262
|
+
from haystack.dataclasses import Document, SparseEmbedding
|
|
263
|
+
|
|
264
|
+
document_store = QdrantDocumentStore(
|
|
265
|
+
":memory:",
|
|
266
|
+
use_sparse_embeddings=True,
|
|
267
|
+
recreate_index=True,
|
|
268
|
+
return_embedding=True,
|
|
269
|
+
wait_result_from_api=True,
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
doc = Document(content="test",
|
|
273
|
+
embedding=[0.5]*768,
|
|
274
|
+
sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))
|
|
275
|
+
|
|
276
|
+
document_store.write_documents([doc])
|
|
277
|
+
|
|
278
|
+
retriever = QdrantHybridRetriever(document_store=document_store)
|
|
279
|
+
embedding = [0.1]*768
|
|
280
|
+
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
281
|
+
retriever.run(query_embedding=embedding, query_sparse_embedding=sparse_embedding)
|
|
282
|
+
```
|
|
283
|
+
"""
|
|
284
|
+
|
|
285
|
+
def __init__(
|
|
286
|
+
self,
|
|
287
|
+
document_store: QdrantDocumentStore,
|
|
288
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
289
|
+
top_k: int = 10,
|
|
290
|
+
return_embedding: bool = False,
|
|
291
|
+
):
|
|
292
|
+
"""
|
|
293
|
+
Create a QdrantHybridRetriever component.
|
|
294
|
+
|
|
295
|
+
:param document_store: An instance of QdrantDocumentStore.
|
|
296
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
297
|
+
:param top_k: The maximum number of documents to retrieve.
|
|
298
|
+
:param return_embedding: Whether to return the embeddings of the retrieved Documents.
|
|
299
|
+
|
|
300
|
+
:raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
|
|
301
|
+
"""
|
|
302
|
+
|
|
303
|
+
if not isinstance(document_store, QdrantDocumentStore):
|
|
304
|
+
msg = "document_store must be an instance of QdrantDocumentStore"
|
|
305
|
+
raise ValueError(msg)
|
|
306
|
+
|
|
307
|
+
self._document_store = document_store
|
|
308
|
+
self._filters = filters
|
|
309
|
+
self._top_k = top_k
|
|
310
|
+
self._return_embedding = return_embedding
|
|
311
|
+
|
|
312
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
313
|
+
"""
|
|
314
|
+
Serializes the component to a dictionary.
|
|
315
|
+
|
|
316
|
+
:returns:
|
|
317
|
+
Dictionary with serialized data.
|
|
318
|
+
"""
|
|
319
|
+
return default_to_dict(
|
|
320
|
+
self,
|
|
321
|
+
document_store=self._document_store.to_dict(),
|
|
322
|
+
filters=self._filters,
|
|
323
|
+
top_k=self._top_k,
|
|
324
|
+
return_embedding=self._return_embedding,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
@classmethod
|
|
328
|
+
def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever":
|
|
329
|
+
"""
|
|
330
|
+
Deserializes the component from a dictionary.
|
|
331
|
+
|
|
332
|
+
:param data:
|
|
333
|
+
Dictionary to deserialize from.
|
|
334
|
+
:returns:
|
|
335
|
+
Deserialized component.
|
|
336
|
+
"""
|
|
337
|
+
document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
|
|
338
|
+
data["init_parameters"]["document_store"] = document_store
|
|
339
|
+
return default_from_dict(cls, data)
|
|
340
|
+
|
|
341
|
+
@component.output_types(documents=List[Document])
|
|
342
|
+
def run(
|
|
343
|
+
self,
|
|
344
|
+
query_embedding: List[float],
|
|
345
|
+
query_sparse_embedding: SparseEmbedding,
|
|
346
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
347
|
+
top_k: Optional[int] = None,
|
|
348
|
+
return_embedding: Optional[bool] = None,
|
|
349
|
+
):
|
|
350
|
+
"""
|
|
351
|
+
Run the Sparse Embedding Retriever on the given input data.
|
|
352
|
+
|
|
353
|
+
:param query_embedding: Dense embedding of the query.
|
|
354
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
355
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
356
|
+
:param top_k: The maximum number of documents to return.
|
|
357
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
358
|
+
:returns:
|
|
359
|
+
The retrieved documents.
|
|
360
|
+
|
|
361
|
+
"""
|
|
362
|
+
docs = self._document_store._query_hybrid(
|
|
363
|
+
query_embedding=query_embedding,
|
|
364
|
+
query_sparse_embedding=query_sparse_embedding,
|
|
365
|
+
filters=filters or self._filters,
|
|
366
|
+
top_k=top_k or self._top_k,
|
|
367
|
+
return_embedding=return_embedding or self._return_embedding,
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
return {"documents": docs}
|
|
@@ -16,6 +16,7 @@ from haystack.utils.filters import convert as convert_legacy_filters
|
|
|
16
16
|
from qdrant_client import grpc
|
|
17
17
|
from qdrant_client.http import models as rest
|
|
18
18
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
19
|
+
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
|
|
19
20
|
from tqdm import tqdm
|
|
20
21
|
|
|
21
22
|
from .converters import (
|
|
@@ -65,7 +66,7 @@ class QdrantDocumentStore:
|
|
|
65
66
|
https: Optional[bool] = None,
|
|
66
67
|
api_key: Optional[Secret] = None,
|
|
67
68
|
prefix: Optional[str] = None,
|
|
68
|
-
timeout: Optional[
|
|
69
|
+
timeout: Optional[int] = None,
|
|
69
70
|
host: Optional[str] = None,
|
|
70
71
|
path: Optional[str] = None,
|
|
71
72
|
index: str = "Document",
|
|
@@ -95,23 +96,7 @@ class QdrantDocumentStore:
|
|
|
95
96
|
scroll_size: int = 10_000,
|
|
96
97
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
97
98
|
):
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
metadata = metadata or {}
|
|
101
|
-
self.client = qdrant_client.QdrantClient(
|
|
102
|
-
location=location,
|
|
103
|
-
url=url,
|
|
104
|
-
port=port,
|
|
105
|
-
grpc_port=grpc_port,
|
|
106
|
-
prefer_grpc=prefer_grpc,
|
|
107
|
-
https=https,
|
|
108
|
-
api_key=api_key.resolve_value() if api_key else None,
|
|
109
|
-
prefix=prefix,
|
|
110
|
-
timeout=timeout,
|
|
111
|
-
host=host,
|
|
112
|
-
path=path,
|
|
113
|
-
metadata=metadata,
|
|
114
|
-
)
|
|
99
|
+
self._client = None
|
|
115
100
|
|
|
116
101
|
# Store the Qdrant client specific attributes
|
|
117
102
|
self.location = location
|
|
@@ -125,7 +110,7 @@ class QdrantDocumentStore:
|
|
|
125
110
|
self.timeout = timeout
|
|
126
111
|
self.host = host
|
|
127
112
|
self.path = path
|
|
128
|
-
self.metadata = metadata
|
|
113
|
+
self.metadata = metadata or {}
|
|
129
114
|
self.api_key = api_key
|
|
130
115
|
|
|
131
116
|
# Store the Qdrant collection specific attributes
|
|
@@ -142,12 +127,6 @@ class QdrantDocumentStore:
|
|
|
142
127
|
self.recreate_index = recreate_index
|
|
143
128
|
self.payload_fields_to_index = payload_fields_to_index
|
|
144
129
|
self.use_sparse_embeddings = use_sparse_embeddings
|
|
145
|
-
|
|
146
|
-
# Make sure the collection is properly set up
|
|
147
|
-
self._set_up_collection(
|
|
148
|
-
index, embedding_dim, recreate_index, similarity, use_sparse_embeddings, on_disk, payload_fields_to_index
|
|
149
|
-
)
|
|
150
|
-
|
|
151
130
|
self.embedding_dim = embedding_dim
|
|
152
131
|
self.on_disk = on_disk
|
|
153
132
|
self.content_field = content_field
|
|
@@ -161,6 +140,35 @@ class QdrantDocumentStore:
|
|
|
161
140
|
self.write_batch_size = write_batch_size
|
|
162
141
|
self.scroll_size = scroll_size
|
|
163
142
|
|
|
143
|
+
@property
|
|
144
|
+
def client(self):
|
|
145
|
+
if not self._client:
|
|
146
|
+
self._client = qdrant_client.QdrantClient(
|
|
147
|
+
location=self.location,
|
|
148
|
+
url=self.url,
|
|
149
|
+
port=self.port,
|
|
150
|
+
grpc_port=self.grpc_port,
|
|
151
|
+
prefer_grpc=self.prefer_grpc,
|
|
152
|
+
https=self.https,
|
|
153
|
+
api_key=self.api_key.resolve_value() if self.api_key else None,
|
|
154
|
+
prefix=self.prefix,
|
|
155
|
+
timeout=self.timeout,
|
|
156
|
+
host=self.host,
|
|
157
|
+
path=self.path,
|
|
158
|
+
metadata=self.metadata,
|
|
159
|
+
)
|
|
160
|
+
# Make sure the collection is properly set up
|
|
161
|
+
self._set_up_collection(
|
|
162
|
+
self.index,
|
|
163
|
+
self.embedding_dim,
|
|
164
|
+
self.recreate_index,
|
|
165
|
+
self.similarity,
|
|
166
|
+
self.use_sparse_embeddings,
|
|
167
|
+
self.on_disk,
|
|
168
|
+
self.payload_fields_to_index,
|
|
169
|
+
)
|
|
170
|
+
return self._client
|
|
171
|
+
|
|
164
172
|
def count_documents(self) -> int:
|
|
165
173
|
try:
|
|
166
174
|
response = self.client.count(
|
|
@@ -175,13 +183,13 @@ class QdrantDocumentStore:
|
|
|
175
183
|
|
|
176
184
|
def filter_documents(
|
|
177
185
|
self,
|
|
178
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
186
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
179
187
|
) -> List[Document]:
|
|
180
|
-
if filters and not isinstance(filters, dict):
|
|
181
|
-
msg = "Filter must be a dictionary"
|
|
188
|
+
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
189
|
+
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
182
190
|
raise ValueError(msg)
|
|
183
191
|
|
|
184
|
-
if filters and "operator" not in filters:
|
|
192
|
+
if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
|
|
185
193
|
filters = convert_legacy_filters(filters)
|
|
186
194
|
return list(
|
|
187
195
|
self.get_documents_generator(
|
|
@@ -259,7 +267,7 @@ class QdrantDocumentStore:
|
|
|
259
267
|
|
|
260
268
|
def get_documents_generator(
|
|
261
269
|
self,
|
|
262
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
270
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
263
271
|
) -> Generator[Document, None, None]:
|
|
264
272
|
index = self.index
|
|
265
273
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
@@ -307,10 +315,10 @@ class QdrantDocumentStore:
|
|
|
307
315
|
)
|
|
308
316
|
return documents
|
|
309
317
|
|
|
310
|
-
def
|
|
318
|
+
def _query_by_sparse(
|
|
311
319
|
self,
|
|
312
320
|
query_sparse_embedding: SparseEmbedding,
|
|
313
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
321
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
314
322
|
top_k: int = 10,
|
|
315
323
|
scale_score: bool = True,
|
|
316
324
|
return_embedding: bool = False,
|
|
@@ -349,10 +357,10 @@ class QdrantDocumentStore:
|
|
|
349
357
|
document.score = score
|
|
350
358
|
return results
|
|
351
359
|
|
|
352
|
-
def
|
|
360
|
+
def _query_by_embedding(
|
|
353
361
|
self,
|
|
354
362
|
query_embedding: List[float],
|
|
355
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
363
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
356
364
|
top_k: int = 10,
|
|
357
365
|
scale_score: bool = True,
|
|
358
366
|
return_embedding: bool = False,
|
|
@@ -383,6 +391,86 @@ class QdrantDocumentStore:
|
|
|
383
391
|
document.score = score
|
|
384
392
|
return results
|
|
385
393
|
|
|
394
|
+
def _query_hybrid(
|
|
395
|
+
self,
|
|
396
|
+
query_embedding: List[float],
|
|
397
|
+
query_sparse_embedding: SparseEmbedding,
|
|
398
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
399
|
+
top_k: int = 10,
|
|
400
|
+
return_embedding: bool = False,
|
|
401
|
+
) -> List[Document]:
|
|
402
|
+
"""
|
|
403
|
+
Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
|
|
404
|
+
|
|
405
|
+
This method is not part of the public interface of `QdrantDocumentStore` and shouldn't be used directly.
|
|
406
|
+
Use the `QdrantHybridRetriever` instead.
|
|
407
|
+
|
|
408
|
+
:param query_embedding: Dense embedding of the query.
|
|
409
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
410
|
+
:param filters: Filters applied to the retrieved Documents.
|
|
411
|
+
:param top_k: Maximum number of Documents to return.
|
|
412
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
413
|
+
|
|
414
|
+
:returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
|
|
415
|
+
|
|
416
|
+
:raises QdrantStoreError:
|
|
417
|
+
If the Document Store was initialized with `use_sparse_embeddings=False`.
|
|
418
|
+
"""
|
|
419
|
+
|
|
420
|
+
# This implementation is based on the code from the Python Qdrant client:
|
|
421
|
+
# https://github.com/qdrant/qdrant-client/blob/8e3ea58f781e4110d11c0a6985b5e6bb66b85d33/qdrant_client/qdrant_fastembed.py#L519
|
|
422
|
+
if not self.use_sparse_embeddings:
|
|
423
|
+
message = (
|
|
424
|
+
"You are trying to query using sparse embeddings, but the Document Store "
|
|
425
|
+
"was initialized with `use_sparse_embeddings=False`. "
|
|
426
|
+
)
|
|
427
|
+
raise QdrantStoreError(message)
|
|
428
|
+
|
|
429
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
430
|
+
|
|
431
|
+
sparse_request = rest.SearchRequest(
|
|
432
|
+
vector=rest.NamedSparseVector(
|
|
433
|
+
name=SPARSE_VECTORS_NAME,
|
|
434
|
+
vector=rest.SparseVector(
|
|
435
|
+
indices=query_sparse_embedding.indices,
|
|
436
|
+
values=query_sparse_embedding.values,
|
|
437
|
+
),
|
|
438
|
+
),
|
|
439
|
+
filter=qdrant_filters,
|
|
440
|
+
limit=top_k,
|
|
441
|
+
with_payload=True,
|
|
442
|
+
with_vector=return_embedding,
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
dense_request = rest.SearchRequest(
|
|
446
|
+
vector=rest.NamedVector(
|
|
447
|
+
name=DENSE_VECTORS_NAME,
|
|
448
|
+
vector=query_embedding,
|
|
449
|
+
),
|
|
450
|
+
filter=qdrant_filters,
|
|
451
|
+
limit=top_k,
|
|
452
|
+
with_payload=True,
|
|
453
|
+
with_vector=return_embedding,
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
try:
|
|
457
|
+
dense_request_response, sparse_request_response = self.client.search_batch(
|
|
458
|
+
collection_name=self.index, requests=[dense_request, sparse_request]
|
|
459
|
+
)
|
|
460
|
+
except Exception as e:
|
|
461
|
+
msg = "Error during hybrid search"
|
|
462
|
+
raise QdrantStoreError(msg) from e
|
|
463
|
+
|
|
464
|
+
try:
|
|
465
|
+
points = reciprocal_rank_fusion(responses=[dense_request_response, sparse_request_response], limit=top_k)
|
|
466
|
+
except Exception as e:
|
|
467
|
+
msg = "Error while applying Reciprocal Rank Fusion"
|
|
468
|
+
raise QdrantStoreError(msg) from e
|
|
469
|
+
|
|
470
|
+
results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
|
|
471
|
+
|
|
472
|
+
return results
|
|
473
|
+
|
|
386
474
|
def _get_distance(self, similarity: str) -> rest.Distance:
|
|
387
475
|
try:
|
|
388
476
|
return self.SIMILARITY[similarity]
|
|
@@ -11,10 +11,11 @@ LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def convert_filters_to_qdrant(
|
|
14
|
-
filter_term: Optional[Union[List[dict], dict]] = None,
|
|
14
|
+
filter_term: Optional[Union[List[dict], dict, models.Filter]] = None,
|
|
15
15
|
) -> Optional[models.Filter]:
|
|
16
16
|
"""Converts Haystack filters to the format used by Qdrant."""
|
|
17
|
-
|
|
17
|
+
if isinstance(filter_term, models.Filter):
|
|
18
|
+
return filter_term
|
|
18
19
|
if not filter_term:
|
|
19
20
|
return None
|
|
20
21
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -9,6 +9,7 @@ Author-email: Kacper Łukawski <kacper.lukawski@qdrant.com>, Anush Shetty <anush
|
|
|
9
9
|
License-Expression: Apache-2.0
|
|
10
10
|
License-File: LICENSE.txt
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.8
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=
|
|
2
|
-
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=
|
|
1
|
+
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
|
|
2
|
+
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=r416_a7_6l7ehfPLdFtyHncsPrHj3mFCyROeJJF9rwM,13463
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
4
|
haystack_integrations/document_stores/qdrant/converters.py,sha256=oSO2YlsWEQbcw9CPlWfSg_HoTZlnkAhZw_6VlYWzKKs,2525
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=
|
|
6
|
-
haystack_integrations/document_stores/qdrant/filters.py,sha256=
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=ouPp-oM1M4VvkYtpZ2pl5kGVQ5Ei4kc--Jwd7cYRlzk,26827
|
|
6
|
+
haystack_integrations/document_stores/qdrant/filters.py,sha256=0w70Wa3Za1fNdbJ5O95sZDIpXfblJG_sBBUv0JTQ0-o,8337
|
|
7
7
|
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
|
|
8
|
-
qdrant_haystack-3.
|
|
9
|
-
qdrant_haystack-3.
|
|
10
|
-
qdrant_haystack-3.
|
|
11
|
-
qdrant_haystack-3.
|
|
8
|
+
qdrant_haystack-3.6.0.dist-info/METADATA,sha256=8pAGEH2tOoSqU-YXe0hoCfgUUOMEdcV1KSEcj4YMOsM,1862
|
|
9
|
+
qdrant_haystack-3.6.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
10
|
+
qdrant_haystack-3.6.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-3.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|