qdrant-haystack 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haystack_integrations/components/retrievers/py.typed +0 -0
- haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
- haystack_integrations/components/retrievers/qdrant/retriever.py +269 -56
- haystack_integrations/document_stores/py.typed +0 -0
- haystack_integrations/document_stores/qdrant/converters.py +15 -13
- haystack_integrations/document_stores/qdrant/document_store.py +1802 -355
- haystack_integrations/document_stores/qdrant/filters.py +87 -168
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +8 -3
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/METADATA +12 -27
- qdrant_haystack-10.2.0.dist-info/RECORD +13 -0
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/WHEEL +1 -1
- qdrant_haystack-6.0.0.dist-info/RECORD +0 -11
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/licenses/LICENSE.txt +0 -0
|
File without changes
|
|
@@ -4,4 +4,4 @@
|
|
|
4
4
|
|
|
5
5
|
from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
from haystack import Document, component, default_from_dict, default_to_dict
|
|
4
4
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
@@ -8,6 +8,11 @@ from qdrant_client.http import models
|
|
|
8
8
|
|
|
9
9
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
10
10
|
|
|
11
|
+
FILTER_POLICY_MERGE_ERROR_MESSAGE = (
|
|
12
|
+
"Native Qdrant filters cannot be used with filter_policy set to MERGE. "
|
|
13
|
+
"Set filter_policy to REPLACE or use Haystack filters instead."
|
|
14
|
+
)
|
|
15
|
+
|
|
11
16
|
|
|
12
17
|
@component
|
|
13
18
|
class QdrantEmbeddingRetriever:
|
|
@@ -38,15 +43,15 @@ class QdrantEmbeddingRetriever:
|
|
|
38
43
|
def __init__(
|
|
39
44
|
self,
|
|
40
45
|
document_store: QdrantDocumentStore,
|
|
41
|
-
filters:
|
|
46
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
42
47
|
top_k: int = 10,
|
|
43
48
|
scale_score: bool = False,
|
|
44
49
|
return_embedding: bool = False,
|
|
45
|
-
filter_policy:
|
|
46
|
-
score_threshold:
|
|
47
|
-
group_by:
|
|
48
|
-
group_size:
|
|
49
|
-
):
|
|
50
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
51
|
+
score_threshold: float | None = None,
|
|
52
|
+
group_by: str | None = None,
|
|
53
|
+
group_size: int | None = None,
|
|
54
|
+
) -> None:
|
|
50
55
|
"""
|
|
51
56
|
Create a QdrantEmbeddingRetriever component.
|
|
52
57
|
|
|
@@ -84,7 +89,7 @@ class QdrantEmbeddingRetriever:
|
|
|
84
89
|
self._group_by = group_by
|
|
85
90
|
self._group_size = group_size
|
|
86
91
|
|
|
87
|
-
def to_dict(self) ->
|
|
92
|
+
def to_dict(self) -> dict[str, Any]:
|
|
88
93
|
"""
|
|
89
94
|
Serializes the component to a dictionary.
|
|
90
95
|
|
|
@@ -108,7 +113,7 @@ class QdrantEmbeddingRetriever:
|
|
|
108
113
|
return d
|
|
109
114
|
|
|
110
115
|
@classmethod
|
|
111
|
-
def from_dict(cls, data:
|
|
116
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantEmbeddingRetriever":
|
|
112
117
|
"""
|
|
113
118
|
Deserializes the component from a dictionary.
|
|
114
119
|
|
|
@@ -125,18 +130,18 @@ class QdrantEmbeddingRetriever:
|
|
|
125
130
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
126
131
|
return default_from_dict(cls, data)
|
|
127
132
|
|
|
128
|
-
@component.output_types(documents=
|
|
133
|
+
@component.output_types(documents=list[Document])
|
|
129
134
|
def run(
|
|
130
135
|
self,
|
|
131
|
-
query_embedding:
|
|
132
|
-
filters:
|
|
133
|
-
top_k:
|
|
134
|
-
scale_score:
|
|
135
|
-
return_embedding:
|
|
136
|
-
score_threshold:
|
|
137
|
-
group_by:
|
|
138
|
-
group_size:
|
|
139
|
-
):
|
|
136
|
+
query_embedding: list[float],
|
|
137
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
138
|
+
top_k: int | None = None,
|
|
139
|
+
scale_score: bool | None = None,
|
|
140
|
+
return_embedding: bool | None = None,
|
|
141
|
+
score_threshold: float | None = None,
|
|
142
|
+
group_by: str | None = None,
|
|
143
|
+
group_size: int | None = None,
|
|
144
|
+
) -> dict[str, list[Document]]:
|
|
140
145
|
"""
|
|
141
146
|
Run the Embedding Retriever on the given input data.
|
|
142
147
|
|
|
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
|
|
|
153
158
|
:returns:
|
|
154
159
|
The retrieved documents.
|
|
155
160
|
|
|
161
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
156
162
|
"""
|
|
157
|
-
|
|
163
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
164
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
165
|
+
):
|
|
166
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
167
|
+
|
|
168
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
169
|
+
filters = apply_filter_policy(
|
|
170
|
+
filter_policy=self._filter_policy,
|
|
171
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
172
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
173
|
+
)
|
|
158
174
|
|
|
159
175
|
docs = self._document_store._query_by_embedding(
|
|
160
176
|
query_embedding=query_embedding,
|
|
@@ -169,6 +185,61 @@ class QdrantEmbeddingRetriever:
|
|
|
169
185
|
|
|
170
186
|
return {"documents": docs}
|
|
171
187
|
|
|
188
|
+
@component.output_types(documents=list[Document])
|
|
189
|
+
async def run_async(
|
|
190
|
+
self,
|
|
191
|
+
query_embedding: list[float],
|
|
192
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
193
|
+
top_k: int | None = None,
|
|
194
|
+
scale_score: bool | None = None,
|
|
195
|
+
return_embedding: bool | None = None,
|
|
196
|
+
score_threshold: float | None = None,
|
|
197
|
+
group_by: str | None = None,
|
|
198
|
+
group_size: int | None = None,
|
|
199
|
+
) -> dict[str, list[Document]]:
|
|
200
|
+
"""
|
|
201
|
+
Asynchronously run the Embedding Retriever on the given input data.
|
|
202
|
+
|
|
203
|
+
:param query_embedding: Embedding of the query.
|
|
204
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
205
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
206
|
+
groups to return.
|
|
207
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
208
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
209
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
210
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
211
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
212
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
213
|
+
:returns:
|
|
214
|
+
The retrieved documents.
|
|
215
|
+
|
|
216
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
217
|
+
"""
|
|
218
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
219
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
220
|
+
):
|
|
221
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
222
|
+
|
|
223
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
224
|
+
filters = apply_filter_policy(
|
|
225
|
+
filter_policy=self._filter_policy,
|
|
226
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
227
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
docs = await self._document_store._query_by_embedding_async(
|
|
231
|
+
query_embedding=query_embedding,
|
|
232
|
+
filters=filters,
|
|
233
|
+
top_k=top_k or self._top_k,
|
|
234
|
+
scale_score=scale_score or self._scale_score,
|
|
235
|
+
return_embedding=return_embedding or self._return_embedding,
|
|
236
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
237
|
+
group_by=group_by or self._group_by,
|
|
238
|
+
group_size=group_size or self._group_size,
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
return {"documents": docs}
|
|
242
|
+
|
|
172
243
|
|
|
173
244
|
@component
|
|
174
245
|
class QdrantSparseEmbeddingRetriever:
|
|
@@ -200,15 +271,15 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
200
271
|
def __init__(
|
|
201
272
|
self,
|
|
202
273
|
document_store: QdrantDocumentStore,
|
|
203
|
-
filters:
|
|
274
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
204
275
|
top_k: int = 10,
|
|
205
276
|
scale_score: bool = False,
|
|
206
277
|
return_embedding: bool = False,
|
|
207
|
-
filter_policy:
|
|
208
|
-
score_threshold:
|
|
209
|
-
group_by:
|
|
210
|
-
group_size:
|
|
211
|
-
):
|
|
278
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
279
|
+
score_threshold: float | None = None,
|
|
280
|
+
group_by: str | None = None,
|
|
281
|
+
group_size: int | None = None,
|
|
282
|
+
) -> None:
|
|
212
283
|
"""
|
|
213
284
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
214
285
|
|
|
@@ -246,7 +317,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
246
317
|
self._group_by = group_by
|
|
247
318
|
self._group_size = group_size
|
|
248
319
|
|
|
249
|
-
def to_dict(self) ->
|
|
320
|
+
def to_dict(self) -> dict[str, Any]:
|
|
250
321
|
"""
|
|
251
322
|
Serializes the component to a dictionary.
|
|
252
323
|
|
|
@@ -270,7 +341,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
270
341
|
return d
|
|
271
342
|
|
|
272
343
|
@classmethod
|
|
273
|
-
def from_dict(cls, data:
|
|
344
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
|
|
274
345
|
"""
|
|
275
346
|
Deserializes the component from a dictionary.
|
|
276
347
|
|
|
@@ -287,18 +358,18 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
287
358
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
288
359
|
return default_from_dict(cls, data)
|
|
289
360
|
|
|
290
|
-
@component.output_types(documents=
|
|
361
|
+
@component.output_types(documents=list[Document])
|
|
291
362
|
def run(
|
|
292
363
|
self,
|
|
293
364
|
query_sparse_embedding: SparseEmbedding,
|
|
294
|
-
filters:
|
|
295
|
-
top_k:
|
|
296
|
-
scale_score:
|
|
297
|
-
return_embedding:
|
|
298
|
-
score_threshold:
|
|
299
|
-
group_by:
|
|
300
|
-
group_size:
|
|
301
|
-
):
|
|
365
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
366
|
+
top_k: int | None = None,
|
|
367
|
+
scale_score: bool | None = None,
|
|
368
|
+
return_embedding: bool | None = None,
|
|
369
|
+
score_threshold: float | None = None,
|
|
370
|
+
group_by: str | None = None,
|
|
371
|
+
group_size: int | None = None,
|
|
372
|
+
) -> dict[str, list[Document]]:
|
|
302
373
|
"""
|
|
303
374
|
Run the Sparse Embedding Retriever on the given input data.
|
|
304
375
|
|
|
@@ -320,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
320
391
|
:returns:
|
|
321
392
|
The retrieved documents.
|
|
322
393
|
|
|
394
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
323
395
|
"""
|
|
324
|
-
|
|
396
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
397
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
398
|
+
):
|
|
399
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
400
|
+
|
|
401
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
402
|
+
filters = apply_filter_policy(
|
|
403
|
+
filter_policy=self._filter_policy,
|
|
404
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
405
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
406
|
+
)
|
|
325
407
|
|
|
326
408
|
docs = self._document_store._query_by_sparse(
|
|
327
409
|
query_sparse_embedding=query_sparse_embedding,
|
|
@@ -336,6 +418,66 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
336
418
|
|
|
337
419
|
return {"documents": docs}
|
|
338
420
|
|
|
421
|
+
@component.output_types(documents=list[Document])
|
|
422
|
+
async def run_async(
|
|
423
|
+
self,
|
|
424
|
+
query_sparse_embedding: SparseEmbedding,
|
|
425
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
426
|
+
top_k: int | None = None,
|
|
427
|
+
scale_score: bool | None = None,
|
|
428
|
+
return_embedding: bool | None = None,
|
|
429
|
+
score_threshold: float | None = None,
|
|
430
|
+
group_by: str | None = None,
|
|
431
|
+
group_size: int | None = None,
|
|
432
|
+
) -> dict[str, list[Document]]:
|
|
433
|
+
"""
|
|
434
|
+
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
435
|
+
|
|
436
|
+
:param query_sparse_embedding: Sparse Embedding of the query.
|
|
437
|
+
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
438
|
+
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
439
|
+
details.
|
|
440
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
441
|
+
groups to return.
|
|
442
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
443
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
444
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
445
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
446
|
+
depending on the Distance function used.
|
|
447
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
448
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
449
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
450
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
451
|
+
:returns:
|
|
452
|
+
The retrieved documents.
|
|
453
|
+
|
|
454
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
455
|
+
"""
|
|
456
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
457
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
458
|
+
):
|
|
459
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
460
|
+
|
|
461
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
462
|
+
filters = apply_filter_policy(
|
|
463
|
+
filter_policy=self._filter_policy,
|
|
464
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
465
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
466
|
+
)
|
|
467
|
+
|
|
468
|
+
docs = await self._document_store._query_by_sparse_async(
|
|
469
|
+
query_sparse_embedding=query_sparse_embedding,
|
|
470
|
+
filters=filters,
|
|
471
|
+
top_k=top_k or self._top_k,
|
|
472
|
+
scale_score=scale_score or self._scale_score,
|
|
473
|
+
return_embedding=return_embedding or self._return_embedding,
|
|
474
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
475
|
+
group_by=group_by or self._group_by,
|
|
476
|
+
group_size=group_size or self._group_size,
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
return {"documents": docs}
|
|
480
|
+
|
|
339
481
|
|
|
340
482
|
@component
|
|
341
483
|
class QdrantHybridRetriever:
|
|
@@ -373,14 +515,14 @@ class QdrantHybridRetriever:
|
|
|
373
515
|
def __init__(
|
|
374
516
|
self,
|
|
375
517
|
document_store: QdrantDocumentStore,
|
|
376
|
-
filters:
|
|
518
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
377
519
|
top_k: int = 10,
|
|
378
520
|
return_embedding: bool = False,
|
|
379
|
-
filter_policy:
|
|
380
|
-
score_threshold:
|
|
381
|
-
group_by:
|
|
382
|
-
group_size:
|
|
383
|
-
):
|
|
521
|
+
filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
|
|
522
|
+
score_threshold: float | None = None,
|
|
523
|
+
group_by: str | None = None,
|
|
524
|
+
group_size: int | None = None,
|
|
525
|
+
) -> None:
|
|
384
526
|
"""
|
|
385
527
|
Create a QdrantHybridRetriever component.
|
|
386
528
|
|
|
@@ -416,7 +558,7 @@ class QdrantHybridRetriever:
|
|
|
416
558
|
self._group_by = group_by
|
|
417
559
|
self._group_size = group_size
|
|
418
560
|
|
|
419
|
-
def to_dict(self) ->
|
|
561
|
+
def to_dict(self) -> dict[str, Any]:
|
|
420
562
|
"""
|
|
421
563
|
Serializes the component to a dictionary.
|
|
422
564
|
|
|
@@ -436,7 +578,7 @@ class QdrantHybridRetriever:
|
|
|
436
578
|
)
|
|
437
579
|
|
|
438
580
|
@classmethod
|
|
439
|
-
def from_dict(cls, data:
|
|
581
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantHybridRetriever":
|
|
440
582
|
"""
|
|
441
583
|
Deserializes the component from a dictionary.
|
|
442
584
|
|
|
@@ -453,18 +595,18 @@ class QdrantHybridRetriever:
|
|
|
453
595
|
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
454
596
|
return default_from_dict(cls, data)
|
|
455
597
|
|
|
456
|
-
@component.output_types(documents=
|
|
598
|
+
@component.output_types(documents=list[Document])
|
|
457
599
|
def run(
|
|
458
600
|
self,
|
|
459
|
-
query_embedding:
|
|
601
|
+
query_embedding: list[float],
|
|
460
602
|
query_sparse_embedding: SparseEmbedding,
|
|
461
|
-
filters:
|
|
462
|
-
top_k:
|
|
463
|
-
return_embedding:
|
|
464
|
-
score_threshold:
|
|
465
|
-
group_by:
|
|
466
|
-
group_size:
|
|
467
|
-
):
|
|
603
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
604
|
+
top_k: int | None = None,
|
|
605
|
+
return_embedding: bool | None = None,
|
|
606
|
+
score_threshold: float | None = None,
|
|
607
|
+
group_by: str | None = None,
|
|
608
|
+
group_size: int | None = None,
|
|
609
|
+
) -> dict[str, list[Document]]:
|
|
468
610
|
"""
|
|
469
611
|
Run the Sparse Embedding Retriever on the given input data.
|
|
470
612
|
|
|
@@ -486,8 +628,19 @@ class QdrantHybridRetriever:
|
|
|
486
628
|
:returns:
|
|
487
629
|
The retrieved documents.
|
|
488
630
|
|
|
631
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
489
632
|
"""
|
|
490
|
-
|
|
633
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
634
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
635
|
+
):
|
|
636
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
637
|
+
|
|
638
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
639
|
+
filters = apply_filter_policy(
|
|
640
|
+
filter_policy=self._filter_policy,
|
|
641
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
642
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
643
|
+
)
|
|
491
644
|
|
|
492
645
|
docs = self._document_store._query_hybrid(
|
|
493
646
|
query_embedding=query_embedding,
|
|
@@ -501,3 +654,63 @@ class QdrantHybridRetriever:
|
|
|
501
654
|
)
|
|
502
655
|
|
|
503
656
|
return {"documents": docs}
|
|
657
|
+
|
|
658
|
+
@component.output_types(documents=list[Document])
|
|
659
|
+
async def run_async(
|
|
660
|
+
self,
|
|
661
|
+
query_embedding: list[float],
|
|
662
|
+
query_sparse_embedding: SparseEmbedding,
|
|
663
|
+
filters: dict[str, Any] | models.Filter | None = None,
|
|
664
|
+
top_k: int | None = None,
|
|
665
|
+
return_embedding: bool | None = None,
|
|
666
|
+
score_threshold: float | None = None,
|
|
667
|
+
group_by: str | None = None,
|
|
668
|
+
group_size: int | None = None,
|
|
669
|
+
) -> dict[str, list[Document]]:
|
|
670
|
+
"""
|
|
671
|
+
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
672
|
+
|
|
673
|
+
:param query_embedding: Dense embedding of the query.
|
|
674
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
675
|
+
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
676
|
+
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
677
|
+
details.
|
|
678
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
679
|
+
groups to return.
|
|
680
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
681
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
682
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
683
|
+
depending on the Distance function used.
|
|
684
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
685
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
686
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
687
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
688
|
+
:returns:
|
|
689
|
+
The retrieved documents.
|
|
690
|
+
|
|
691
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
692
|
+
"""
|
|
693
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
694
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
695
|
+
):
|
|
696
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
697
|
+
|
|
698
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
699
|
+
filters = apply_filter_policy(
|
|
700
|
+
filter_policy=self._filter_policy,
|
|
701
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
702
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
703
|
+
)
|
|
704
|
+
|
|
705
|
+
docs = await self._document_store._query_hybrid_async(
|
|
706
|
+
query_embedding=query_embedding,
|
|
707
|
+
query_sparse_embedding=query_sparse_embedding,
|
|
708
|
+
filters=filters,
|
|
709
|
+
top_k=top_k or self._top_k,
|
|
710
|
+
return_embedding=return_embedding or self._return_embedding,
|
|
711
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
712
|
+
group_by=group_by or self._group_by,
|
|
713
|
+
group_size=group_size or self._group_size,
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
return {"documents": docs}
|
|
File without changes
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
import uuid
|
|
3
|
-
from typing import List, Union
|
|
4
2
|
|
|
3
|
+
from haystack import logging
|
|
5
4
|
from haystack.dataclasses import Document
|
|
6
5
|
from qdrant_client.http import models as rest
|
|
7
6
|
|
|
@@ -15,13 +14,14 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
|
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def convert_haystack_documents_to_qdrant_points(
|
|
18
|
-
documents:
|
|
17
|
+
documents: list[Document],
|
|
19
18
|
*,
|
|
20
19
|
use_sparse_embeddings: bool,
|
|
21
|
-
) ->
|
|
20
|
+
) -> list[rest.PointStruct]:
|
|
22
21
|
points = []
|
|
23
22
|
for document in documents:
|
|
24
23
|
payload = document.to_dict(flatten=False)
|
|
24
|
+
|
|
25
25
|
if use_sparse_embeddings:
|
|
26
26
|
vector = {}
|
|
27
27
|
|
|
@@ -36,7 +36,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
36
36
|
|
|
37
37
|
else:
|
|
38
38
|
vector = payload.pop("embedding") or {}
|
|
39
|
-
_id = convert_id(
|
|
39
|
+
_id = convert_id(document.id)
|
|
40
40
|
|
|
41
41
|
point = rest.PointStruct(
|
|
42
42
|
payload=payload,
|
|
@@ -57,23 +57,25 @@ def convert_id(_id: str) -> str:
|
|
|
57
57
|
return uuid.uuid5(UUID_NAMESPACE, _id).hex
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
QdrantPoint =
|
|
60
|
+
QdrantPoint = rest.ScoredPoint | rest.Record
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
|
|
64
|
-
payload =
|
|
64
|
+
payload = point.payload or {}
|
|
65
65
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
66
66
|
|
|
67
67
|
if not use_sparse_embeddings:
|
|
68
68
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
69
|
-
elif hasattr(point, "vector") and point.vector is not None:
|
|
69
|
+
elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
|
|
70
70
|
payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
|
|
71
71
|
|
|
72
72
|
if SPARSE_VECTORS_NAME in point.vector:
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
sparse_vector = point.vector[SPARSE_VECTORS_NAME]
|
|
74
|
+
if isinstance(sparse_vector, rest.SparseVector):
|
|
75
|
+
sparse_vector_dict = {
|
|
76
|
+
"indices": sparse_vector.indices,
|
|
77
|
+
"values": sparse_vector.values,
|
|
78
|
+
}
|
|
79
|
+
payload["sparse_embedding"] = sparse_vector_dict
|
|
78
80
|
|
|
79
81
|
return Document.from_dict(payload)
|