qdrant-haystack 5.0.0__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/retriever.py +60 -6
- haystack_integrations/document_stores/qdrant/document_store.py +159 -55
- {qdrant_haystack-5.0.0.dist-info → qdrant_haystack-5.1.0.dist-info}/METADATA +1 -1
- {qdrant_haystack-5.0.0.dist-info → qdrant_haystack-5.1.0.dist-info}/RECORD +6 -6
- {qdrant_haystack-5.0.0.dist-info → qdrant_haystack-5.1.0.dist-info}/WHEEL +0 -0
- {qdrant_haystack-5.0.0.dist-info → qdrant_haystack-5.1.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -44,13 +44,16 @@ class QdrantEmbeddingRetriever:
|
|
|
44
44
|
return_embedding: bool = False,
|
|
45
45
|
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
|
|
46
46
|
score_threshold: Optional[float] = None,
|
|
47
|
+
group_by: Optional[str] = None,
|
|
48
|
+
group_size: Optional[int] = None,
|
|
47
49
|
):
|
|
48
50
|
"""
|
|
49
51
|
Create a QdrantEmbeddingRetriever component.
|
|
50
52
|
|
|
51
53
|
:param document_store: An instance of QdrantDocumentStore.
|
|
52
54
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
53
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
55
|
+
:param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
|
|
56
|
+
groups to return.
|
|
54
57
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
55
58
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
56
59
|
:param filter_policy: Policy to determine how filters are applied.
|
|
@@ -58,6 +61,9 @@ class QdrantEmbeddingRetriever:
|
|
|
58
61
|
Score of the returned result might be higher or smaller than the threshold
|
|
59
62
|
depending on the `similarity` function specified in the Document Store.
|
|
60
63
|
E.g. for cosine similarity only higher scores will be returned.
|
|
64
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
65
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
66
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
61
67
|
|
|
62
68
|
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
63
69
|
"""
|
|
@@ -75,6 +81,8 @@ class QdrantEmbeddingRetriever:
|
|
|
75
81
|
filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
|
|
76
82
|
)
|
|
77
83
|
self._score_threshold = score_threshold
|
|
84
|
+
self._group_by = group_by
|
|
85
|
+
self._group_size = group_size
|
|
78
86
|
|
|
79
87
|
def to_dict(self) -> Dict[str, Any]:
|
|
80
88
|
"""
|
|
@@ -92,6 +100,8 @@ class QdrantEmbeddingRetriever:
|
|
|
92
100
|
scale_score=self._scale_score,
|
|
93
101
|
return_embedding=self._return_embedding,
|
|
94
102
|
score_threshold=self._score_threshold,
|
|
103
|
+
group_by=self._group_by,
|
|
104
|
+
group_size=self._group_size,
|
|
95
105
|
)
|
|
96
106
|
d["init_parameters"]["document_store"] = self._document_store.to_dict()
|
|
97
107
|
|
|
@@ -124,16 +134,22 @@ class QdrantEmbeddingRetriever:
|
|
|
124
134
|
scale_score: Optional[bool] = None,
|
|
125
135
|
return_embedding: Optional[bool] = None,
|
|
126
136
|
score_threshold: Optional[float] = None,
|
|
137
|
+
group_by: Optional[str] = None,
|
|
138
|
+
group_size: Optional[int] = None,
|
|
127
139
|
):
|
|
128
140
|
"""
|
|
129
141
|
Run the Embedding Retriever on the given input data.
|
|
130
142
|
|
|
131
143
|
:param query_embedding: Embedding of the query.
|
|
132
144
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
133
|
-
:param top_k: The maximum number of documents to return.
|
|
145
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
146
|
+
groups to return.
|
|
134
147
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
135
148
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
136
149
|
:param score_threshold: A minimal score threshold for the result.
|
|
150
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
151
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
152
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
137
153
|
:returns:
|
|
138
154
|
The retrieved documents.
|
|
139
155
|
|
|
@@ -147,6 +163,8 @@ class QdrantEmbeddingRetriever:
|
|
|
147
163
|
scale_score=scale_score or self._scale_score,
|
|
148
164
|
return_embedding=return_embedding or self._return_embedding,
|
|
149
165
|
score_threshold=score_threshold or self._score_threshold,
|
|
166
|
+
group_by=group_by or self._group_by,
|
|
167
|
+
group_size=group_size or self._group_size,
|
|
150
168
|
)
|
|
151
169
|
|
|
152
170
|
return {"documents": docs}
|
|
@@ -188,13 +206,16 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
188
206
|
return_embedding: bool = False,
|
|
189
207
|
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
|
|
190
208
|
score_threshold: Optional[float] = None,
|
|
209
|
+
group_by: Optional[str] = None,
|
|
210
|
+
group_size: Optional[int] = None,
|
|
191
211
|
):
|
|
192
212
|
"""
|
|
193
213
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
194
214
|
|
|
195
215
|
:param document_store: An instance of QdrantDocumentStore.
|
|
196
216
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
197
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
217
|
+
:param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
|
|
218
|
+
groups to return.
|
|
198
219
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
199
220
|
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
200
221
|
:param filter_policy: Policy to determine how filters are applied. Defaults to "replace".
|
|
@@ -202,6 +223,9 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
202
223
|
Score of the returned result might be higher or smaller than the threshold
|
|
203
224
|
depending on the Distance function used.
|
|
204
225
|
E.g. for cosine similarity only higher scores will be returned.
|
|
226
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
227
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
228
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
205
229
|
|
|
206
230
|
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
207
231
|
"""
|
|
@@ -219,6 +243,8 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
219
243
|
filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
|
|
220
244
|
)
|
|
221
245
|
self._score_threshold = score_threshold
|
|
246
|
+
self._group_by = group_by
|
|
247
|
+
self._group_size = group_size
|
|
222
248
|
|
|
223
249
|
def to_dict(self) -> Dict[str, Any]:
|
|
224
250
|
"""
|
|
@@ -236,6 +262,8 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
236
262
|
filter_policy=self._filter_policy.value,
|
|
237
263
|
return_embedding=self._return_embedding,
|
|
238
264
|
score_threshold=self._score_threshold,
|
|
265
|
+
group_by=self._group_by,
|
|
266
|
+
group_size=self._group_size,
|
|
239
267
|
)
|
|
240
268
|
d["init_parameters"]["document_store"] = self._document_store.to_dict()
|
|
241
269
|
|
|
@@ -268,6 +296,8 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
268
296
|
scale_score: Optional[bool] = None,
|
|
269
297
|
return_embedding: Optional[bool] = None,
|
|
270
298
|
score_threshold: Optional[float] = None,
|
|
299
|
+
group_by: Optional[str] = None,
|
|
300
|
+
group_size: Optional[int] = None,
|
|
271
301
|
):
|
|
272
302
|
"""
|
|
273
303
|
Run the Sparse Embedding Retriever on the given input data.
|
|
@@ -276,13 +306,17 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
276
306
|
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
277
307
|
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
278
308
|
details.
|
|
279
|
-
:param top_k: The maximum number of documents to return.
|
|
309
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
310
|
+
groups to return.
|
|
280
311
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
281
312
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
282
313
|
:param score_threshold: A minimal score threshold for the result.
|
|
283
314
|
Score of the returned result might be higher or smaller than the threshold
|
|
284
315
|
depending on the Distance function used.
|
|
285
316
|
E.g. for cosine similarity only higher scores will be returned.
|
|
317
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
318
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
319
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
286
320
|
:returns:
|
|
287
321
|
The retrieved documents.
|
|
288
322
|
|
|
@@ -296,6 +330,8 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
296
330
|
scale_score=scale_score or self._scale_score,
|
|
297
331
|
return_embedding=return_embedding or self._return_embedding,
|
|
298
332
|
score_threshold=score_threshold or self._score_threshold,
|
|
333
|
+
group_by=group_by or self._group_by,
|
|
334
|
+
group_size=group_size or self._group_size,
|
|
299
335
|
)
|
|
300
336
|
|
|
301
337
|
return {"documents": docs}
|
|
@@ -342,19 +378,25 @@ class QdrantHybridRetriever:
|
|
|
342
378
|
return_embedding: bool = False,
|
|
343
379
|
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
|
|
344
380
|
score_threshold: Optional[float] = None,
|
|
381
|
+
group_by: Optional[str] = None,
|
|
382
|
+
group_size: Optional[int] = None,
|
|
345
383
|
):
|
|
346
384
|
"""
|
|
347
385
|
Create a QdrantHybridRetriever component.
|
|
348
386
|
|
|
349
387
|
:param document_store: An instance of QdrantDocumentStore.
|
|
350
388
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
351
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
389
|
+
:param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
|
|
390
|
+
groups to return.
|
|
352
391
|
:param return_embedding: Whether to return the embeddings of the retrieved Documents.
|
|
353
392
|
:param filter_policy: Policy to determine how filters are applied.
|
|
354
393
|
:param score_threshold: A minimal score threshold for the result.
|
|
355
394
|
Score of the returned result might be higher or smaller than the threshold
|
|
356
395
|
depending on the Distance function used.
|
|
357
396
|
E.g. for cosine similarity only higher scores will be returned.
|
|
397
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
398
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
399
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
358
400
|
|
|
359
401
|
:raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
|
|
360
402
|
"""
|
|
@@ -371,6 +413,8 @@ class QdrantHybridRetriever:
|
|
|
371
413
|
filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
|
|
372
414
|
)
|
|
373
415
|
self._score_threshold = score_threshold
|
|
416
|
+
self._group_by = group_by
|
|
417
|
+
self._group_size = group_size
|
|
374
418
|
|
|
375
419
|
def to_dict(self) -> Dict[str, Any]:
|
|
376
420
|
"""
|
|
@@ -387,6 +431,8 @@ class QdrantHybridRetriever:
|
|
|
387
431
|
filter_policy=self._filter_policy.value,
|
|
388
432
|
return_embedding=self._return_embedding,
|
|
389
433
|
score_threshold=self._score_threshold,
|
|
434
|
+
group_by=self._group_by,
|
|
435
|
+
group_size=self._group_size,
|
|
390
436
|
)
|
|
391
437
|
|
|
392
438
|
@classmethod
|
|
@@ -416,6 +462,8 @@ class QdrantHybridRetriever:
|
|
|
416
462
|
top_k: Optional[int] = None,
|
|
417
463
|
return_embedding: Optional[bool] = None,
|
|
418
464
|
score_threshold: Optional[float] = None,
|
|
465
|
+
group_by: Optional[str] = None,
|
|
466
|
+
group_size: Optional[int] = None,
|
|
419
467
|
):
|
|
420
468
|
"""
|
|
421
469
|
Run the Hybrid Retriever on the given input data.
|
|
@@ -425,12 +473,16 @@ class QdrantHybridRetriever:
|
|
|
425
473
|
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
426
474
|
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
427
475
|
details.
|
|
428
|
-
:param top_k: The maximum number of documents to return.
|
|
476
|
+
:param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
477
|
+
groups to return.
|
|
429
478
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
430
479
|
:param score_threshold: A minimal score threshold for the result.
|
|
431
480
|
Score of the returned result might be higher or smaller than the threshold
|
|
432
481
|
depending on the Distance function used.
|
|
433
482
|
E.g. for cosine similarity only higher scores will be returned.
|
|
483
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
484
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
485
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
434
486
|
:returns:
|
|
435
487
|
The retrieved documents.
|
|
436
488
|
|
|
@@ -444,6 +496,8 @@ class QdrantHybridRetriever:
|
|
|
444
496
|
top_k=top_k or self._top_k,
|
|
445
497
|
return_embedding=return_embedding or self._return_embedding,
|
|
446
498
|
score_threshold=score_threshold or self._score_threshold,
|
|
499
|
+
group_by=group_by or self._group_by,
|
|
500
|
+
group_size=group_size or self._group_size,
|
|
447
501
|
)
|
|
448
502
|
|
|
449
503
|
return {"documents": docs}
|
|
@@ -506,19 +506,25 @@ class QdrantDocumentStore:
|
|
|
506
506
|
scale_score: bool = False,
|
|
507
507
|
return_embedding: bool = False,
|
|
508
508
|
score_threshold: Optional[float] = None,
|
|
509
|
+
group_by: Optional[str] = None,
|
|
510
|
+
group_size: Optional[int] = None,
|
|
509
511
|
) -> List[Document]:
|
|
510
512
|
"""
|
|
511
513
|
Queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
512
514
|
|
|
513
515
|
:param query_sparse_embedding: Sparse embedding of the query.
|
|
514
516
|
:param filters: Filters applied to the retrieved documents.
|
|
515
|
-
:param top_k: Maximum number of documents to return.
|
|
517
|
+
:param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
518
|
+
groups to return.
|
|
516
519
|
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
517
520
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
518
521
|
:param score_threshold: A minimal score threshold for the result.
|
|
519
522
|
Score of the returned result might be higher or smaller than the threshold
|
|
520
523
|
depending on the Distance function used.
|
|
521
524
|
E.g. for cosine similarity only higher scores will be returned.
|
|
525
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
526
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
527
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
522
528
|
|
|
523
529
|
:returns: List of documents that are most similar to `query_sparse_embedding`.
|
|
524
530
|
|
|
@@ -536,22 +542,47 @@ class QdrantDocumentStore:
|
|
|
536
542
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
537
543
|
query_indices = query_sparse_embedding.indices
|
|
538
544
|
query_values = query_sparse_embedding.values
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
545
|
+
if group_by:
|
|
546
|
+
groups = self.client.query_points_groups(
|
|
547
|
+
collection_name=self.index,
|
|
548
|
+
query=rest.SparseVector(
|
|
549
|
+
indices=query_indices,
|
|
550
|
+
values=query_values,
|
|
551
|
+
),
|
|
552
|
+
using=SPARSE_VECTORS_NAME,
|
|
553
|
+
query_filter=qdrant_filters,
|
|
554
|
+
limit=top_k,
|
|
555
|
+
group_by=group_by,
|
|
556
|
+
group_size=group_size,
|
|
557
|
+
with_vectors=return_embedding,
|
|
558
|
+
score_threshold=score_threshold,
|
|
559
|
+
).groups
|
|
560
|
+
results = (
|
|
561
|
+
[
|
|
562
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
563
|
+
for group in groups
|
|
564
|
+
for point in group.hits
|
|
565
|
+
]
|
|
566
|
+
if groups
|
|
567
|
+
else []
|
|
568
|
+
)
|
|
569
|
+
else:
|
|
570
|
+
points = self.client.query_points(
|
|
571
|
+
collection_name=self.index,
|
|
572
|
+
query=rest.SparseVector(
|
|
573
|
+
indices=query_indices,
|
|
574
|
+
values=query_values,
|
|
575
|
+
),
|
|
576
|
+
using=SPARSE_VECTORS_NAME,
|
|
577
|
+
query_filter=qdrant_filters,
|
|
578
|
+
limit=top_k,
|
|
579
|
+
with_vectors=return_embedding,
|
|
580
|
+
score_threshold=score_threshold,
|
|
581
|
+
).points
|
|
582
|
+
results = [
|
|
583
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
584
|
+
for point in points
|
|
585
|
+
]
|
|
555
586
|
if scale_score:
|
|
556
587
|
for document in results:
|
|
557
588
|
score = document.score
|
|
@@ -567,37 +598,65 @@ class QdrantDocumentStore:
|
|
|
567
598
|
scale_score: bool = False,
|
|
568
599
|
return_embedding: bool = False,
|
|
569
600
|
score_threshold: Optional[float] = None,
|
|
601
|
+
group_by: Optional[str] = None,
|
|
602
|
+
group_size: Optional[int] = None,
|
|
570
603
|
) -> List[Document]:
|
|
571
604
|
"""
|
|
572
605
|
Queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
573
606
|
|
|
574
607
|
:param query_embedding: Dense embedding of the query.
|
|
575
608
|
:param filters: Filters applied to the retrieved documents.
|
|
576
|
-
:param top_k: Maximum number of documents to return.
|
|
609
|
+
:param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
610
|
+
groups to return.
|
|
577
611
|
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
578
612
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
579
613
|
:param score_threshold: A minimal score threshold for the result.
|
|
580
614
|
Score of the returned result might be higher or smaller than the threshold
|
|
581
615
|
depending on the Distance function used.
|
|
582
616
|
E.g. for cosine similarity only higher scores will be returned.
|
|
617
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
618
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
619
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
583
620
|
|
|
584
621
|
:returns: List of documents that are most similar to `query_embedding`.
|
|
585
622
|
"""
|
|
586
623
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
624
|
+
if group_by:
|
|
625
|
+
groups = self.client.query_points_groups(
|
|
626
|
+
collection_name=self.index,
|
|
627
|
+
query=query_embedding,
|
|
628
|
+
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
629
|
+
query_filter=qdrant_filters,
|
|
630
|
+
limit=top_k,
|
|
631
|
+
group_by=group_by,
|
|
632
|
+
group_size=group_size,
|
|
633
|
+
with_vectors=return_embedding,
|
|
634
|
+
score_threshold=score_threshold,
|
|
635
|
+
).groups
|
|
636
|
+
results = (
|
|
637
|
+
[
|
|
638
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
639
|
+
for group in groups
|
|
640
|
+
for point in group.hits
|
|
641
|
+
]
|
|
642
|
+
if groups
|
|
643
|
+
else []
|
|
644
|
+
)
|
|
645
|
+
else:
|
|
646
|
+
points = self.client.query_points(
|
|
647
|
+
collection_name=self.index,
|
|
648
|
+
query=query_embedding,
|
|
649
|
+
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
650
|
+
query_filter=qdrant_filters,
|
|
651
|
+
limit=top_k,
|
|
652
|
+
with_vectors=return_embedding,
|
|
653
|
+
score_threshold=score_threshold,
|
|
654
|
+
).points
|
|
655
|
+
results = [
|
|
656
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
657
|
+
for point in points
|
|
658
|
+
]
|
|
587
659
|
|
|
588
|
-
points = self.client.query_points(
|
|
589
|
-
collection_name=self.index,
|
|
590
|
-
query=query_embedding,
|
|
591
|
-
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
592
|
-
query_filter=qdrant_filters,
|
|
593
|
-
limit=top_k,
|
|
594
|
-
with_vectors=return_embedding,
|
|
595
|
-
score_threshold=score_threshold,
|
|
596
|
-
).points
|
|
597
|
-
results = [
|
|
598
|
-
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
599
|
-
for point in points
|
|
600
|
-
]
|
|
601
660
|
if scale_score:
|
|
602
661
|
for document in results:
|
|
603
662
|
score = document.score
|
|
@@ -616,6 +675,8 @@ class QdrantDocumentStore:
|
|
|
616
675
|
top_k: int = 10,
|
|
617
676
|
return_embedding: bool = False,
|
|
618
677
|
score_threshold: Optional[float] = None,
|
|
678
|
+
group_by: Optional[str] = None,
|
|
679
|
+
group_size: Optional[int] = None,
|
|
619
680
|
) -> List[Document]:
|
|
620
681
|
"""
|
|
621
682
|
Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
|
|
@@ -626,12 +687,16 @@ class QdrantDocumentStore:
|
|
|
626
687
|
:param query_embedding: Dense embedding of the query.
|
|
627
688
|
:param query_sparse_embedding: Sparse embedding of the query.
|
|
628
689
|
:param filters: Filters applied to the retrieved documents.
|
|
629
|
-
:param top_k: Maximum number of documents to return.
|
|
690
|
+
:param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
|
|
691
|
+
groups to return.
|
|
630
692
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
631
693
|
:param score_threshold: A minimal score threshold for the result.
|
|
632
694
|
Score of the returned result might be higher or smaller than the threshold
|
|
633
695
|
depending on the Distance function used.
|
|
634
696
|
E.g. for cosine similarity only higher scores will be returned.
|
|
697
|
+
:param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
|
|
698
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
699
|
+
:param group_size: Maximum amount of points to return per group. Default is 3.
|
|
635
700
|
|
|
636
701
|
:returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
|
|
637
702
|
|
|
@@ -651,34 +716,73 @@ class QdrantDocumentStore:
|
|
|
651
716
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
652
717
|
|
|
653
718
|
try:
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
719
|
+
if group_by:
|
|
720
|
+
groups = self.client.query_points_groups(
|
|
721
|
+
collection_name=self.index,
|
|
722
|
+
prefetch=[
|
|
723
|
+
rest.Prefetch(
|
|
724
|
+
query=rest.SparseVector(
|
|
725
|
+
indices=query_sparse_embedding.indices,
|
|
726
|
+
values=query_sparse_embedding.values,
|
|
727
|
+
),
|
|
728
|
+
using=SPARSE_VECTORS_NAME,
|
|
729
|
+
filter=qdrant_filters,
|
|
661
730
|
),
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
731
|
+
rest.Prefetch(
|
|
732
|
+
query=query_embedding,
|
|
733
|
+
using=DENSE_VECTORS_NAME,
|
|
734
|
+
filter=qdrant_filters,
|
|
735
|
+
),
|
|
736
|
+
],
|
|
737
|
+
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
738
|
+
limit=top_k,
|
|
739
|
+
group_by=group_by,
|
|
740
|
+
group_size=group_size,
|
|
741
|
+
score_threshold=score_threshold,
|
|
742
|
+
with_payload=True,
|
|
743
|
+
with_vectors=return_embedding,
|
|
744
|
+
).groups
|
|
745
|
+
else:
|
|
746
|
+
points = self.client.query_points(
|
|
747
|
+
collection_name=self.index,
|
|
748
|
+
prefetch=[
|
|
749
|
+
rest.Prefetch(
|
|
750
|
+
query=rest.SparseVector(
|
|
751
|
+
indices=query_sparse_embedding.indices,
|
|
752
|
+
values=query_sparse_embedding.values,
|
|
753
|
+
),
|
|
754
|
+
using=SPARSE_VECTORS_NAME,
|
|
755
|
+
filter=qdrant_filters,
|
|
756
|
+
),
|
|
757
|
+
rest.Prefetch(
|
|
758
|
+
query=query_embedding,
|
|
759
|
+
using=DENSE_VECTORS_NAME,
|
|
760
|
+
filter=qdrant_filters,
|
|
761
|
+
),
|
|
762
|
+
],
|
|
763
|
+
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
764
|
+
limit=top_k,
|
|
765
|
+
score_threshold=score_threshold,
|
|
766
|
+
with_payload=True,
|
|
767
|
+
with_vectors=return_embedding,
|
|
768
|
+
).points
|
|
769
|
+
|
|
677
770
|
except Exception as e:
|
|
678
771
|
msg = "Error during hybrid search"
|
|
679
772
|
raise QdrantStoreError(msg) from e
|
|
680
773
|
|
|
681
|
-
|
|
774
|
+
if group_by:
|
|
775
|
+
results = (
|
|
776
|
+
[
|
|
777
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
778
|
+
for group in groups
|
|
779
|
+
for point in group.hits
|
|
780
|
+
]
|
|
781
|
+
if groups
|
|
782
|
+
else []
|
|
783
|
+
)
|
|
784
|
+
else:
|
|
785
|
+
results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
|
|
682
786
|
|
|
683
787
|
return results
|
|
684
788
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 5.0.0
|
|
3
|
+
Version: 5.1.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
|
|
2
|
-
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=
|
|
2
|
+
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=VsQVsvf79imTCdWUKikUxpjczl5oxOV64a91aGXZwpE,21997
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
4
|
haystack_integrations/document_stores/qdrant/converters.py,sha256=2hcuI3kty1dVHzX1WGXxEtlrnZ9E8TAG56XATCFa6Pw,2491
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=WPjuSecd_uVy6RdM6T5w6kqPrXjOwnUb1sgROQxMfVE,42524
|
|
6
6
|
haystack_integrations/document_stores/qdrant/filters.py,sha256=Nv_eKIYKwUWvldJfa0omfFQ0kgqi6L3DUFeMuIWziOY,11751
|
|
7
7
|
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=yhZr4GB6N1S-Ikzl52hpuZt2aHNIb4leqFDhVMU3Uho,4910
|
|
8
|
-
qdrant_haystack-5.
|
|
9
|
-
qdrant_haystack-5.
|
|
10
|
-
qdrant_haystack-5.
|
|
11
|
-
qdrant_haystack-5.0.0.dist-info/RECORD,,
|
|
8
|
+
qdrant_haystack-5.1.0.dist-info/METADATA,sha256=grNTLGCFSD3JP43zMEyHjkJfhdeuU0ATqB2hKknZRR8,1863
|
|
9
|
+
qdrant_haystack-5.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
+
qdrant_haystack-5.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-5.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|