qdrant-haystack 4.2.0__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack has been flagged as potentially problematic; more details are available from the original diff page.

@@ -44,13 +44,16 @@ class QdrantEmbeddingRetriever:
44
44
  return_embedding: bool = False,
45
45
  filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
46
46
  score_threshold: Optional[float] = None,
47
+ group_by: Optional[str] = None,
48
+ group_size: Optional[int] = None,
47
49
  ):
48
50
  """
49
51
  Create a QdrantEmbeddingRetriever component.
50
52
 
51
53
  :param document_store: An instance of QdrantDocumentStore.
52
54
  :param filters: A dictionary with filters to narrow down the search space.
53
- :param top_k: The maximum number of documents to retrieve.
55
+ :param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
56
+ groups to return.
54
57
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
55
58
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
56
59
  :param filter_policy: Policy to determine how filters are applied.
@@ -58,6 +61,9 @@ class QdrantEmbeddingRetriever:
58
61
  Score of the returned result might be higher or smaller than the threshold
59
62
  depending on the `similarity` function specified in the Document Store.
60
63
  E.g. for cosine similarity only higher scores will be returned.
64
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
65
+ value, all values will be used for grouping. One point can be in multiple groups.
66
+ :param group_size: Maximum amount of points to return per group. Default is 3.
61
67
 
62
68
  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
63
69
  """
@@ -75,6 +81,8 @@ class QdrantEmbeddingRetriever:
75
81
  filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
76
82
  )
77
83
  self._score_threshold = score_threshold
84
+ self._group_by = group_by
85
+ self._group_size = group_size
78
86
 
79
87
  def to_dict(self) -> Dict[str, Any]:
80
88
  """
@@ -92,6 +100,8 @@ class QdrantEmbeddingRetriever:
92
100
  scale_score=self._scale_score,
93
101
  return_embedding=self._return_embedding,
94
102
  score_threshold=self._score_threshold,
103
+ group_by=self._group_by,
104
+ group_size=self._group_size,
95
105
  )
96
106
  d["init_parameters"]["document_store"] = self._document_store.to_dict()
97
107
 
@@ -124,16 +134,22 @@ class QdrantEmbeddingRetriever:
124
134
  scale_score: Optional[bool] = None,
125
135
  return_embedding: Optional[bool] = None,
126
136
  score_threshold: Optional[float] = None,
137
+ group_by: Optional[str] = None,
138
+ group_size: Optional[int] = None,
127
139
  ):
128
140
  """
129
141
  Run the Embedding Retriever on the given input data.
130
142
 
131
143
  :param query_embedding: Embedding of the query.
132
144
  :param filters: A dictionary with filters to narrow down the search space.
133
- :param top_k: The maximum number of documents to return.
145
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
146
+ groups to return.
134
147
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
135
148
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
136
149
  :param score_threshold: A minimal score threshold for the result.
150
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
151
+ value, all values will be used for grouping. One point can be in multiple groups.
152
+ :param group_size: Maximum amount of points to return per group. Default is 3.
137
153
  :returns:
138
154
  The retrieved documents.
139
155
 
@@ -147,6 +163,8 @@ class QdrantEmbeddingRetriever:
147
163
  scale_score=scale_score or self._scale_score,
148
164
  return_embedding=return_embedding or self._return_embedding,
149
165
  score_threshold=score_threshold or self._score_threshold,
166
+ group_by=group_by or self._group_by,
167
+ group_size=group_size or self._group_size,
150
168
  )
151
169
 
152
170
  return {"documents": docs}
@@ -188,13 +206,16 @@ class QdrantSparseEmbeddingRetriever:
188
206
  return_embedding: bool = False,
189
207
  filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
190
208
  score_threshold: Optional[float] = None,
209
+ group_by: Optional[str] = None,
210
+ group_size: Optional[int] = None,
191
211
  ):
192
212
  """
193
213
  Create a QdrantSparseEmbeddingRetriever component.
194
214
 
195
215
  :param document_store: An instance of QdrantDocumentStore.
196
216
  :param filters: A dictionary with filters to narrow down the search space.
197
- :param top_k: The maximum number of documents to retrieve.
217
+ :param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
218
+ groups to return.
198
219
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
199
220
  :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
200
221
  :param filter_policy: Policy to determine how filters are applied. Defaults to "replace".
@@ -202,6 +223,9 @@ class QdrantSparseEmbeddingRetriever:
202
223
  Score of the returned result might be higher or smaller than the threshold
203
224
  depending on the Distance function used.
204
225
  E.g. for cosine similarity only higher scores will be returned.
226
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
227
+ value, all values will be used for grouping. One point can be in multiple groups.
228
+ :param group_size: Maximum amount of points to return per group. Default is 3.
205
229
 
206
230
  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
207
231
  """
@@ -219,6 +243,8 @@ class QdrantSparseEmbeddingRetriever:
219
243
  filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
220
244
  )
221
245
  self._score_threshold = score_threshold
246
+ self._group_by = group_by
247
+ self._group_size = group_size
222
248
 
223
249
  def to_dict(self) -> Dict[str, Any]:
224
250
  """
@@ -236,6 +262,8 @@ class QdrantSparseEmbeddingRetriever:
236
262
  filter_policy=self._filter_policy.value,
237
263
  return_embedding=self._return_embedding,
238
264
  score_threshold=self._score_threshold,
265
+ group_by=self._group_by,
266
+ group_size=self._group_size,
239
267
  )
240
268
  d["init_parameters"]["document_store"] = self._document_store.to_dict()
241
269
 
@@ -268,6 +296,8 @@ class QdrantSparseEmbeddingRetriever:
268
296
  scale_score: Optional[bool] = None,
269
297
  return_embedding: Optional[bool] = None,
270
298
  score_threshold: Optional[float] = None,
299
+ group_by: Optional[str] = None,
300
+ group_size: Optional[int] = None,
271
301
  ):
272
302
  """
273
303
  Run the Sparse Embedding Retriever on the given input data.
@@ -276,13 +306,17 @@ class QdrantSparseEmbeddingRetriever:
276
306
  :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
277
307
  the `filter_policy` chosen at retriever initialization. See init method docstring for more
278
308
  details.
279
- :param top_k: The maximum number of documents to return.
309
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
310
+ groups to return.
280
311
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
281
312
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
282
313
  :param score_threshold: A minimal score threshold for the result.
283
314
  Score of the returned result might be higher or smaller than the threshold
284
315
  depending on the Distance function used.
285
316
  E.g. for cosine similarity only higher scores will be returned.
317
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
318
+ value, all values will be used for grouping. One point can be in multiple groups.
319
+ :param group_size: Maximum amount of points to return per group. Default is 3.
286
320
  :returns:
287
321
  The retrieved documents.
288
322
 
@@ -296,6 +330,8 @@ class QdrantSparseEmbeddingRetriever:
296
330
  scale_score=scale_score or self._scale_score,
297
331
  return_embedding=return_embedding or self._return_embedding,
298
332
  score_threshold=score_threshold or self._score_threshold,
333
+ group_by=group_by or self._group_by,
334
+ group_size=group_size or self._group_size,
299
335
  )
300
336
 
301
337
  return {"documents": docs}
@@ -342,19 +378,25 @@ class QdrantHybridRetriever:
342
378
  return_embedding: bool = False,
343
379
  filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
344
380
  score_threshold: Optional[float] = None,
381
+ group_by: Optional[str] = None,
382
+ group_size: Optional[int] = None,
345
383
  ):
346
384
  """
347
385
  Create a QdrantHybridRetriever component.
348
386
 
349
387
  :param document_store: An instance of QdrantDocumentStore.
350
388
  :param filters: A dictionary with filters to narrow down the search space.
351
- :param top_k: The maximum number of documents to retrieve.
389
+ :param top_k: The maximum number of documents to retrieve. If using `group_by` parameters, maximum number of
390
+ groups to return.
352
391
  :param return_embedding: Whether to return the embeddings of the retrieved Documents.
353
392
  :param filter_policy: Policy to determine how filters are applied.
354
393
  :param score_threshold: A minimal score threshold for the result.
355
394
  Score of the returned result might be higher or smaller than the threshold
356
395
  depending on the Distance function used.
357
396
  E.g. for cosine similarity only higher scores will be returned.
397
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
398
+ value, all values will be used for grouping. One point can be in multiple groups.
399
+ :param group_size: Maximum amount of points to return per group. Default is 3.
358
400
 
359
401
  :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
360
402
  """
@@ -371,6 +413,8 @@ class QdrantHybridRetriever:
371
413
  filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
372
414
  )
373
415
  self._score_threshold = score_threshold
416
+ self._group_by = group_by
417
+ self._group_size = group_size
374
418
 
375
419
  def to_dict(self) -> Dict[str, Any]:
376
420
  """
@@ -387,6 +431,8 @@ class QdrantHybridRetriever:
387
431
  filter_policy=self._filter_policy.value,
388
432
  return_embedding=self._return_embedding,
389
433
  score_threshold=self._score_threshold,
434
+ group_by=self._group_by,
435
+ group_size=self._group_size,
390
436
  )
391
437
 
392
438
  @classmethod
@@ -416,6 +462,8 @@ class QdrantHybridRetriever:
416
462
  top_k: Optional[int] = None,
417
463
  return_embedding: Optional[bool] = None,
418
464
  score_threshold: Optional[float] = None,
465
+ group_by: Optional[str] = None,
466
+ group_size: Optional[int] = None,
419
467
  ):
420
468
  """
421
469
  Run the Sparse Embedding Retriever on the given input data.
@@ -425,12 +473,16 @@ class QdrantHybridRetriever:
425
473
  :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
426
474
  the `filter_policy` chosen at retriever initialization. See init method docstring for more
427
475
  details.
428
- :param top_k: The maximum number of documents to return.
476
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
477
+ groups to return.
429
478
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
430
479
  :param score_threshold: A minimal score threshold for the result.
431
480
  Score of the returned result might be higher or smaller than the threshold
432
481
  depending on the Distance function used.
433
482
  E.g. for cosine similarity only higher scores will be returned.
483
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
484
+ value, all values will be used for grouping. One point can be in multiple groups.
485
+ :param group_size: Maximum amount of points to return per group. Default is 3.
434
486
  :returns:
435
487
  The retrieved documents.
436
488
 
@@ -444,6 +496,8 @@ class QdrantHybridRetriever:
444
496
  top_k=top_k or self._top_k,
445
497
  return_embedding=return_embedding or self._return_embedding,
446
498
  score_threshold=score_threshold or self._score_threshold,
499
+ group_by=group_by or self._group_by,
500
+ group_size=group_size or self._group_size,
447
501
  )
448
502
 
449
503
  return {"documents": docs}
@@ -334,7 +334,7 @@ class QdrantDocumentStore:
334
334
  self,
335
335
  documents: List[Document],
336
336
  policy: DuplicatePolicy = DuplicatePolicy.FAIL,
337
- ):
337
+ ) -> int:
338
338
  """
339
339
  Writes documents to Qdrant using the specified policy.
340
340
  The QdrantDocumentStore can handle duplicate documents based on the given policy.
@@ -358,7 +358,7 @@ class QdrantDocumentStore:
358
358
 
359
359
  if len(documents) == 0:
360
360
  logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
361
- return
361
+ return 0
362
362
 
363
363
  document_objects = self._handle_duplicate_documents(
364
364
  documents=documents,
@@ -383,13 +383,13 @@ class QdrantDocumentStore:
383
383
  progress_bar.update(self.write_batch_size)
384
384
  return len(document_objects)
385
385
 
386
- def delete_documents(self, ids: List[str]):
386
+ def delete_documents(self, document_ids: List[str]) -> None:
387
387
  """
388
388
  Deletes documents that match the provided `document_ids` from the document store.
389
389
 
390
390
  :param document_ids: the document ids to delete
391
391
  """
392
- ids = [convert_id(_id) for _id in ids]
392
+ ids = [convert_id(_id) for _id in document_ids]
393
393
  try:
394
394
  self.client.delete(
395
395
  collection_name=self.index,
@@ -506,19 +506,25 @@ class QdrantDocumentStore:
506
506
  scale_score: bool = False,
507
507
  return_embedding: bool = False,
508
508
  score_threshold: Optional[float] = None,
509
+ group_by: Optional[str] = None,
510
+ group_size: Optional[int] = None,
509
511
  ) -> List[Document]:
510
512
  """
511
513
  Queries Qdrant using a sparse embedding and returns the most relevant documents.
512
514
 
513
515
  :param query_sparse_embedding: Sparse embedding of the query.
514
516
  :param filters: Filters applied to the retrieved documents.
515
- :param top_k: Maximum number of documents to return.
517
+ :param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
518
+ groups to return.
516
519
  :param scale_score: Whether to scale the scores of the retrieved documents.
517
520
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
518
521
  :param score_threshold: A minimal score threshold for the result.
519
522
  Score of the returned result might be higher or smaller than the threshold
520
523
  depending on the Distance function used.
521
524
  E.g. for cosine similarity only higher scores will be returned.
525
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
526
+ value, all values will be used for grouping. One point can be in multiple groups.
527
+ :param group_size: Maximum amount of points to return per group. Default is 3.
522
528
 
523
529
  :returns: List of documents that are most similar to `query_sparse_embedding`.
524
530
 
@@ -536,22 +542,47 @@ class QdrantDocumentStore:
536
542
  qdrant_filters = convert_filters_to_qdrant(filters)
537
543
  query_indices = query_sparse_embedding.indices
538
544
  query_values = query_sparse_embedding.values
539
- points = self.client.query_points(
540
- collection_name=self.index,
541
- query=rest.SparseVector(
542
- indices=query_indices,
543
- values=query_values,
544
- ),
545
- using=SPARSE_VECTORS_NAME,
546
- query_filter=qdrant_filters,
547
- limit=top_k,
548
- with_vectors=return_embedding,
549
- score_threshold=score_threshold,
550
- ).points
551
- results = [
552
- convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
553
- for point in points
554
- ]
545
+ if group_by:
546
+ groups = self.client.query_points_groups(
547
+ collection_name=self.index,
548
+ query=rest.SparseVector(
549
+ indices=query_indices,
550
+ values=query_values,
551
+ ),
552
+ using=SPARSE_VECTORS_NAME,
553
+ query_filter=qdrant_filters,
554
+ limit=top_k,
555
+ group_by=group_by,
556
+ group_size=group_size,
557
+ with_vectors=return_embedding,
558
+ score_threshold=score_threshold,
559
+ ).groups
560
+ results = (
561
+ [
562
+ convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
563
+ for group in groups
564
+ for point in group.hits
565
+ ]
566
+ if groups
567
+ else []
568
+ )
569
+ else:
570
+ points = self.client.query_points(
571
+ collection_name=self.index,
572
+ query=rest.SparseVector(
573
+ indices=query_indices,
574
+ values=query_values,
575
+ ),
576
+ using=SPARSE_VECTORS_NAME,
577
+ query_filter=qdrant_filters,
578
+ limit=top_k,
579
+ with_vectors=return_embedding,
580
+ score_threshold=score_threshold,
581
+ ).points
582
+ results = [
583
+ convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
584
+ for point in points
585
+ ]
555
586
  if scale_score:
556
587
  for document in results:
557
588
  score = document.score
@@ -567,37 +598,65 @@ class QdrantDocumentStore:
567
598
  scale_score: bool = False,
568
599
  return_embedding: bool = False,
569
600
  score_threshold: Optional[float] = None,
601
+ group_by: Optional[str] = None,
602
+ group_size: Optional[int] = None,
570
603
  ) -> List[Document]:
571
604
  """
572
605
  Queries Qdrant using a dense embedding and returns the most relevant documents.
573
606
 
574
607
  :param query_embedding: Dense embedding of the query.
575
608
  :param filters: Filters applied to the retrieved documents.
576
- :param top_k: Maximum number of documents to return.
609
+ :param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
610
+ groups to return.
577
611
  :param scale_score: Whether to scale the scores of the retrieved documents.
578
612
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
579
613
  :param score_threshold: A minimal score threshold for the result.
580
614
  Score of the returned result might be higher or smaller than the threshold
581
615
  depending on the Distance function used.
582
616
  E.g. for cosine similarity only higher scores will be returned.
617
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
618
+ value, all values will be used for grouping. One point can be in multiple groups.
619
+ :param group_size: Maximum amount of points to return per group. Default is 3.
583
620
 
584
621
  :returns: List of documents that are most similar to `query_embedding`.
585
622
  """
586
623
  qdrant_filters = convert_filters_to_qdrant(filters)
624
+ if group_by:
625
+ groups = self.client.query_points_groups(
626
+ collection_name=self.index,
627
+ query=query_embedding,
628
+ using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
629
+ query_filter=qdrant_filters,
630
+ limit=top_k,
631
+ group_by=group_by,
632
+ group_size=group_size,
633
+ with_vectors=return_embedding,
634
+ score_threshold=score_threshold,
635
+ ).groups
636
+ results = (
637
+ [
638
+ convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
639
+ for group in groups
640
+ for point in group.hits
641
+ ]
642
+ if groups
643
+ else []
644
+ )
645
+ else:
646
+ points = self.client.query_points(
647
+ collection_name=self.index,
648
+ query=query_embedding,
649
+ using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
650
+ query_filter=qdrant_filters,
651
+ limit=top_k,
652
+ with_vectors=return_embedding,
653
+ score_threshold=score_threshold,
654
+ ).points
655
+ results = [
656
+ convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
657
+ for point in points
658
+ ]
587
659
 
588
- points = self.client.query_points(
589
- collection_name=self.index,
590
- query=query_embedding,
591
- using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
592
- query_filter=qdrant_filters,
593
- limit=top_k,
594
- with_vectors=return_embedding,
595
- score_threshold=score_threshold,
596
- ).points
597
- results = [
598
- convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
599
- for point in points
600
- ]
601
660
  if scale_score:
602
661
  for document in results:
603
662
  score = document.score
@@ -616,6 +675,8 @@ class QdrantDocumentStore:
616
675
  top_k: int = 10,
617
676
  return_embedding: bool = False,
618
677
  score_threshold: Optional[float] = None,
678
+ group_by: Optional[str] = None,
679
+ group_size: Optional[int] = None,
619
680
  ) -> List[Document]:
620
681
  """
621
682
  Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
@@ -626,12 +687,16 @@ class QdrantDocumentStore:
626
687
  :param query_embedding: Dense embedding of the query.
627
688
  :param query_sparse_embedding: Sparse embedding of the query.
628
689
  :param filters: Filters applied to the retrieved documents.
629
- :param top_k: Maximum number of documents to return.
690
+ :param top_k: Maximum number of documents to return. If using `group_by` parameters, maximum number of
691
+ groups to return.
630
692
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
631
693
  :param score_threshold: A minimal score threshold for the result.
632
694
  Score of the returned result might be higher or smaller than the threshold
633
695
  depending on the Distance function used.
634
696
  E.g. for cosine similarity only higher scores will be returned.
697
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
698
+ value, all values will be used for grouping. One point can be in multiple groups.
699
+ :param group_size: Maximum amount of points to return per group. Default is 3.
635
700
 
636
701
  :returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
637
702
 
@@ -651,34 +716,73 @@ class QdrantDocumentStore:
651
716
  qdrant_filters = convert_filters_to_qdrant(filters)
652
717
 
653
718
  try:
654
- points = self.client.query_points(
655
- collection_name=self.index,
656
- prefetch=[
657
- rest.Prefetch(
658
- query=rest.SparseVector(
659
- indices=query_sparse_embedding.indices,
660
- values=query_sparse_embedding.values,
719
+ if group_by:
720
+ groups = self.client.query_points_groups(
721
+ collection_name=self.index,
722
+ prefetch=[
723
+ rest.Prefetch(
724
+ query=rest.SparseVector(
725
+ indices=query_sparse_embedding.indices,
726
+ values=query_sparse_embedding.values,
727
+ ),
728
+ using=SPARSE_VECTORS_NAME,
729
+ filter=qdrant_filters,
661
730
  ),
662
- using=SPARSE_VECTORS_NAME,
663
- filter=qdrant_filters,
664
- ),
665
- rest.Prefetch(
666
- query=query_embedding,
667
- using=DENSE_VECTORS_NAME,
668
- filter=qdrant_filters,
669
- ),
670
- ],
671
- query=rest.FusionQuery(fusion=rest.Fusion.RRF),
672
- limit=top_k,
673
- score_threshold=score_threshold,
674
- with_payload=True,
675
- with_vectors=return_embedding,
676
- ).points
731
+ rest.Prefetch(
732
+ query=query_embedding,
733
+ using=DENSE_VECTORS_NAME,
734
+ filter=qdrant_filters,
735
+ ),
736
+ ],
737
+ query=rest.FusionQuery(fusion=rest.Fusion.RRF),
738
+ limit=top_k,
739
+ group_by=group_by,
740
+ group_size=group_size,
741
+ score_threshold=score_threshold,
742
+ with_payload=True,
743
+ with_vectors=return_embedding,
744
+ ).groups
745
+ else:
746
+ points = self.client.query_points(
747
+ collection_name=self.index,
748
+ prefetch=[
749
+ rest.Prefetch(
750
+ query=rest.SparseVector(
751
+ indices=query_sparse_embedding.indices,
752
+ values=query_sparse_embedding.values,
753
+ ),
754
+ using=SPARSE_VECTORS_NAME,
755
+ filter=qdrant_filters,
756
+ ),
757
+ rest.Prefetch(
758
+ query=query_embedding,
759
+ using=DENSE_VECTORS_NAME,
760
+ filter=qdrant_filters,
761
+ ),
762
+ ],
763
+ query=rest.FusionQuery(fusion=rest.Fusion.RRF),
764
+ limit=top_k,
765
+ score_threshold=score_threshold,
766
+ with_payload=True,
767
+ with_vectors=return_embedding,
768
+ ).points
769
+
677
770
  except Exception as e:
678
771
  msg = "Error during hybrid search"
679
772
  raise QdrantStoreError(msg) from e
680
773
 
681
- results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
774
+ if group_by:
775
+ results = (
776
+ [
777
+ convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
778
+ for group in groups
779
+ for point in group.hits
780
+ ]
781
+ if groups
782
+ else []
783
+ )
784
+ else:
785
+ results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
682
786
 
683
787
  return results
684
788
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 4.2.0
3
+ Version: 5.1.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -1,11 +1,11 @@
1
1
  haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
2
- haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=DIqwa2JMVP7z52wmKdCfFk1ZdV0j50ZYCuRluQu5llk,18449
2
+ haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=VsQVsvf79imTCdWUKikUxpjczl5oxOV64a91aGXZwpE,21997
3
3
  haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
4
4
  haystack_integrations/document_stores/qdrant/converters.py,sha256=2hcuI3kty1dVHzX1WGXxEtlrnZ9E8TAG56XATCFa6Pw,2491
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=zkRlg-vppenzy1QEFKQh9vyCEBehdYP42_cn0-Uu5vE,37587
5
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=WPjuSecd_uVy6RdM6T5w6kqPrXjOwnUb1sgROQxMfVE,42524
6
6
  haystack_integrations/document_stores/qdrant/filters.py,sha256=Nv_eKIYKwUWvldJfa0omfFQ0kgqi6L3DUFeMuIWziOY,11751
7
7
  haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=yhZr4GB6N1S-Ikzl52hpuZt2aHNIb4leqFDhVMU3Uho,4910
8
- qdrant_haystack-4.2.0.dist-info/METADATA,sha256=1YbgcocABSF6diKKveRlJg2q1D4IOTYpv9fOWPQJbDk,1863
9
- qdrant_haystack-4.2.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
- qdrant_haystack-4.2.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
- qdrant_haystack-4.2.0.dist-info/RECORD,,
8
+ qdrant_haystack-5.1.0.dist-info/METADATA,sha256=grNTLGCFSD3JP43zMEyHjkJfhdeuU0ATqB2hKknZRR8,1863
9
+ qdrant_haystack-5.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
+ qdrant_haystack-5.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
+ qdrant_haystack-5.1.0.dist-info/RECORD,,