qdrant-haystack 4.0.0__tar.gz → 4.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (23)
  1. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/CHANGELOG.md +12 -0
  2. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/PKG-INFO +2 -2
  3. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/pyproject.toml +1 -1
  4. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +36 -0
  5. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +38 -3
  6. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_dict_converters.py +3 -0
  7. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_document_store.py +23 -1
  8. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_retriever.py +37 -0
  9. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/.gitignore +0 -0
  10. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/LICENSE.txt +0 -0
  11. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/README.md +0 -0
  12. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/examples/embedding_retrieval.py +0 -0
  13. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/pydoc/config.yml +0 -0
  14. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  15. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  16. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  17. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  18. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  19. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/__init__.py +0 -0
  20. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/conftest.py +0 -0
  21. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_converters.py +0 -0
  22. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_filters.py +0 -0
  23. {qdrant_haystack-4.0.0 → qdrant_haystack-4.1.0}/tests/test_legacy_filters.py +0 -0
CHANGELOG.md

@@ -1,5 +1,17 @@
  # Changelog

+ ## [integrations/qdrant-v4.0.0] - 2024-07-02
+
+ ### 🚜 Refactor
+
+ - [**breaking**] Qdrant - remove unused init parameters: `content_field`, `name_field`, `embedding_field`, and `duplicate_documents` (#861)
+ - [**breaking**] Qdrant - set `scale_score` default value to `False` (#862)
+
+ ### ⚙️ Miscellaneous Tasks
+
+ - Retry tests to reduce flakyness (#836)
+ - Update ruff invocation to include check parameter (#853)
+
  ## [integrations/qdrant-v3.8.1] - 2024-06-20

  ### 📚 Documentation

PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: qdrant-haystack
- Version: 4.0.0
+ Version: 4.1.0
  Summary: An integration of Qdrant ANN vector database backend with Haystack
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
  Classifier: Programming Language :: Python :: Implementation :: PyPy
  Requires-Python: >=3.8
  Requires-Dist: haystack-ai>=2.0.1
- Requires-Dist: qdrant-client
+ Requires-Dist: qdrant-client>=1.10.0
  Description-Content-Type: text/markdown

  # qdrant-haystack

pyproject.toml

@@ -25,7 +25,7 @@ classifiers = [
  "Programming Language :: Python :: Implementation :: CPython",
  "Programming Language :: Python :: Implementation :: PyPy",
  ]
- dependencies = ["haystack-ai>=2.0.1", "qdrant-client"]
+ dependencies = ["haystack-ai>=2.0.1", "qdrant-client>=1.10.0"]

  [project.urls]
  Source = "https://github.com/deepset-ai/haystack-core-integrations"

src/haystack_integrations/components/retrievers/qdrant/retriever.py

@@ -39,6 +39,7 @@ class QdrantEmbeddingRetriever:
  top_k: int = 10,
  scale_score: bool = False,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ):
  """
  Create a QdrantEmbeddingRetriever component.
@@ -48,6 +49,10 @@ class QdrantEmbeddingRetriever:
  :param top_k: The maximum number of documents to retrieve.
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the `similarity` function specified in the Document Store.
+ E.g. for cosine similarity only higher scores will be returned.

  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
  """
@@ -61,6 +66,7 @@ class QdrantEmbeddingRetriever:
  self._top_k = top_k
  self._scale_score = scale_score
  self._return_embedding = return_embedding
+ self._score_threshold = score_threshold

  def to_dict(self) -> Dict[str, Any]:
  """
@@ -76,6 +82,7 @@ class QdrantEmbeddingRetriever:
  top_k=self._top_k,
  scale_score=self._scale_score,
  return_embedding=self._return_embedding,
+ score_threshold=self._score_threshold,
  )
  d["init_parameters"]["document_store"] = self._document_store.to_dict()

@@ -103,6 +110,7 @@ class QdrantEmbeddingRetriever:
  top_k: Optional[int] = None,
  scale_score: Optional[bool] = None,
  return_embedding: Optional[bool] = None,
+ score_threshold: Optional[float] = None,
  ):
  """
  Run the Embedding Retriever on the given input data.
@@ -112,6 +120,7 @@ class QdrantEmbeddingRetriever:
  :param top_k: The maximum number of documents to return.
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
  :returns:
  The retrieved documents.

@@ -122,6 +131,7 @@ class QdrantEmbeddingRetriever:
  top_k=top_k or self._top_k,
  scale_score=scale_score or self._scale_score,
  return_embedding=return_embedding or self._return_embedding,
+ score_threshold=score_threshold or self._score_threshold,
  )

  return {"documents": docs}
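
Taken together, these hunks let the threshold be set once at construction time or overridden per `run()` call (the effective value is `score_threshold or self._score_threshold`). A minimal sketch of the dense case, using an in-memory store and toy 4-dimensional embeddings that mirror the new `test_run_with_score_threshold` test further down in this diff:

```python
from haystack import Document
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# In-memory Qdrant with cosine similarity and tiny toy embeddings.
document_store = QdrantDocumentStore(location=":memory:", embedding_dim=4, similarity="cosine")
document_store.write_documents(
    [
        Document(content="The document", embedding=[1.0, 1.0, 1.0, 1.0]),
        Document(content="Another document", embedding=[0.8, 0.8, 0.5, 1.0]),
        Document(content="Yet another document", embedding=[-0.1, -0.9, -10.0, -0.2]),
    ]
)

# With cosine similarity only documents scoring above 0.5 are returned,
# so the third document is filtered out.
retriever = QdrantEmbeddingRetriever(document_store=document_store, score_threshold=0.5)
results = retriever.run(query_embedding=[0.9, 0.9, 0.9, 0.9])["documents"]
assert len(results) == 2
```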
@@ -161,6 +171,7 @@ class QdrantSparseEmbeddingRetriever:
  top_k: int = 10,
  scale_score: bool = False,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ):
  """
  Create a QdrantSparseEmbeddingRetriever component.
@@ -170,6 +181,10 @@ class QdrantSparseEmbeddingRetriever:
  :param top_k: The maximum number of documents to retrieve.
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
  :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.

  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
  """
@@ -183,6 +198,7 @@ class QdrantSparseEmbeddingRetriever:
  self._top_k = top_k
  self._scale_score = scale_score
  self._return_embedding = return_embedding
+ self._score_threshold = score_threshold

  def to_dict(self) -> Dict[str, Any]:
  """
@@ -198,6 +214,7 @@ class QdrantSparseEmbeddingRetriever:
  top_k=self._top_k,
  scale_score=self._scale_score,
  return_embedding=self._return_embedding,
+ score_threshold=self._score_threshold,
  )
  d["init_parameters"]["document_store"] = self._document_store.to_dict()

@@ -225,6 +242,7 @@ class QdrantSparseEmbeddingRetriever:
  top_k: Optional[int] = None,
  scale_score: Optional[bool] = None,
  return_embedding: Optional[bool] = None,
+ score_threshold: Optional[float] = None,
  ):
  """
  Run the Sparse Embedding Retriever on the given input data.
@@ -234,6 +252,10 @@ class QdrantSparseEmbeddingRetriever:
  :param top_k: The maximum number of documents to return.
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.
  :returns:
  The retrieved documents.

@@ -244,6 +266,7 @@ class QdrantSparseEmbeddingRetriever:
  top_k=top_k or self._top_k,
  scale_score=scale_score or self._scale_score,
  return_embedding=return_embedding or self._return_embedding,
+ score_threshold=score_threshold or self._score_threshold,
  )

  return {"documents": docs}
@@ -288,6 +311,7 @@ class QdrantHybridRetriever:
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
  top_k: int = 10,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ):
  """
  Create a QdrantHybridRetriever component.
@@ -296,6 +320,10 @@ class QdrantHybridRetriever:
  :param filters: A dictionary with filters to narrow down the search space.
  :param top_k: The maximum number of documents to retrieve.
  :param return_embedding: Whether to return the embeddings of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.

  :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
  """
@@ -308,6 +336,7 @@ class QdrantHybridRetriever:
  self._filters = filters
  self._top_k = top_k
  self._return_embedding = return_embedding
+ self._score_threshold = score_threshold

  def to_dict(self) -> Dict[str, Any]:
  """
@@ -322,6 +351,7 @@ class QdrantHybridRetriever:
  filters=self._filters,
  top_k=self._top_k,
  return_embedding=self._return_embedding,
+ score_threshold=self._score_threshold,
  )

  @classmethod
@@ -346,6 +376,7 @@ class QdrantHybridRetriever:
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
  top_k: Optional[int] = None,
  return_embedding: Optional[bool] = None,
+ score_threshold: Optional[float] = None,
  ):
  """
  Run the Sparse Embedding Retriever on the given input data.
@@ -355,6 +386,10 @@ class QdrantHybridRetriever:
  :param filters: A dictionary with filters to narrow down the search space.
  :param top_k: The maximum number of documents to return.
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.
  :returns:
  The retrieved documents.

@@ -365,6 +400,7 @@ class QdrantHybridRetriever:
  filters=filters or self._filters,
  top_k=top_k or self._top_k,
  return_embedding=return_embedding or self._return_embedding,
+ score_threshold=score_threshold or self._score_threshold,
  )

  return {"documents": docs}
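
`QdrantSparseEmbeddingRetriever` and `QdrantHybridRetriever` gain the same `score_threshold` knob in both `__init__()` and `run()`. A rough sketch of the hybrid case, assuming Haystack's `SparseEmbedding` dataclass and toy embedding values (the sparse-only retriever is used the same way, just without `query_embedding`):

```python
from haystack import Document
from haystack.dataclasses import SparseEmbedding
from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# In-memory store with sparse vectors enabled, so documents can carry
# both a dense embedding and a sparse_embedding.
document_store = QdrantDocumentStore(location=":memory:", embedding_dim=4, use_sparse_embeddings=True)
document_store.write_documents(
    [
        Document(
            content="The document",
            embedding=[1.0, 1.0, 1.0, 1.0],
            sparse_embedding=SparseEmbedding(indices=[0, 7], values=[1.0, 0.5]),
        )
    ]
)

# The threshold is forwarded to the underlying Qdrant search requests.
retriever = QdrantHybridRetriever(document_store=document_store, score_threshold=0.5)
results = retriever.run(
    query_embedding=[0.9, 0.9, 0.9, 0.9],
    query_sparse_embedding=SparseEmbedding(indices=[0, 7], values=[0.8, 0.3]),
)["documents"]
```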
src/haystack_integrations/document_stores/qdrant/document_store.py

@@ -111,6 +111,7 @@ class QdrantDocumentStore:
  embedding_dim: int = 768,
  on_disk: bool = False,
  use_sparse_embeddings: bool = False,
+ sparse_idf: bool = False,
  similarity: str = "cosine",
  return_embedding: bool = False,
  progress_bar: bool = True,
@@ -168,6 +169,9 @@ class QdrantDocumentStore:
  Whether to store the collection on disk.
  :param use_sparse_embedding:
  If set to `True`, enables support for sparse embeddings.
+ :param sparse_idf:
+ If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
+ It is required to use techniques like BM42. It is ignored if `use_sparse_embeddings` is `False`.
  :param similarity:
  The similarity metric to use.
  :param return_embedding:
@@ -246,6 +250,7 @@ class QdrantDocumentStore:
  self.recreate_index = recreate_index
  self.payload_fields_to_index = payload_fields_to_index
  self.use_sparse_embeddings = use_sparse_embeddings
+ self.sparse_idf = use_sparse_embeddings and sparse_idf
  self.embedding_dim = embedding_dim
  self.on_disk = on_disk
  self.similarity = similarity
@@ -280,6 +285,7 @@ class QdrantDocumentStore:
  self.recreate_index,
  self.similarity,
  self.use_sparse_embeddings,
+ self.sparse_idf,
  self.on_disk,
  self.payload_fields_to_index,
  )
@@ -347,7 +353,9 @@ class QdrantDocumentStore:
  if not isinstance(doc, Document):
  msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
  raise ValueError(msg)
- self._set_up_collection(self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings)
+ self._set_up_collection(
+ self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings, self.sparse_idf
+ )

  if len(documents) == 0:
  logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
@@ -498,6 +506,7 @@ class QdrantDocumentStore:
  top_k: int = 10,
  scale_score: bool = False,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ) -> List[Document]:
  """
  Queries Qdrant using a sparse embedding and returns the most relevant documents.
@@ -507,6 +516,10 @@ class QdrantDocumentStore:
  :param top_k: Maximum number of documents to return.
  :param scale_score: Whether to scale the scores of the retrieved documents.
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.

  :returns: List of documents that are most similar to `query_sparse_embedding`.

@@ -536,6 +549,7 @@ class QdrantDocumentStore:
  query_filter=qdrant_filters,
  limit=top_k,
  with_vectors=return_embedding,
+ score_threshold=score_threshold,
  )
  results = [
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -555,6 +569,7 @@ class QdrantDocumentStore:
  top_k: int = 10,
  scale_score: bool = False,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ) -> List[Document]:
  """
  Queries Qdrant using a dense embedding and returns the most relevant documents.
@@ -564,6 +579,10 @@ class QdrantDocumentStore:
  :param top_k: Maximum number of documents to return.
  :param scale_score: Whether to scale the scores of the retrieved documents.
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.

  :returns: List of documents that are most similar to `query_embedding`.
  """
@@ -578,6 +597,7 @@ class QdrantDocumentStore:
  query_filter=qdrant_filters,
  limit=top_k,
  with_vectors=return_embedding,
+ score_threshold=score_threshold,
  )
  results = [
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -600,6 +620,7 @@ class QdrantDocumentStore:
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
  top_k: int = 10,
  return_embedding: bool = False,
+ score_threshold: Optional[float] = None,
  ) -> List[Document]:
  """
  Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
@@ -612,6 +633,10 @@ class QdrantDocumentStore:
  :param filters: Filters applied to the retrieved documents.
  :param top_k: Maximum number of documents to return.
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
+ :param score_threshold: A minimal score threshold for the result.
+ Score of the returned result might be higher or smaller than the threshold
+ depending on the Distance function used.
+ E.g. for cosine similarity only higher scores will be returned.

  :returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.

@@ -642,6 +667,7 @@ class QdrantDocumentStore:
  limit=top_k,
  with_payload=True,
  with_vector=return_embedding,
+ score_threshold=score_threshold,
  )

  dense_request = rest.SearchRequest(
@@ -714,6 +740,7 @@ class QdrantDocumentStore:
  recreate_collection: bool,
  similarity: str,
  use_sparse_embeddings: bool,
+ sparse_idf: bool,
  on_disk: bool = False,
  payload_fields_to_index: Optional[List[dict]] = None,
  ):
@@ -729,6 +756,8 @@ class QdrantDocumentStore:
  The similarity measure to use.
  :param use_sparse_embeddings:
  Whether to use sparse embeddings.
+ :param sparse_idf:
+ Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
  :param on_disk:
  Whether to store the collection on disk.
  :param payload_fields_to_index:
@@ -745,7 +774,9 @@ class QdrantDocumentStore:
  if recreate_collection or not self.client.collection_exists(collection_name):
  # There is no need to verify the current configuration of that
  # collection. It might be just recreated again or does not exist yet.
- self.recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
+ self.recreate_collection(
+ collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings, sparse_idf
+ )
  # Create Payload index if payload_fields_to_index is provided
  self._create_payload_index(collection_name, payload_fields_to_index)
  return
@@ -808,6 +839,7 @@ class QdrantDocumentStore:
  embedding_dim: int,
  on_disk: Optional[bool] = None,
  use_sparse_embeddings: Optional[bool] = None,
+ sparse_idf: bool = False,
  ):
  """
  Recreates the Qdrant collection with the specified parameters.
@@ -822,6 +854,8 @@ class QdrantDocumentStore:
  Whether to store the collection on disk.
  :param use_sparse_embeddings:
  Whether to use sparse embeddings.
+ :param sparse_idf:
+ Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
  """
  if on_disk is None:
  on_disk = self.on_disk
@@ -840,7 +874,8 @@ class QdrantDocumentStore:
  SPARSE_VECTORS_NAME: rest.SparseVectorParams(
  index=rest.SparseIndexParams(
  on_disk=on_disk,
- )
+ ),
+ modifier=rest.Modifier.IDF if sparse_idf else None,
  ),
  }
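
The `rest.Modifier.IDF` setting above is presumably why the dependency floor moved to `qdrant-client>=1.10.0` earlier in this diff. From the user's side, IDF weighting for sparse vectors is a single constructor flag; a minimal sketch mirroring the new `test_sparse_configuration` test below (the flag only takes effect together with `use_sparse_embeddings=True`):

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

# Creates the collection with the IDF modifier on its sparse vectors,
# which BM42-style sparse embedders expect. With use_sparse_embeddings=False
# the flag is silently ignored.
document_store = QdrantDocumentStore(
    ":memory:",
    recreate_index=True,
    use_sparse_embeddings=True,
    sparse_idf=True,
)
```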
tests/test_dict_converters.py

@@ -24,6 +24,7 @@ def test_to_dict():
  "on_disk": False,
  "force_disable_check_same_thread": False,
  "use_sparse_embeddings": False,
+ "sparse_idf": False,
  "similarity": "cosine",
  "return_embedding": False,
  "progress_bar": True,
@@ -60,6 +61,7 @@ def test_from_dict():
  "on_disk": False,
  "force_disable_check_same_thread": False,
  "use_sparse_embeddings": True,
+ "sparse_idf": True,
  "similarity": "cosine",
  "return_embedding": False,
  "progress_bar": True,
@@ -81,6 +83,7 @@ def test_from_dict():
  document_store.index == "test",
  document_store.force_disable_check_same_thread is False,
  document_store.use_sparse_embeddings is True,
+ document_store.sparse_idf is True,
  document_store.on_disk is False,
  document_store.similarity == "cosine",
  document_store.return_embedding is False,

tests/test_document_store.py

@@ -12,7 +12,12 @@ from haystack.testing.document_store import (
  WriteDocumentsTest,
  _random_embeddings,
  )
- from haystack_integrations.document_stores.qdrant.document_store import QdrantDocumentStore, QdrantStoreError
+ from haystack_integrations.document_stores.qdrant.document_store import (
+ SPARSE_VECTORS_NAME,
+ QdrantDocumentStore,
+ QdrantStoreError,
+ )
+ from qdrant_client.http import models as rest


  class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
@@ -49,6 +54,23 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
  with pytest.raises(DuplicateDocumentError):
  document_store.write_documents(docs, DuplicatePolicy.FAIL)

+ def test_sparse_configuration(self):
+ document_store = QdrantDocumentStore(
+ ":memory:",
+ recreate_index=True,
+ use_sparse_embeddings=True,
+ sparse_idf=True,
+ )
+
+ client = document_store.client
+ sparse_config = client.get_collection("Document").config.params.sparse_vectors
+
+ assert SPARSE_VECTORS_NAME in sparse_config
+
+ # check that the `sparse_idf` parameter takes effect
+ assert hasattr(sparse_config[SPARSE_VECTORS_NAME], "modifier")
+ assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF
+
  def test_query_hybrid(self, generate_sparse_embedding):
  document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)

tests/test_retriever.py

@@ -22,6 +22,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  assert retriever._filters is None
  assert retriever._top_k == 10
  assert retriever._return_embedding is False
+ assert retriever._score_threshold is None

  def test_to_dict(self):
  document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
@@ -49,6 +50,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  "on_disk": False,
  "force_disable_check_same_thread": False,
  "use_sparse_embeddings": False,
+ "sparse_idf": False,
  "similarity": "cosine",
  "return_embedding": False,
  "progress_bar": True,
@@ -73,6 +75,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  "top_k": 10,
  "scale_score": False,
  "return_embedding": False,
+ "score_threshold": None,
  },
  }

@@ -88,6 +91,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  "top_k": 5,
  "scale_score": False,
  "return_embedding": True,
+ "score_threshold": None,
  },
  }
  retriever = QdrantEmbeddingRetriever.from_dict(data)
@@ -97,6 +101,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  assert retriever._top_k == 5
  assert retriever._scale_score is False
  assert retriever._return_embedding is True
+ assert retriever._score_threshold is None

  def test_run(self, filterable_docs: List[Document]):
  document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
@@ -114,6 +119,28 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
  for document in results:
  assert document.embedding is None

+ def test_run_with_score_threshold(self):
+ document_store = QdrantDocumentStore(
+ embedding_dim=4, location=":memory:", similarity="cosine", index="Boi", use_sparse_embeddings=False
+ )
+
+ document_store.write_documents(
+ [
+ Document(
+ content="Yet another document",
+ embedding=[-0.1, -0.9, -10.0, -0.2],
+ ),
+ Document(content="The document", embedding=[1.0, 1.0, 1.0, 1.0]),
+ Document(content="Another document", embedding=[0.8, 0.8, 0.5, 1.0]),
+ ]
+ )
+
+ retriever = QdrantEmbeddingRetriever(document_store=document_store)
+ results = retriever.run(
+ query_embedding=[0.9, 0.9, 0.9, 0.9], top_k=5, return_embedding=False, score_threshold=0.5
+ )["documents"]
+ assert len(results) == 2
+
  def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
  document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)

@@ -141,6 +168,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
  assert retriever._filters is None
  assert retriever._top_k == 10
  assert retriever._return_embedding is False
+ assert retriever._score_threshold is None

  def test_to_dict(self):
  document_store = QdrantDocumentStore(location=":memory:", index="test")
@@ -168,6 +196,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
  "on_disk": False,
  "force_disable_check_same_thread": False,
  "use_sparse_embeddings": False,
+ "sparse_idf": False,
  "similarity": "cosine",
  "return_embedding": False,
  "progress_bar": True,
@@ -192,6 +221,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
  "top_k": 10,
  "scale_score": False,
  "return_embedding": False,
+ "score_threshold": None,
  },
  }

@@ -207,6 +237,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
  "top_k": 5,
  "scale_score": False,
  "return_embedding": True,
+ "score_threshold": None,
  },
  }
  retriever = QdrantSparseEmbeddingRetriever.from_dict(data)
@@ -216,6 +247,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
  assert retriever._top_k == 5
  assert retriever._scale_score is False
  assert retriever._return_embedding is True
+ assert retriever._score_threshold is None

  def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
  document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
@@ -247,6 +279,7 @@ class TestQdrantHybridRetriever:
  assert retriever._filters is None
  assert retriever._top_k == 10
  assert retriever._return_embedding is False
+ assert retriever._score_threshold is None

  def test_to_dict(self):
  document_store = QdrantDocumentStore(location=":memory:", index="test")
@@ -274,6 +307,7 @@ class TestQdrantHybridRetriever:
  "on_disk": False,
  "force_disable_check_same_thread": False,
  "use_sparse_embeddings": False,
+ "sparse_idf": False,
  "similarity": "cosine",
  "return_embedding": False,
  "progress_bar": True,
@@ -297,6 +331,7 @@ class TestQdrantHybridRetriever:
  "filters": None,
  "top_k": 5,
  "return_embedding": True,
+ "score_threshold": None,
  },
  }

@@ -311,6 +346,7 @@ class TestQdrantHybridRetriever:
  "filters": None,
  "top_k": 5,
  "return_embedding": True,
+ "score_threshold": None,
  },
  }
  retriever = QdrantHybridRetriever.from_dict(data)
@@ -319,6 +355,7 @@ class TestQdrantHybridRetriever:
  assert retriever._filters is None
  assert retriever._top_k == 5
  assert retriever._return_embedding
+ assert retriever._score_threshold is None

  def test_run(self):
  mock_store = Mock(spec=QdrantDocumentStore)