qdrant-haystack 3.4.0__tar.gz → 3.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (23) hide show
  1. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/PKG-INFO +2 -1
  2. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/pyproject.toml +1 -0
  3. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +2 -2
  4. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +132 -5
  5. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +83 -2
  6. qdrant_haystack-3.5.0/tests/conftest.py +18 -0
  7. qdrant_haystack-3.5.0/tests/test_document_store.py +104 -0
  8. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/test_retriever.py +113 -14
  9. qdrant_haystack-3.4.0/tests/test_document_store.py +0 -41
  10. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/.gitignore +0 -0
  11. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/LICENSE.txt +0 -0
  12. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/README.md +0 -0
  13. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/examples/embedding_retrieval.py +0 -0
  14. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/pydoc/config.yml +0 -0
  15. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  16. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  17. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  18. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  19. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/__init__.py +0 -0
  20. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/test_converters.py +0 -0
  21. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/test_dict_converters.py +0 -0
  22. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/test_filters.py +0 -0
  23. {qdrant_haystack-3.4.0 → qdrant_haystack-3.5.0}/tests/test_legacy_filters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.4.0
3
+ Version: 3.5.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -9,6 +9,7 @@ Author-email: Kacper Łukawski <kacper.lukawski@qdrant.com>, Anush Shetty <anush
9
9
  License-Expression: Apache-2.0
10
10
  License-File: LICENSE.txt
11
11
  Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: Apache Software License
12
13
  Classifier: Programming Language :: Python
13
14
  Classifier: Programming Language :: Python :: 3.8
14
15
  Classifier: Programming Language :: Python :: 3.9
@@ -15,6 +15,7 @@ authors = [
15
15
  { name = "Anush Shetty", email = "anush.shetty@qdrant.com" },
16
16
  ]
17
17
  classifiers = [
18
+ "License :: OSI Approved :: Apache Software License",
18
19
  "Development Status :: 4 - Beta",
19
20
  "Programming Language :: Python",
20
21
  "Programming Language :: Python :: 3.8",
@@ -2,6 +2,6 @@
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- from .retriever import QdrantEmbeddingRetriever, QdrantSparseEmbeddingRetriever
5
+ from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
6
6
 
7
- __all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever")
7
+ __all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
@@ -19,8 +19,10 @@ class QdrantEmbeddingRetriever:
19
19
  ":memory:",
20
20
  recreate_index=True,
21
21
  return_embedding=True,
22
- wait_result_from_api=True,
23
22
  )
23
+
24
+ document_store.write_documents([Document(content="test", embedding=[0.5]*768)])
25
+
24
26
  retriever = QdrantEmbeddingRetriever(document_store=document_store)
25
27
 
26
28
  # using a fake vector to keep the example simple
@@ -112,7 +114,7 @@ class QdrantEmbeddingRetriever:
112
114
  The retrieved documents.
113
115
 
114
116
  """
115
- docs = self._document_store.query_by_embedding(
117
+ docs = self._document_store._query_by_embedding(
116
118
  query_embedding=query_embedding,
117
119
  filters=filters or self._filters,
118
120
  top_k=top_k or self._top_k,
@@ -136,10 +138,14 @@ class QdrantSparseEmbeddingRetriever:
136
138
 
137
139
  document_store = QdrantDocumentStore(
138
140
  ":memory:",
141
+ use_sparse_embeddings=True,
139
142
  recreate_index=True,
140
143
  return_embedding=True,
141
- wait_result_from_api=True,
142
144
  )
145
+
146
+ doc = Document(content="test", sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))
147
+ document_store.write_documents([doc])
148
+
143
149
  retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
144
150
  sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
145
151
  retriever.run(query_sparse_embedding=sparse_embedding)
@@ -196,7 +202,7 @@ class QdrantSparseEmbeddingRetriever:
196
202
  return d
197
203
 
198
204
  @classmethod
199
- def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
205
+ def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
200
206
  """
201
207
  Deserializes the component from a dictionary.
202
208
 
@@ -230,7 +236,7 @@ class QdrantSparseEmbeddingRetriever:
230
236
  The retrieved documents.
231
237
 
232
238
  """
233
- docs = self._document_store.query_by_sparse(
239
+ docs = self._document_store._query_by_sparse(
234
240
  query_sparse_embedding=query_sparse_embedding,
235
241
  filters=filters or self._filters,
236
242
  top_k=top_k or self._top_k,
@@ -239,3 +245,124 @@ class QdrantSparseEmbeddingRetriever:
239
245
  )
240
246
 
241
247
  return {"documents": docs}
248
+
249
+
250
+ @component
251
+ class QdrantHybridRetriever:
252
+ """
253
+ A component for retrieving documents from a QdrantDocumentStore using both dense and sparse vectors
254
+ and fusing the results using Reciprocal Rank Fusion.
255
+
256
+ Usage example:
257
+ ```python
258
+ from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
259
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
260
+ from haystack.dataclasses.sparse_embedding import SparseEmbedding
261
+
262
+ document_store = QdrantDocumentStore(
263
+ ":memory:",
264
+ use_sparse_embeddings=True,
265
+ recreate_index=True,
266
+ return_embedding=True,
267
+ wait_result_from_api=True,
268
+ )
269
+
270
+ doc = Document(content="test",
271
+ embedding=[0.5]*768,
272
+ sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))
273
+
274
+ document_store.write_documents([doc])
275
+
276
+ retriever = QdrantHybridRetriever(document_store=document_store)
277
+ embedding = [0.1]*768
278
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
279
+ retriever.run(query_embedding=embedding, query_sparse_embedding=sparse_embedding)
280
+ ```
281
+ """
282
+
283
+ def __init__(
284
+ self,
285
+ document_store: QdrantDocumentStore,
286
+ filters: Optional[Dict[str, Any]] = None,
287
+ top_k: int = 10,
288
+ return_embedding: bool = False,
289
+ ):
290
+ """
291
+ Create a QdrantHybridRetriever component.
292
+
293
+ :param document_store: An instance of QdrantDocumentStore.
294
+ :param filters: A dictionary with filters to narrow down the search space.
295
+ :param top_k: The maximum number of documents to retrieve.
296
+ :param return_embedding: Whether to return the embeddings of the retrieved Documents.
297
+
298
+ :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
299
+ """
300
+
301
+ if not isinstance(document_store, QdrantDocumentStore):
302
+ msg = "document_store must be an instance of QdrantDocumentStore"
303
+ raise ValueError(msg)
304
+
305
+ self._document_store = document_store
306
+ self._filters = filters
307
+ self._top_k = top_k
308
+ self._return_embedding = return_embedding
309
+
310
+ def to_dict(self) -> Dict[str, Any]:
311
+ """
312
+ Serializes the component to a dictionary.
313
+
314
+ :returns:
315
+ Dictionary with serialized data.
316
+ """
317
+ return default_to_dict(
318
+ self,
319
+ document_store=self._document_store.to_dict(),
320
+ filters=self._filters,
321
+ top_k=self._top_k,
322
+ return_embedding=self._return_embedding,
323
+ )
324
+
325
+ @classmethod
326
+ def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever":
327
+ """
328
+ Deserializes the component from a dictionary.
329
+
330
+ :param data:
331
+ Dictionary to deserialize from.
332
+ :returns:
333
+ Deserialized component.
334
+ """
335
+ document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
336
+ data["init_parameters"]["document_store"] = document_store
337
+ return default_from_dict(cls, data)
338
+
339
+ @component.output_types(documents=List[Document])
340
+ def run(
341
+ self,
342
+ query_embedding: List[float],
343
+ query_sparse_embedding: SparseEmbedding,
344
+ filters: Optional[Dict[str, Any]] = None,
345
+ top_k: Optional[int] = None,
346
+ return_embedding: Optional[bool] = None,
347
+ ):
348
+ """
349
+ Run the Hybrid Retriever on the given input data.
350
+
351
+ :param query_embedding: Dense embedding of the query.
352
+ :param query_sparse_embedding: Sparse embedding of the query.
353
+ :param filters: A dictionary with filters to narrow down the search space.
354
+ :param top_k: The maximum number of documents to return.
355
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
356
+ :returns:
357
+ The retrieved documents.
358
+
359
+ """
360
+ docs = self._document_store._query_hybrid(
361
+ query_embedding=query_embedding,
362
+ query_sparse_embedding=query_sparse_embedding,
363
+ filters=filters or self._filters,
364
+ top_k=top_k or self._top_k,
365
+ return_embedding=return_embedding or self._return_embedding,
366
+ )
367
+
368
+ return {"documents": docs}
@@ -16,6 +16,7 @@ from haystack.utils.filters import convert as convert_legacy_filters
16
16
  from qdrant_client import grpc
17
17
  from qdrant_client.http import models as rest
18
18
  from qdrant_client.http.exceptions import UnexpectedResponse
19
+ from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
19
20
  from tqdm import tqdm
20
21
 
21
22
  from .converters import (
@@ -307,7 +308,7 @@ class QdrantDocumentStore:
307
308
  )
308
309
  return documents
309
310
 
310
- def query_by_sparse(
311
+ def _query_by_sparse(
311
312
  self,
312
313
  query_sparse_embedding: SparseEmbedding,
313
314
  filters: Optional[Dict[str, Any]] = None,
@@ -349,7 +350,7 @@ class QdrantDocumentStore:
349
350
  document.score = score
350
351
  return results
351
352
 
352
- def query_by_embedding(
353
+ def _query_by_embedding(
353
354
  self,
354
355
  query_embedding: List[float],
355
356
  filters: Optional[Dict[str, Any]] = None,
@@ -383,6 +384,86 @@ class QdrantDocumentStore:
383
384
  document.score = score
384
385
  return results
385
386
 
387
+ def _query_hybrid(
388
+ self,
389
+ query_embedding: List[float],
390
+ query_sparse_embedding: SparseEmbedding,
391
+ filters: Optional[Dict[str, Any]] = None,
392
+ top_k: int = 10,
393
+ return_embedding: bool = False,
394
+ ) -> List[Document]:
395
+ """
396
+ Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
397
+
398
+ This method is not part of the public interface of `QdrantDocumentStore` and shouldn't be used directly.
399
+ Use the `QdrantHybridRetriever` instead.
400
+
401
+ :param query_embedding: Dense embedding of the query.
402
+ :param query_sparse_embedding: Sparse embedding of the query.
403
+ :param filters: Filters applied to the retrieved Documents.
404
+ :param top_k: Maximum number of Documents to return.
405
+ :param return_embedding: Whether to return the embeddings of the retrieved documents.
406
+
407
+ :returns: List of Documents that are most similar to `query_embedding` and `query_sparse_embedding`.
408
+
409
+ :raises QdrantStoreError:
410
+ If the Document Store was initialized with `use_sparse_embeddings=False`.
411
+ """
412
+
413
+ # This implementation is based on the code from the Python Qdrant client:
414
+ # https://github.com/qdrant/qdrant-client/blob/8e3ea58f781e4110d11c0a6985b5e6bb66b85d33/qdrant_client/qdrant_fastembed.py#L519
415
+ if not self.use_sparse_embeddings:
416
+ message = (
417
+ "You are trying to query using sparse embeddings, but the Document Store "
418
+ "was initialized with `use_sparse_embeddings=False`. "
419
+ )
420
+ raise QdrantStoreError(message)
421
+
422
+ qdrant_filters = convert_filters_to_qdrant(filters)
423
+
424
+ sparse_request = rest.SearchRequest(
425
+ vector=rest.NamedSparseVector(
426
+ name=SPARSE_VECTORS_NAME,
427
+ vector=rest.SparseVector(
428
+ indices=query_sparse_embedding.indices,
429
+ values=query_sparse_embedding.values,
430
+ ),
431
+ ),
432
+ filter=qdrant_filters,
433
+ limit=top_k,
434
+ with_payload=True,
435
+ with_vector=return_embedding,
436
+ )
437
+
438
+ dense_request = rest.SearchRequest(
439
+ vector=rest.NamedVector(
440
+ name=DENSE_VECTORS_NAME,
441
+ vector=query_embedding,
442
+ ),
443
+ filter=qdrant_filters,
444
+ limit=top_k,
445
+ with_payload=True,
446
+ with_vector=return_embedding,
447
+ )
448
+
449
+ try:
450
+ dense_request_response, sparse_request_response = self.client.search_batch(
451
+ collection_name=self.index, requests=[dense_request, sparse_request]
452
+ )
453
+ except Exception as e:
454
+ msg = "Error during hybrid search"
455
+ raise QdrantStoreError(msg) from e
456
+
457
+ try:
458
+ points = reciprocal_rank_fusion(responses=[dense_request_response, sparse_request_response], limit=top_k)
459
+ except Exception as e:
460
+ msg = "Error while applying Reciprocal Rank Fusion"
461
+ raise QdrantStoreError(msg) from e
462
+
463
+ results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
464
+
465
+ return results
466
+
386
467
  def _get_distance(self, similarity: str) -> rest.Distance:
387
468
  try:
388
469
  return self.SIMILARITY[similarity]
@@ -0,0 +1,18 @@
1
+ import numpy as np
2
+ import pytest
3
+ from haystack.dataclasses import SparseEmbedding
4
+
5
+
6
+ @pytest.fixture(scope="session")
7
+ def generate_sparse_embedding():
8
+ """
9
+ This fixture returns a function that generates a random SparseEmbedding each time it is called.
10
+ """
11
+
12
+ def _generate_random_sparse_embedding():
13
+ random_indice_length = np.random.randint(3, 15)
14
+ indices = list(range(random_indice_length))
15
+ values = [np.random.random_sample() for _ in range(random_indice_length)]
16
+ return SparseEmbedding(indices=indices, values=values)
17
+
18
+ return _generate_random_sparse_embedding
@@ -0,0 +1,104 @@
1
+ from typing import List
2
+ from unittest.mock import patch
3
+
4
+ import pytest
5
+ from haystack import Document
6
+ from haystack.dataclasses import SparseEmbedding
7
+ from haystack.document_stores.errors import DuplicateDocumentError
8
+ from haystack.document_stores.types import DuplicatePolicy
9
+ from haystack.testing.document_store import (
10
+ CountDocumentsTest,
11
+ DeleteDocumentsTest,
12
+ WriteDocumentsTest,
13
+ _random_embeddings,
14
+ )
15
+ from haystack_integrations.document_stores.qdrant.document_store import QdrantDocumentStore, QdrantStoreError
16
+
17
+
18
+ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
19
+ @pytest.fixture
20
+ def document_store(self) -> QdrantDocumentStore:
21
+ return QdrantDocumentStore(
22
+ ":memory:",
23
+ recreate_index=True,
24
+ return_embedding=True,
25
+ wait_result_from_api=True,
26
+ use_sparse_embeddings=False,
27
+ )
28
+
29
+ def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
30
+ """
31
+ Assert that two lists of Documents are equal.
32
+ This is used in every test.
33
+ """
34
+
35
+ # Check that the lengths of the lists are the same
36
+ assert len(received) == len(expected)
37
+
38
+ # Check that the sets are equal, meaning the content and IDs match regardless of order
39
+ assert {doc.id for doc in received} == {doc.id for doc in expected}
40
+
41
+ def test_write_documents(self, document_store: QdrantDocumentStore):
42
+ docs = [Document(id="1")]
43
+ assert document_store.write_documents(docs) == 1
44
+ with pytest.raises(DuplicateDocumentError):
45
+ document_store.write_documents(docs, DuplicatePolicy.FAIL)
46
+
47
+ def test_query_hybrid(self, generate_sparse_embedding):
48
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
49
+
50
+ docs = []
51
+ for i in range(20):
52
+ docs.append(
53
+ Document(
54
+ content=f"doc {i}", sparse_embedding=generate_sparse_embedding(), embedding=_random_embeddings(768)
55
+ )
56
+ )
57
+
58
+ document_store.write_documents(docs)
59
+
60
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
61
+ embedding = [0.1] * 768
62
+
63
+ results: List[Document] = document_store._query_hybrid(
64
+ query_sparse_embedding=sparse_embedding, query_embedding=embedding, top_k=10, return_embedding=True
65
+ )
66
+ assert len(results) == 10
67
+
68
+ for document in results:
69
+ assert document.sparse_embedding
70
+ assert document.embedding
71
+
72
+ def test_query_hybrid_fail_without_sparse_embedding(self, document_store):
73
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
74
+ embedding = [0.1] * 768
75
+
76
+ with pytest.raises(QdrantStoreError):
77
+
78
+ document_store._query_hybrid(
79
+ query_sparse_embedding=sparse_embedding,
80
+ query_embedding=embedding,
81
+ )
82
+
83
+ def test_query_hybrid_search_batch_failure(self):
84
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
85
+
86
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
87
+ embedding = [0.1] * 768
88
+
89
+ with patch.object(document_store.client, "search_batch", side_effect=Exception("search_batch error")):
90
+
91
+ with pytest.raises(QdrantStoreError):
92
+ document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
93
+
94
+ @patch("haystack_integrations.document_stores.qdrant.document_store.reciprocal_rank_fusion")
95
+ def test_query_hybrid_reciprocal_rank_fusion_failure(self, mocked_fusion):
96
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
97
+
98
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
99
+ embedding = [0.1] * 768
100
+
101
+ mocked_fusion.side_effect = Exception("reciprocal_rank_fusion error")
102
+
103
+ with pytest.raises(QdrantStoreError):
104
+ document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
@@ -1,6 +1,6 @@
1
1
  from typing import List
2
+ from unittest.mock import Mock
2
3
 
3
- import numpy as np
4
4
  from haystack.dataclasses import Document, SparseEmbedding
5
5
  from haystack.testing.document_store import (
6
6
  FilterableDocsFixtureMixin,
@@ -8,6 +8,7 @@ from haystack.testing.document_store import (
8
8
  )
9
9
  from haystack_integrations.components.retrievers.qdrant import (
10
10
  QdrantEmbeddingRetriever,
11
+ QdrantHybridRetriever,
11
12
  QdrantSparseEmbeddingRetriever,
12
13
  )
13
14
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
@@ -222,23 +223,12 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
222
223
  assert retriever._scale_score is False
223
224
  assert retriever._return_embedding is True
224
225
 
225
- def _generate_mocked_sparse_embedding(self, n):
226
- list_of_sparse_vectors = []
227
- for _ in range(n):
228
- random_indice_length = np.random.randint(3, 15)
229
- data = {
230
- "indices": list(range(random_indice_length)),
231
- "values": [np.random.random_sample() for _ in range(random_indice_length)],
232
- }
233
- list_of_sparse_vectors.append(data)
234
- return list_of_sparse_vectors
235
-
236
- def test_run(self, filterable_docs: List[Document]):
226
+ def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
237
227
  document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
238
228
 
239
229
  # Add fake sparse embedding to documents
240
230
  for doc in filterable_docs:
241
- doc.sparse_embedding = SparseEmbedding.from_dict(self._generate_mocked_sparse_embedding(1)[0])
231
+ doc.sparse_embedding = generate_sparse_embedding()
242
232
 
243
233
  document_store.write_documents(filterable_docs)
244
234
  retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
@@ -252,3 +242,112 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
252
242
 
253
243
  for document in results:
254
244
  assert document.sparse_embedding
245
+
246
+
247
+ class TestQdrantHybridRetriever:
248
+ def test_init_default(self):
249
+ document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=True)
250
+ retriever = QdrantHybridRetriever(document_store=document_store)
251
+
252
+ assert retriever._document_store == document_store
253
+ assert retriever._filters is None
254
+ assert retriever._top_k == 10
255
+ assert retriever._return_embedding is False
256
+
257
+ def test_to_dict(self):
258
+ document_store = QdrantDocumentStore(location=":memory:", index="test")
259
+ retriever = QdrantHybridRetriever(document_store=document_store, top_k=5, return_embedding=True)
260
+ res = retriever.to_dict()
261
+ assert res == {
262
+ "type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantHybridRetriever",
263
+ "init_parameters": {
264
+ "document_store": {
265
+ "type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
266
+ "init_parameters": {
267
+ "location": ":memory:",
268
+ "url": None,
269
+ "port": 6333,
270
+ "grpc_port": 6334,
271
+ "prefer_grpc": False,
272
+ "https": None,
273
+ "api_key": None,
274
+ "prefix": None,
275
+ "timeout": None,
276
+ "host": None,
277
+ "path": None,
278
+ "index": "test",
279
+ "embedding_dim": 768,
280
+ "on_disk": False,
281
+ "content_field": "content",
282
+ "name_field": "name",
283
+ "embedding_field": "embedding",
284
+ "use_sparse_embeddings": False,
285
+ "similarity": "cosine",
286
+ "return_embedding": False,
287
+ "progress_bar": True,
288
+ "duplicate_documents": "overwrite",
289
+ "recreate_index": False,
290
+ "shard_number": None,
291
+ "replication_factor": None,
292
+ "write_consistency_factor": None,
293
+ "on_disk_payload": None,
294
+ "hnsw_config": None,
295
+ "optimizers_config": None,
296
+ "wal_config": None,
297
+ "quantization_config": None,
298
+ "init_from": None,
299
+ "wait_result_from_api": True,
300
+ "metadata": {},
301
+ "write_batch_size": 100,
302
+ "scroll_size": 10000,
303
+ "payload_fields_to_index": None,
304
+ },
305
+ },
306
+ "filters": None,
307
+ "top_k": 5,
308
+ "return_embedding": True,
309
+ },
310
+ }
311
+
312
+ def test_from_dict(self):
313
+ data = {
314
+ "type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantHybridRetriever",
315
+ "init_parameters": {
316
+ "document_store": {
317
+ "init_parameters": {"location": ":memory:", "index": "test"},
318
+ "type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
319
+ },
320
+ "filters": None,
321
+ "top_k": 5,
322
+ "return_embedding": True,
323
+ },
324
+ }
325
+ retriever = QdrantHybridRetriever.from_dict(data)
326
+ assert isinstance(retriever._document_store, QdrantDocumentStore)
327
+ assert retriever._document_store.index == "test"
328
+ assert retriever._filters is None
329
+ assert retriever._top_k == 5
330
+ assert retriever._return_embedding
331
+
332
+ def test_run(self):
333
+ mock_store = Mock(spec=QdrantDocumentStore)
334
+ sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
335
+ mock_store._query_hybrid.return_value = [
336
+ Document(content="Test doc", embedding=[0.1, 0.2], sparse_embedding=sparse_embedding)
337
+ ]
338
+
339
+ retriever = QdrantHybridRetriever(document_store=mock_store)
340
+ res = retriever.run(
341
+ query_embedding=[0.5, 0.7], query_sparse_embedding=SparseEmbedding(indices=[0, 5], values=[0.1, 0.7])
342
+ )
343
+
344
+ call_args = mock_store._query_hybrid.call_args
345
+ assert call_args[1]["query_embedding"] == [0.5, 0.7]
346
+ assert call_args[1]["query_sparse_embedding"].indices == [0, 5]
347
+ assert call_args[1]["query_sparse_embedding"].values == [0.1, 0.7]
348
+ assert call_args[1]["top_k"] == 10
349
+ assert call_args[1]["return_embedding"] is False
350
+
351
+ assert res["documents"][0].content == "Test doc"
352
+ assert res["documents"][0].embedding == [0.1, 0.2]
353
+ assert res["documents"][0].sparse_embedding == sparse_embedding
@@ -1,41 +0,0 @@
1
- from typing import List
2
-
3
- import pytest
4
- from haystack import Document
5
- from haystack.document_stores.errors import DuplicateDocumentError
6
- from haystack.document_stores.types import DuplicatePolicy
7
- from haystack.testing.document_store import (
8
- CountDocumentsTest,
9
- DeleteDocumentsTest,
10
- WriteDocumentsTest,
11
- )
12
- from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
13
-
14
-
15
- class TestQdrantStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
16
- @pytest.fixture
17
- def document_store(self) -> QdrantDocumentStore:
18
- return QdrantDocumentStore(
19
- ":memory:",
20
- recreate_index=True,
21
- return_embedding=True,
22
- wait_result_from_api=True,
23
- )
24
-
25
- def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
26
- """
27
- Assert that two lists of Documents are equal.
28
- This is used in every test.
29
- """
30
-
31
- # Check that the lengths of the lists are the same
32
- assert len(received) == len(expected)
33
-
34
- # Check that the sets are equal, meaning the content and IDs match regardless of order
35
- assert {doc.id for doc in received} == {doc.id for doc in expected}
36
-
37
- def test_write_documents(self, document_store: QdrantDocumentStore):
38
- docs = [Document(id="1")]
39
- assert document_store.write_documents(docs) == 1
40
- with pytest.raises(DuplicateDocumentError):
41
- document_store.write_documents(docs, DuplicatePolicy.FAIL)