qdrant-haystack 3.5.0__tar.gz → 3.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/PKG-INFO +1 -1
  2. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/pydoc/config.yml +1 -1
  3. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +21 -19
  4. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +59 -58
  5. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/document_stores/qdrant/filters.py +3 -2
  6. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_document_store.py +5 -0
  7. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_filters.py +16 -0
  8. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/.gitignore +0 -0
  9. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/LICENSE.txt +0 -0
  10. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/README.md +0 -0
  11. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/examples/embedding_retrieval.py +0 -0
  12. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/pyproject.toml +0 -0
  13. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  14. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  15. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  16. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  17. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/__init__.py +0 -0
  18. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/conftest.py +0 -0
  19. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_converters.py +0 -0
  20. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_dict_converters.py +0 -0
  21. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_legacy_filters.py +0 -0
  22. {qdrant_haystack-3.5.0 → qdrant_haystack-3.7.0}/tests/test_retriever.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.5.0
3
+ Version: 3.7.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -17,7 +17,7 @@ processors:
17
17
  - type: smart
18
18
  - type: crossref
19
19
  renderer:
20
- type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
20
+ type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
21
21
  excerpt: Qdrant integration for Haystack
22
22
  category_slug: integrations-api
23
23
  title: Qdrant
@@ -1,8 +1,9 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, Union
2
2
 
3
3
  from haystack import Document, component, default_from_dict, default_to_dict
4
4
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
5
5
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
6
+ from qdrant_client.http import models
6
7
 
7
8
 
8
9
  @component
@@ -12,6 +13,7 @@ class QdrantEmbeddingRetriever:
12
13
 
13
14
  Usage example:
14
15
  ```python
16
+ from haystack.dataclasses import Document
15
17
  from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
16
18
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
17
19
 
@@ -33,7 +35,7 @@ class QdrantEmbeddingRetriever:
33
35
  def __init__(
34
36
  self,
35
37
  document_store: QdrantDocumentStore,
36
- filters: Optional[Dict[str, Any]] = None,
38
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
37
39
  top_k: int = 10,
38
40
  scale_score: bool = True,
39
41
  return_embedding: bool = False,
@@ -42,12 +44,12 @@ class QdrantEmbeddingRetriever:
42
44
  Create a QdrantEmbeddingRetriever component.
43
45
 
44
46
  :param document_store: An instance of QdrantDocumentStore.
45
- :param filters: A dictionary with filters to narrow down the search space. Default is None.
46
- :param top_k: The maximum number of documents to retrieve. Default is 10.
47
- :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
48
- :param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.
47
+ :param filters: A dictionary with filters to narrow down the search space.
48
+ :param top_k: The maximum number of documents to retrieve.
49
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
50
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
49
51
 
50
- :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
52
+ :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
51
53
  """
52
54
 
53
55
  if not isinstance(document_store, QdrantDocumentStore):
@@ -97,7 +99,7 @@ class QdrantEmbeddingRetriever:
97
99
  def run(
98
100
  self,
99
101
  query_embedding: List[float],
100
- filters: Optional[Dict[str, Any]] = None,
102
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
101
103
  top_k: Optional[int] = None,
102
104
  scale_score: Optional[bool] = None,
103
105
  return_embedding: Optional[bool] = None,
@@ -134,7 +136,7 @@ class QdrantSparseEmbeddingRetriever:
134
136
  ```python
135
137
  from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
136
138
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
137
- from haystack.dataclasses.sparse_embedding import SparseEmbedding
139
+ from haystack.dataclasses import Document, SparseEmbedding
138
140
 
139
141
  document_store = QdrantDocumentStore(
140
142
  ":memory:",
@@ -155,7 +157,7 @@ class QdrantSparseEmbeddingRetriever:
155
157
  def __init__(
156
158
  self,
157
159
  document_store: QdrantDocumentStore,
158
- filters: Optional[Dict[str, Any]] = None,
160
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
159
161
  top_k: int = 10,
160
162
  scale_score: bool = True,
161
163
  return_embedding: bool = False,
@@ -164,12 +166,12 @@ class QdrantSparseEmbeddingRetriever:
164
166
  Create a QdrantSparseEmbeddingRetriever component.
165
167
 
166
168
  :param document_store: An instance of QdrantDocumentStore.
167
- :param filters: A dictionary with filters to narrow down the search space. Default is None.
168
- :param top_k: The maximum number of documents to retrieve. Default is 10.
169
- :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
170
- :param return_embedding: Whether to return the sparse embedding of the retrieved Documents. Default is False.
169
+ :param filters: A dictionary with filters to narrow down the search space.
170
+ :param top_k: The maximum number of documents to retrieve.
171
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
172
+ :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
171
173
 
172
- :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
174
+ :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
173
175
  """
174
176
 
175
177
  if not isinstance(document_store, QdrantDocumentStore):
@@ -219,7 +221,7 @@ class QdrantSparseEmbeddingRetriever:
219
221
  def run(
220
222
  self,
221
223
  query_sparse_embedding: SparseEmbedding,
222
- filters: Optional[Dict[str, Any]] = None,
224
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
223
225
  top_k: Optional[int] = None,
224
226
  scale_score: Optional[bool] = None,
225
227
  return_embedding: Optional[bool] = None,
@@ -257,7 +259,7 @@ class QdrantHybridRetriever:
257
259
  ```python
258
260
  from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
259
261
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
260
- from haystack.dataclasses.sparse_embedding import SparseEmbedding
262
+ from haystack.dataclasses import Document, SparseEmbedding
261
263
 
262
264
  document_store = QdrantDocumentStore(
263
265
  ":memory:",
@@ -283,7 +285,7 @@ class QdrantHybridRetriever:
283
285
  def __init__(
284
286
  self,
285
287
  document_store: QdrantDocumentStore,
286
- filters: Optional[Dict[str, Any]] = None,
288
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
287
289
  top_k: int = 10,
288
290
  return_embedding: bool = False,
289
291
  ):
@@ -341,7 +343,7 @@ class QdrantHybridRetriever:
341
343
  self,
342
344
  query_embedding: List[float],
343
345
  query_sparse_embedding: SparseEmbedding,
344
- filters: Optional[Dict[str, Any]] = None,
346
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
345
347
  top_k: Optional[int] = None,
346
348
  return_embedding: Optional[bool] = None,
347
349
  ):
@@ -5,7 +5,6 @@ from typing import Any, ClassVar, Dict, Generator, List, Optional, Set, Union
5
5
 
6
6
  import numpy as np
7
7
  import qdrant_client
8
- from grpc import RpcError
9
8
  from haystack import default_from_dict, default_to_dict
10
9
  from haystack.dataclasses import Document
11
10
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
@@ -66,7 +65,7 @@ class QdrantDocumentStore:
66
65
  https: Optional[bool] = None,
67
66
  api_key: Optional[Secret] = None,
68
67
  prefix: Optional[str] = None,
69
- timeout: Optional[float] = None,
68
+ timeout: Optional[int] = None,
70
69
  host: Optional[str] = None,
71
70
  path: Optional[str] = None,
72
71
  index: str = "Document",
@@ -96,23 +95,7 @@ class QdrantDocumentStore:
96
95
  scroll_size: int = 10_000,
97
96
  payload_fields_to_index: Optional[List[dict]] = None,
98
97
  ):
99
- super().__init__()
100
-
101
- metadata = metadata or {}
102
- self.client = qdrant_client.QdrantClient(
103
- location=location,
104
- url=url,
105
- port=port,
106
- grpc_port=grpc_port,
107
- prefer_grpc=prefer_grpc,
108
- https=https,
109
- api_key=api_key.resolve_value() if api_key else None,
110
- prefix=prefix,
111
- timeout=timeout,
112
- host=host,
113
- path=path,
114
- metadata=metadata,
115
- )
98
+ self._client = None
116
99
 
117
100
  # Store the Qdrant client specific attributes
118
101
  self.location = location
@@ -126,7 +109,7 @@ class QdrantDocumentStore:
126
109
  self.timeout = timeout
127
110
  self.host = host
128
111
  self.path = path
129
- self.metadata = metadata
112
+ self.metadata = metadata or {}
130
113
  self.api_key = api_key
131
114
 
132
115
  # Store the Qdrant collection specific attributes
@@ -143,12 +126,6 @@ class QdrantDocumentStore:
143
126
  self.recreate_index = recreate_index
144
127
  self.payload_fields_to_index = payload_fields_to_index
145
128
  self.use_sparse_embeddings = use_sparse_embeddings
146
-
147
- # Make sure the collection is properly set up
148
- self._set_up_collection(
149
- index, embedding_dim, recreate_index, similarity, use_sparse_embeddings, on_disk, payload_fields_to_index
150
- )
151
-
152
129
  self.embedding_dim = embedding_dim
153
130
  self.on_disk = on_disk
154
131
  self.content_field = content_field
@@ -162,6 +139,35 @@ class QdrantDocumentStore:
162
139
  self.write_batch_size = write_batch_size
163
140
  self.scroll_size = scroll_size
164
141
 
142
+ @property
143
+ def client(self):
144
+ if not self._client:
145
+ self._client = qdrant_client.QdrantClient(
146
+ location=self.location,
147
+ url=self.url,
148
+ port=self.port,
149
+ grpc_port=self.grpc_port,
150
+ prefer_grpc=self.prefer_grpc,
151
+ https=self.https,
152
+ api_key=self.api_key.resolve_value() if self.api_key else None,
153
+ prefix=self.prefix,
154
+ timeout=self.timeout,
155
+ host=self.host,
156
+ path=self.path,
157
+ metadata=self.metadata,
158
+ )
159
+ # Make sure the collection is properly set up
160
+ self._set_up_collection(
161
+ self.index,
162
+ self.embedding_dim,
163
+ self.recreate_index,
164
+ self.similarity,
165
+ self.use_sparse_embeddings,
166
+ self.on_disk,
167
+ self.payload_fields_to_index,
168
+ )
169
+ return self._client
170
+
165
171
  def count_documents(self) -> int:
166
172
  try:
167
173
  response = self.client.count(
@@ -176,13 +182,13 @@ class QdrantDocumentStore:
176
182
 
177
183
  def filter_documents(
178
184
  self,
179
- filters: Optional[Dict[str, Any]] = None,
185
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
180
186
  ) -> List[Document]:
181
- if filters and not isinstance(filters, dict):
182
- msg = "Filter must be a dictionary"
187
+ if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
188
+ msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
183
189
  raise ValueError(msg)
184
190
 
185
- if filters and "operator" not in filters:
191
+ if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
186
192
  filters = convert_legacy_filters(filters)
187
193
  return list(
188
194
  self.get_documents_generator(
@@ -260,7 +266,7 @@ class QdrantDocumentStore:
260
266
 
261
267
  def get_documents_generator(
262
268
  self,
263
- filters: Optional[Dict[str, Any]] = None,
269
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
264
270
  ) -> Generator[Document, None, None]:
265
271
  index = self.index
266
272
  qdrant_filters = convert_filters_to_qdrant(filters)
@@ -311,7 +317,7 @@ class QdrantDocumentStore:
311
317
  def _query_by_sparse(
312
318
  self,
313
319
  query_sparse_embedding: SparseEmbedding,
314
- filters: Optional[Dict[str, Any]] = None,
320
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
315
321
  top_k: int = 10,
316
322
  scale_score: bool = True,
317
323
  return_embedding: bool = False,
@@ -353,7 +359,7 @@ class QdrantDocumentStore:
353
359
  def _query_by_embedding(
354
360
  self,
355
361
  query_embedding: List[float],
356
- filters: Optional[Dict[str, Any]] = None,
362
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
357
363
  top_k: int = 10,
358
364
  scale_score: bool = True,
359
365
  return_embedding: bool = False,
@@ -388,7 +394,7 @@ class QdrantDocumentStore:
388
394
  self,
389
395
  query_embedding: List[float],
390
396
  query_sparse_embedding: SparseEmbedding,
391
- filters: Optional[Dict[str, Any]] = None,
397
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
392
398
  top_k: int = 10,
393
399
  return_embedding: bool = False,
394
400
  ) -> List[Document]:
@@ -464,7 +470,7 @@ class QdrantDocumentStore:
464
470
 
465
471
  return results
466
472
 
467
- def _get_distance(self, similarity: str) -> rest.Distance:
473
+ def get_distance(self, similarity: str) -> rest.Distance:
468
474
  try:
469
475
  return self.SIMILARITY[similarity]
470
476
  except KeyError as ke:
@@ -498,31 +504,17 @@ class QdrantDocumentStore:
498
504
  on_disk: bool = False,
499
505
  payload_fields_to_index: Optional[List[dict]] = None,
500
506
  ):
501
- distance = self._get_distance(similarity)
507
+ distance = self.get_distance(similarity)
502
508
 
503
- if recreate_collection:
509
+ if recreate_collection or not self.client.collection_exists(collection_name):
504
510
  # There is no need to verify the current configuration of that
505
- # collection. It might be just recreated again.
506
- self._recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
511
+ # collection. It might be just recreated again or does not exist yet.
512
+ self.recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
507
513
  # Create Payload index if payload_fields_to_index is provided
508
514
  self._create_payload_index(collection_name, payload_fields_to_index)
509
515
  return
510
516
 
511
- try:
512
- # Check if the collection already exists and validate its
513
- # current configuration with the parameters.
514
- collection_info = self.client.get_collection(collection_name)
515
- except (UnexpectedResponse, RpcError, ValueError):
516
- # That indicates the collection does not exist, so it can be
517
- # safely created with any configuration.
518
- #
519
- # Qdrant local raises ValueError if the collection is not found, but
520
- # with the remote server UnexpectedResponse / RpcError is raised.
521
- # Until that's unified, we need to catch both.
522
- self._recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
523
- # Create Payload index if payload_fields_to_index is provided
524
- self._create_payload_index(collection_name, payload_fields_to_index)
525
- return
517
+ collection_info = self.client.get_collection(collection_name)
526
518
 
527
519
  has_named_vectors = (
528
520
  isinstance(collection_info.config.params.vectors, dict)
@@ -573,14 +565,20 @@ class QdrantDocumentStore:
573
565
  )
574
566
  raise ValueError(msg)
575
567
 
576
- def _recreate_collection(
568
+ def recreate_collection(
577
569
  self,
578
570
  collection_name: str,
579
571
  distance,
580
572
  embedding_dim: int,
581
- on_disk: bool,
582
- use_sparse_embeddings: bool,
573
+ on_disk: Optional[bool] = None,
574
+ use_sparse_embeddings: Optional[bool] = None,
583
575
  ):
576
+ if on_disk is None:
577
+ on_disk = self.on_disk
578
+
579
+ if use_sparse_embeddings is None:
580
+ use_sparse_embeddings = self.use_sparse_embeddings
581
+
584
582
  # dense vectors configuration
585
583
  vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
586
584
 
@@ -596,7 +594,10 @@ class QdrantDocumentStore:
596
594
  ),
597
595
  }
598
596
 
599
- self.client.recreate_collection(
597
+ if self.client.collection_exists(collection_name):
598
+ self.client.delete_collection(collection_name)
599
+
600
+ self.client.create_collection(
600
601
  collection_name=collection_name,
601
602
  vectors_config=vectors_config,
602
603
  sparse_vectors_config=sparse_vectors_config if use_sparse_embeddings else None,
@@ -11,10 +11,11 @@ LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
11
11
 
12
12
 
13
13
  def convert_filters_to_qdrant(
14
- filter_term: Optional[Union[List[dict], dict]] = None,
14
+ filter_term: Optional[Union[List[dict], dict, models.Filter]] = None,
15
15
  ) -> Optional[models.Filter]:
16
16
  """Converts Haystack filters to the format used by Qdrant."""
17
-
17
+ if isinstance(filter_term, models.Filter):
18
+ return filter_term
18
19
  if not filter_term:
19
20
  return None
20
21
 
@@ -26,6 +26,11 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
26
26
  use_sparse_embeddings=False,
27
27
  )
28
28
 
29
+ def test_init_is_lazy(self):
30
+ with patch("haystack_integrations.document_stores.qdrant.document_store.qdrant_client") as mocked_qdrant:
31
+ QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
32
+ mocked_qdrant.assert_not_called()
33
+
29
34
  def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
30
35
  """
31
36
  Assert that two lists of Documents are equal.
@@ -5,6 +5,7 @@ from haystack import Document
5
5
  from haystack.testing.document_store import FilterDocumentsTest
6
6
  from haystack.utils.filters import FilterError
7
7
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
8
+ from qdrant_client.http import models
8
9
 
9
10
 
10
11
  class TestQdrantStoreBaseTests(FilterDocumentsTest):
@@ -17,6 +18,21 @@ class TestQdrantStoreBaseTests(FilterDocumentsTest):
17
18
  wait_result_from_api=True,
18
19
  )
19
20
 
21
+ def test_filter_documents_with_qdrant_filters(self, document_store, filterable_docs):
22
+ document_store.write_documents(filterable_docs)
23
+ result = document_store.filter_documents(
24
+ filters=models.Filter(
25
+ must_not=[
26
+ models.FieldCondition(key="meta.number", match=models.MatchValue(value=100)),
27
+ models.FieldCondition(key="meta.name", match=models.MatchValue(value="name_0")),
28
+ ]
29
+ )
30
+ )
31
+ self.assert_documents_are_equal(
32
+ result,
33
+ [d for d in filterable_docs if (d.meta.get("number") != 100 and d.meta.get("name") != "name_0")],
34
+ )
35
+
20
36
  def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
21
37
  """
22
38
  Assert that two lists of Documents are equal.