qdrant-haystack 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -1,8 +1,9 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, Union
2
2
 
3
3
  from haystack import Document, component, default_from_dict, default_to_dict
4
4
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
5
5
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
6
+ from qdrant_client.http import models
6
7
 
7
8
 
8
9
  @component
@@ -12,6 +13,7 @@ class QdrantEmbeddingRetriever:
12
13
 
13
14
  Usage example:
14
15
  ```python
16
+ from haystack.dataclasses import Document
15
17
  from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
16
18
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
17
19
 
@@ -33,7 +35,7 @@ class QdrantEmbeddingRetriever:
33
35
  def __init__(
34
36
  self,
35
37
  document_store: QdrantDocumentStore,
36
- filters: Optional[Dict[str, Any]] = None,
38
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
37
39
  top_k: int = 10,
38
40
  scale_score: bool = True,
39
41
  return_embedding: bool = False,
@@ -42,12 +44,12 @@ class QdrantEmbeddingRetriever:
42
44
  Create a QdrantEmbeddingRetriever component.
43
45
 
44
46
  :param document_store: An instance of QdrantDocumentStore.
45
- :param filters: A dictionary with filters to narrow down the search space. Default is None.
46
- :param top_k: The maximum number of documents to retrieve. Default is 10.
47
- :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
48
- :param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.
47
+ :param filters: A dictionary with filters to narrow down the search space.
48
+ :param top_k: The maximum number of documents to retrieve.
49
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
50
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
49
51
 
50
- :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
52
+ :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
51
53
  """
52
54
 
53
55
  if not isinstance(document_store, QdrantDocumentStore):
@@ -97,7 +99,7 @@ class QdrantEmbeddingRetriever:
97
99
  def run(
98
100
  self,
99
101
  query_embedding: List[float],
100
- filters: Optional[Dict[str, Any]] = None,
102
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
101
103
  top_k: Optional[int] = None,
102
104
  scale_score: Optional[bool] = None,
103
105
  return_embedding: Optional[bool] = None,
@@ -134,7 +136,7 @@ class QdrantSparseEmbeddingRetriever:
134
136
  ```python
135
137
  from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
136
138
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
137
- from haystack.dataclasses.sparse_embedding import SparseEmbedding
139
+ from haystack.dataclasses import Document, SparseEmbedding
138
140
 
139
141
  document_store = QdrantDocumentStore(
140
142
  ":memory:",
@@ -155,7 +157,7 @@ class QdrantSparseEmbeddingRetriever:
155
157
  def __init__(
156
158
  self,
157
159
  document_store: QdrantDocumentStore,
158
- filters: Optional[Dict[str, Any]] = None,
160
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
159
161
  top_k: int = 10,
160
162
  scale_score: bool = True,
161
163
  return_embedding: bool = False,
@@ -164,12 +166,12 @@ class QdrantSparseEmbeddingRetriever:
164
166
  Create a QdrantSparseEmbeddingRetriever component.
165
167
 
166
168
  :param document_store: An instance of QdrantDocumentStore.
167
- :param filters: A dictionary with filters to narrow down the search space. Default is None.
168
- :param top_k: The maximum number of documents to retrieve. Default is 10.
169
- :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
170
- :param return_embedding: Whether to return the sparse embedding of the retrieved Documents. Default is False.
169
+ :param filters: A dictionary with filters to narrow down the search space.
170
+ :param top_k: The maximum number of documents to retrieve.
171
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
172
+ :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
171
173
 
172
- :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
174
+ :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
173
175
  """
174
176
 
175
177
  if not isinstance(document_store, QdrantDocumentStore):
@@ -219,7 +221,7 @@ class QdrantSparseEmbeddingRetriever:
219
221
  def run(
220
222
  self,
221
223
  query_sparse_embedding: SparseEmbedding,
222
- filters: Optional[Dict[str, Any]] = None,
224
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
223
225
  top_k: Optional[int] = None,
224
226
  scale_score: Optional[bool] = None,
225
227
  return_embedding: Optional[bool] = None,
@@ -257,7 +259,7 @@ class QdrantHybridRetriever:
257
259
  ```python
258
260
  from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
259
261
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
260
- from haystack.dataclasses.sparse_embedding import SparseEmbedding
262
+ from haystack.dataclasses import Document, SparseEmbedding
261
263
 
262
264
  document_store = QdrantDocumentStore(
263
265
  ":memory:",
@@ -283,7 +285,7 @@ class QdrantHybridRetriever:
283
285
  def __init__(
284
286
  self,
285
287
  document_store: QdrantDocumentStore,
286
- filters: Optional[Dict[str, Any]] = None,
288
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
287
289
  top_k: int = 10,
288
290
  return_embedding: bool = False,
289
291
  ):
@@ -341,7 +343,7 @@ class QdrantHybridRetriever:
341
343
  self,
342
344
  query_embedding: List[float],
343
345
  query_sparse_embedding: SparseEmbedding,
344
- filters: Optional[Dict[str, Any]] = None,
346
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
345
347
  top_k: Optional[int] = None,
346
348
  return_embedding: Optional[bool] = None,
347
349
  ):
@@ -66,7 +66,7 @@ class QdrantDocumentStore:
66
66
  https: Optional[bool] = None,
67
67
  api_key: Optional[Secret] = None,
68
68
  prefix: Optional[str] = None,
69
- timeout: Optional[float] = None,
69
+ timeout: Optional[int] = None,
70
70
  host: Optional[str] = None,
71
71
  path: Optional[str] = None,
72
72
  index: str = "Document",
@@ -96,23 +96,7 @@ class QdrantDocumentStore:
96
96
  scroll_size: int = 10_000,
97
97
  payload_fields_to_index: Optional[List[dict]] = None,
98
98
  ):
99
- super().__init__()
100
-
101
- metadata = metadata or {}
102
- self.client = qdrant_client.QdrantClient(
103
- location=location,
104
- url=url,
105
- port=port,
106
- grpc_port=grpc_port,
107
- prefer_grpc=prefer_grpc,
108
- https=https,
109
- api_key=api_key.resolve_value() if api_key else None,
110
- prefix=prefix,
111
- timeout=timeout,
112
- host=host,
113
- path=path,
114
- metadata=metadata,
115
- )
99
+ self._client = None
116
100
 
117
101
  # Store the Qdrant client specific attributes
118
102
  self.location = location
@@ -126,7 +110,7 @@ class QdrantDocumentStore:
126
110
  self.timeout = timeout
127
111
  self.host = host
128
112
  self.path = path
129
- self.metadata = metadata
113
+ self.metadata = metadata or {}
130
114
  self.api_key = api_key
131
115
 
132
116
  # Store the Qdrant collection specific attributes
@@ -143,12 +127,6 @@ class QdrantDocumentStore:
143
127
  self.recreate_index = recreate_index
144
128
  self.payload_fields_to_index = payload_fields_to_index
145
129
  self.use_sparse_embeddings = use_sparse_embeddings
146
-
147
- # Make sure the collection is properly set up
148
- self._set_up_collection(
149
- index, embedding_dim, recreate_index, similarity, use_sparse_embeddings, on_disk, payload_fields_to_index
150
- )
151
-
152
130
  self.embedding_dim = embedding_dim
153
131
  self.on_disk = on_disk
154
132
  self.content_field = content_field
@@ -162,6 +140,35 @@ class QdrantDocumentStore:
162
140
  self.write_batch_size = write_batch_size
163
141
  self.scroll_size = scroll_size
164
142
 
143
+ @property
144
+ def client(self):
145
+ if not self._client:
146
+ self._client = qdrant_client.QdrantClient(
147
+ location=self.location,
148
+ url=self.url,
149
+ port=self.port,
150
+ grpc_port=self.grpc_port,
151
+ prefer_grpc=self.prefer_grpc,
152
+ https=self.https,
153
+ api_key=self.api_key.resolve_value() if self.api_key else None,
154
+ prefix=self.prefix,
155
+ timeout=self.timeout,
156
+ host=self.host,
157
+ path=self.path,
158
+ metadata=self.metadata,
159
+ )
160
+ # Make sure the collection is properly set up
161
+ self._set_up_collection(
162
+ self.index,
163
+ self.embedding_dim,
164
+ self.recreate_index,
165
+ self.similarity,
166
+ self.use_sparse_embeddings,
167
+ self.on_disk,
168
+ self.payload_fields_to_index,
169
+ )
170
+ return self._client
171
+
165
172
  def count_documents(self) -> int:
166
173
  try:
167
174
  response = self.client.count(
@@ -176,13 +183,13 @@ class QdrantDocumentStore:
176
183
 
177
184
  def filter_documents(
178
185
  self,
179
- filters: Optional[Dict[str, Any]] = None,
186
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
180
187
  ) -> List[Document]:
181
- if filters and not isinstance(filters, dict):
182
- msg = "Filter must be a dictionary"
188
+ if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
189
+ msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
183
190
  raise ValueError(msg)
184
191
 
185
- if filters and "operator" not in filters:
192
+ if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
186
193
  filters = convert_legacy_filters(filters)
187
194
  return list(
188
195
  self.get_documents_generator(
@@ -260,7 +267,7 @@ class QdrantDocumentStore:
260
267
 
261
268
  def get_documents_generator(
262
269
  self,
263
- filters: Optional[Dict[str, Any]] = None,
270
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
264
271
  ) -> Generator[Document, None, None]:
265
272
  index = self.index
266
273
  qdrant_filters = convert_filters_to_qdrant(filters)
@@ -311,7 +318,7 @@ class QdrantDocumentStore:
311
318
  def _query_by_sparse(
312
319
  self,
313
320
  query_sparse_embedding: SparseEmbedding,
314
- filters: Optional[Dict[str, Any]] = None,
321
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
315
322
  top_k: int = 10,
316
323
  scale_score: bool = True,
317
324
  return_embedding: bool = False,
@@ -353,7 +360,7 @@ class QdrantDocumentStore:
353
360
  def _query_by_embedding(
354
361
  self,
355
362
  query_embedding: List[float],
356
- filters: Optional[Dict[str, Any]] = None,
363
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
357
364
  top_k: int = 10,
358
365
  scale_score: bool = True,
359
366
  return_embedding: bool = False,
@@ -388,7 +395,7 @@ class QdrantDocumentStore:
388
395
  self,
389
396
  query_embedding: List[float],
390
397
  query_sparse_embedding: SparseEmbedding,
391
- filters: Optional[Dict[str, Any]] = None,
398
+ filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
392
399
  top_k: int = 10,
393
400
  return_embedding: bool = False,
394
401
  ) -> List[Document]:
@@ -11,10 +11,11 @@ LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
11
11
 
12
12
 
13
13
  def convert_filters_to_qdrant(
14
- filter_term: Optional[Union[List[dict], dict]] = None,
14
+ filter_term: Optional[Union[List[dict], dict, models.Filter]] = None,
15
15
  ) -> Optional[models.Filter]:
16
16
  """Converts Haystack filters to the format used by Qdrant."""
17
-
17
+ if isinstance(filter_term, models.Filter):
18
+ return filter_term
18
19
  if not filter_term:
19
20
  return None
20
21
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.5.0
3
+ Version: 3.6.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -1,11 +1,11 @@
1
1
  haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
2
- haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=cb2JxC3O-pH8iWUcLO4EduKUYiO7KoVzBOsrnirSb8w,13384
2
+ haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=r416_a7_6l7ehfPLdFtyHncsPrHj3mFCyROeJJF9rwM,13463
3
3
  haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
4
4
  haystack_integrations/document_stores/qdrant/converters.py,sha256=oSO2YlsWEQbcw9CPlWfSg_HoTZlnkAhZw_6VlYWzKKs,2525
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=rzsDn8IkxyWisSR-jdgGJw6khk8Lqwer0QxCMHYLDxY,26260
6
- haystack_integrations/document_stores/qdrant/filters.py,sha256=iNWOqv1otUaXTURXd8e9QOYg8sx3Qm_LOqOaxAP2xJI,8249
5
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=ouPp-oM1M4VvkYtpZ2pl5kGVQ5Ei4kc--Jwd7cYRlzk,26827
6
+ haystack_integrations/document_stores/qdrant/filters.py,sha256=0w70Wa3Za1fNdbJ5O95sZDIpXfblJG_sBBUv0JTQ0-o,8337
7
7
  haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
8
- qdrant_haystack-3.5.0.dist-info/METADATA,sha256=R-_leVdxwt--25K9DfScVtjyWgUiaHVZ3ax21iD6Vyw,1862
9
- qdrant_haystack-3.5.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
10
- qdrant_haystack-3.5.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
- qdrant_haystack-3.5.0.dist-info/RECORD,,
8
+ qdrant_haystack-3.6.0.dist-info/METADATA,sha256=8pAGEH2tOoSqU-YXe0hoCfgUUOMEdcV1KSEcj4YMOsM,1862
9
+ qdrant_haystack-3.6.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
10
+ qdrant_haystack-3.6.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
+ qdrant_haystack-3.6.0.dist-info/RECORD,,