qdrant-haystack 9.1.0__py3-none-any.whl → 9.1.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -46,7 +46,7 @@ class QdrantEmbeddingRetriever:
46
46
  score_threshold: Optional[float] = None,
47
47
  group_by: Optional[str] = None,
48
48
  group_size: Optional[int] = None,
49
- ):
49
+ ) -> None:
50
50
  """
51
51
  Create a QdrantEmbeddingRetriever component.
52
52
 
@@ -136,7 +136,7 @@ class QdrantEmbeddingRetriever:
136
136
  score_threshold: Optional[float] = None,
137
137
  group_by: Optional[str] = None,
138
138
  group_size: Optional[int] = None,
139
- ):
139
+ ) -> Dict[str, List[Document]]:
140
140
  """
141
141
  Run the Embedding Retriever on the given input data.
142
142
 
@@ -180,7 +180,7 @@ class QdrantEmbeddingRetriever:
180
180
  score_threshold: Optional[float] = None,
181
181
  group_by: Optional[str] = None,
182
182
  group_size: Optional[int] = None,
183
- ):
183
+ ) -> Dict[str, List[Document]]:
184
184
  """
185
185
  Asynchronously run the Embedding Retriever on the given input data.
186
186
 
@@ -252,7 +252,7 @@ class QdrantSparseEmbeddingRetriever:
252
252
  score_threshold: Optional[float] = None,
253
253
  group_by: Optional[str] = None,
254
254
  group_size: Optional[int] = None,
255
- ):
255
+ ) -> None:
256
256
  """
257
257
  Create a QdrantSparseEmbeddingRetriever component.
258
258
 
@@ -342,7 +342,7 @@ class QdrantSparseEmbeddingRetriever:
342
342
  score_threshold: Optional[float] = None,
343
343
  group_by: Optional[str] = None,
344
344
  group_size: Optional[int] = None,
345
- ):
345
+ ) -> Dict[str, List[Document]]:
346
346
  """
347
347
  Run the Sparse Embedding Retriever on the given input data.
348
348
 
@@ -391,7 +391,7 @@ class QdrantSparseEmbeddingRetriever:
391
391
  score_threshold: Optional[float] = None,
392
392
  group_by: Optional[str] = None,
393
393
  group_size: Optional[int] = None,
394
- ):
394
+ ) -> Dict[str, List[Document]]:
395
395
  """
396
396
  Asynchronously run the Sparse Embedding Retriever on the given input data.
397
397
 
@@ -473,7 +473,7 @@ class QdrantHybridRetriever:
473
473
  score_threshold: Optional[float] = None,
474
474
  group_by: Optional[str] = None,
475
475
  group_size: Optional[int] = None,
476
- ):
476
+ ) -> None:
477
477
  """
478
478
  Create a QdrantHybridRetriever component.
479
479
 
@@ -557,7 +557,7 @@ class QdrantHybridRetriever:
557
557
  score_threshold: Optional[float] = None,
558
558
  group_by: Optional[str] = None,
559
559
  group_size: Optional[int] = None,
560
- ):
560
+ ) -> Dict[str, List[Document]]:
561
561
  """
562
562
  Run the Sparse Embedding Retriever on the given input data.
563
563
 
@@ -606,7 +606,7 @@ class QdrantHybridRetriever:
606
606
  score_threshold: Optional[float] = None,
607
607
  group_by: Optional[str] = None,
608
608
  group_size: Optional[int] = None,
609
- ):
609
+ ) -> Dict[str, List[Document]]:
610
610
  """
611
611
  Asynchronously run the Sparse Embedding Retriever on the given input data.
612
612
 
@@ -1,6 +1,6 @@
1
1
  import inspect
2
2
  from itertools import islice
3
- from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Union
3
+ from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Tuple, Union
4
4
 
5
5
  import numpy as np
6
6
  import qdrant_client
@@ -18,6 +18,7 @@ from tqdm import tqdm
18
18
  from .converters import (
19
19
  DENSE_VECTORS_NAME,
20
20
  SPARSE_VECTORS_NAME,
21
+ QdrantPoint,
21
22
  convert_haystack_documents_to_qdrant_points,
22
23
  convert_id,
23
24
  convert_qdrant_point_to_haystack_document,
@@ -34,7 +35,7 @@ class QdrantStoreError(DocumentStoreError):
34
35
  FilterType = Dict[str, Union[Dict[str, Any], List[Any], str, int, float, bool]]
35
36
 
36
37
 
37
- def get_batches_from_generator(iterable, n):
38
+ def get_batches_from_generator(iterable: List, n: int) -> Generator:
38
39
  """
39
40
  Batch elements of an iterable into fixed-length chunks or blocks.
40
41
  """
@@ -127,10 +128,10 @@ class QdrantDocumentStore:
127
128
  write_batch_size: int = 100,
128
129
  scroll_size: int = 10_000,
129
130
  payload_fields_to_index: Optional[List[dict]] = None,
130
- ):
131
+ ) -> None:
131
132
  """
132
133
  :param location:
133
- If `memory` - use in-memory Qdrant instance.
134
+ If `":memory:"` - use in-memory Qdrant instance.
134
135
  If `str` - use it as a URL parameter.
135
136
  If `None` - use default values for host and port.
136
137
  :param url:
@@ -164,7 +165,7 @@ class QdrantDocumentStore:
164
165
  Dimension of the embeddings.
165
166
  :param on_disk:
166
167
  Whether to store the collection on disk.
167
- :param use_sparse_embedding:
168
+ :param use_sparse_embeddings:
168
169
  If set to `True`, enables support for sparse embeddings.
169
170
  :param sparse_idf:
170
171
  If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
@@ -232,7 +233,6 @@ class QdrantDocumentStore:
232
233
  self.path = path
233
234
  self.force_disable_check_same_thread = force_disable_check_same_thread
234
235
  self.metadata = metadata or {}
235
- self.api_key = api_key
236
236
 
237
237
  # Store the Qdrant collection specific attributes
238
238
  self.shard_number = shard_number
@@ -258,9 +258,10 @@ class QdrantDocumentStore:
258
258
  self.write_batch_size = write_batch_size
259
259
  self.scroll_size = scroll_size
260
260
 
261
- def _initialize_client(self):
261
+ def _initialize_client(self) -> None:
262
262
  if self._client is None:
263
263
  client_params = self._prepare_client_params()
264
+ # This step adds the api-key and User-Agent to metadata
264
265
  self._client = qdrant_client.QdrantClient(**client_params)
265
266
  # Make sure the collection is properly set up
266
267
  self._set_up_collection(
@@ -274,7 +275,7 @@ class QdrantDocumentStore:
274
275
  self.payload_fields_to_index,
275
276
  )
276
277
 
277
- async def _initialize_async_client(self):
278
+ async def _initialize_async_client(self) -> None:
278
279
  """
279
280
  Returns the asynchronous Qdrant client, initializing it if necessary.
280
281
  """
@@ -628,8 +629,6 @@ class QdrantDocumentStore:
628
629
 
629
630
  :param ids:
630
631
  A list of document IDs to retrieve.
631
- :param index:
632
- The name of the index to retrieve documents from.
633
632
  :returns:
634
633
  A list of documents.
635
634
  """
@@ -661,8 +660,6 @@ class QdrantDocumentStore:
661
660
 
662
661
  :param ids:
663
662
  A list of document IDs to retrieve.
664
- :param index:
665
- The name of the index to retrieve documents from.
666
663
  :returns:
667
664
  A list of documents.
668
665
  """
@@ -1210,7 +1207,7 @@ class QdrantDocumentStore:
1210
1207
  )
1211
1208
  raise QdrantStoreError(msg) from ke
1212
1209
 
1213
- def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None):
1210
+ def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None) -> None:
1214
1211
  """
1215
1212
  Create payload index for the collection if payload_fields_to_index is provided
1216
1213
  See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1229,7 +1226,7 @@ class QdrantDocumentStore:
1229
1226
 
1230
1227
  async def _create_payload_index_async(
1231
1228
  self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None
1232
- ):
1229
+ ) -> None:
1233
1230
  """
1234
1231
  Asynchronously create payload index for the collection if payload_fields_to_index is provided
1235
1232
  See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
@@ -1257,7 +1254,7 @@ class QdrantDocumentStore:
1257
1254
  sparse_idf: bool,
1258
1255
  on_disk: bool = False,
1259
1256
  payload_fields_to_index: Optional[List[dict]] = None,
1260
- ):
1257
+ ) -> None:
1261
1258
  """
1262
1259
  Sets up the Qdrant collection with the specified parameters.
1263
1260
  :param collection_name:
@@ -1313,7 +1310,7 @@ class QdrantDocumentStore:
1313
1310
  sparse_idf: bool,
1314
1311
  on_disk: bool = False,
1315
1312
  payload_fields_to_index: Optional[List[dict]] = None,
1316
- ):
1313
+ ) -> None:
1317
1314
  """
1318
1315
  Asynchronously sets up the Qdrant collection with the specified parameters.
1319
1316
  :param collection_name:
@@ -1367,7 +1364,7 @@ class QdrantDocumentStore:
1367
1364
  on_disk: Optional[bool] = None,
1368
1365
  use_sparse_embeddings: Optional[bool] = None,
1369
1366
  sparse_idf: bool = False,
1370
- ):
1367
+ ) -> None:
1371
1368
  """
1372
1369
  Recreates the Qdrant collection with the specified parameters.
1373
1370
 
@@ -1410,7 +1407,7 @@ class QdrantDocumentStore:
1410
1407
  on_disk: Optional[bool] = None,
1411
1408
  use_sparse_embeddings: Optional[bool] = None,
1412
1409
  sparse_idf: bool = False,
1413
- ):
1410
+ ) -> None:
1414
1411
  """
1415
1412
  Asynchronously recreates the Qdrant collection with the specified parameters.
1416
1413
 
@@ -1449,7 +1446,7 @@ class QdrantDocumentStore:
1449
1446
  self,
1450
1447
  documents: List[Document],
1451
1448
  policy: DuplicatePolicy = None,
1452
- ):
1449
+ ) -> List[Document]:
1453
1450
  """
1454
1451
  Checks whether any of the passed documents is already existing in the chosen index and returns a list of
1455
1452
  documents that are not in the index yet.
@@ -1476,7 +1473,7 @@ class QdrantDocumentStore:
1476
1473
  self,
1477
1474
  documents: List[Document],
1478
1475
  policy: DuplicatePolicy = None,
1479
- ):
1476
+ ) -> List[Document]:
1480
1477
  """
1481
1478
  Asynchronously checks whether any of the passed documents is already existing
1482
1479
  in the chosen index and returns a list of
@@ -1511,9 +1508,9 @@ class QdrantDocumentStore:
1511
1508
  for document in documents:
1512
1509
  if document.id in _hash_ids:
1513
1510
  logger.info(
1514
- "Duplicate Documents: Document with id '%s' already exists in index '%s'",
1515
- document.id,
1516
- self.index,
1511
+ "Duplicate Documents: Document with id '{document_id}' already exists in index '{index}'",
1512
+ document_id=document.id,
1513
+ index=self.index,
1517
1514
  )
1518
1515
  continue
1519
1516
  _documents.append(document)
@@ -1521,7 +1518,7 @@ class QdrantDocumentStore:
1521
1518
 
1522
1519
  return _documents
1523
1520
 
1524
- def _prepare_collection_params(self):
1521
+ def _prepare_collection_params(self) -> Dict[str, Any]:
1525
1522
  """
1526
1523
  Prepares the common parameters for collection creation.
1527
1524
  """
@@ -1537,7 +1534,7 @@ class QdrantDocumentStore:
1537
1534
  "init_from": self.init_from,
1538
1535
  }
1539
1536
 
1540
- def _prepare_client_params(self):
1537
+ def _prepare_client_params(self) -> Dict[str, Any]:
1541
1538
  """
1542
1539
  Prepares the common parameters for client initialization.
1543
1540
 
@@ -1554,7 +1551,10 @@ class QdrantDocumentStore:
1554
1551
  "timeout": self.timeout,
1555
1552
  "host": self.host,
1556
1553
  "path": self.path,
1557
- "metadata": self.metadata,
1554
+ # NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
1555
+ # class attribute. For example, the resolved api key is added to metadata by the QdrantClient class
1556
+ # when using a hosted Qdrant service, which means running to_dict() exposes the api key.
1557
+ "metadata": {**self.metadata},
1558
1558
  "force_disable_check_same_thread": self.force_disable_check_same_thread,
1559
1559
  }
1560
1560
 
@@ -1565,7 +1565,7 @@ class QdrantDocumentStore:
1565
1565
  on_disk: Optional[bool] = None,
1566
1566
  use_sparse_embeddings: Optional[bool] = None,
1567
1567
  sparse_idf: bool = False,
1568
- ):
1568
+ ) -> Tuple[Dict[str, rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
1569
1569
  """
1570
1570
  Prepares the configuration for creating or recreating a Qdrant collection.
1571
1571
 
@@ -1595,9 +1595,12 @@ class QdrantDocumentStore:
1595
1595
 
1596
1596
  return vectors_config, sparse_vectors_config
1597
1597
 
1598
- def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]] = None):
1598
+ def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]] = None) -> None:
1599
1599
  """
1600
1600
  Validates the filters provided for querying.
1601
+
1602
+ :param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
1603
+ :raises ValueError: If the filters are not in the correct format or syntax.
1601
1604
  """
1602
1605
  if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
1603
1606
  msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
@@ -1607,7 +1610,7 @@ class QdrantDocumentStore:
1607
1610
  msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
1608
1611
  raise ValueError(msg)
1609
1612
 
1610
- def _process_query_point_results(self, results, scale_score: bool = False):
1613
+ def _process_query_point_results(self, results: List[QdrantPoint], scale_score: bool = False) -> List[Document]:
1611
1614
  """
1612
1615
  Processes query results from Qdrant.
1613
1616
  """
@@ -1627,7 +1630,7 @@ class QdrantDocumentStore:
1627
1630
 
1628
1631
  return documents
1629
1632
 
1630
- def _process_group_results(self, groups):
1633
+ def _process_group_results(self, groups: List[rest.PointGroup]) -> List[Document]:
1631
1634
  """
1632
1635
  Processes grouped query results from Qdrant.
1633
1636
 
@@ -1647,7 +1650,7 @@ class QdrantDocumentStore:
1647
1650
  collection_info,
1648
1651
  distance,
1649
1652
  embedding_dim: int,
1650
- ):
1653
+ ) -> None:
1651
1654
  """
1652
1655
  Validates that an existing collection is compatible with the current configuration.
1653
1656
  """
@@ -138,10 +138,10 @@ def convert_filters_to_qdrant(
138
138
 
139
139
 
140
140
  def build_filters_for_repeated_operators(
141
- must_clauses,
142
- should_clauses,
143
- must_not_clauses,
144
- qdrant_filter,
141
+ must_clauses: List,
142
+ should_clauses: List,
143
+ must_not_clauses: List,
144
+ qdrant_filter: List[models.Filter],
145
145
  ) -> List[models.Filter]:
146
146
  """
147
147
  Flattens the nested lists of clauses by creating separate Filters for each clause of a logical operator.
@@ -11,7 +11,7 @@ logger.addHandler(python_logging.StreamHandler())
11
11
  logger.setLevel(python_logging.INFO)
12
12
 
13
13
 
14
- def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str):
14
+ def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str) -> None:
15
15
  """
16
16
  Utility function to migrate an existing `QdrantDocumentStore` to a new one with support for sparse embeddings.
17
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qdrant-haystack
3
- Version: 9.1.0
3
+ Version: 9.1.2
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.9
15
15
  Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
18
19
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
20
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
21
  Requires-Python: >=3.9
@@ -1,11 +1,11 @@
1
1
  haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=AE1hdw4sqb0rTSqfAxKCRUOZVE8gbHdQ1wDccdN86hc,313
2
- haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=IKl3gHaOhDiwIgPsQm1fTBkgSPPd5OIl3XXFevnVJvk,29100
2
+ haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=Ni_aWM_JslmrWdfUl0TenOOxT15YJmMpSSSRAP3qPYs,29298
3
3
  haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
4
4
  haystack_integrations/document_stores/qdrant/converters.py,sha256=iVhAZ7wdRxRjfLVMHB1JdAhn7LpU5bwza1obGmEePWU,2506
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=EPlz5O0VMoNUO0wgECLnmzRnhjCogd0WxUux9pt9dLQ,70012
6
- haystack_integrations/document_stores/qdrant/filters.py,sha256=Nv_eKIYKwUWvldJfa0omfFQ0kgqi6L3DUFeMuIWziOY,11751
7
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=2xyet1fhy2lpVTs3E75f7oR521zcjT6U2jHd4pLLgKM,4971
8
- qdrant_haystack-9.1.0.dist-info/METADATA,sha256=dh9TfZTQkY5i8ZCBy8jPoD72qZ62BoFoZINMAwqGFFo,1872
9
- qdrant_haystack-9.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- qdrant_haystack-9.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
- qdrant_haystack-9.1.0.dist-info/RECORD,,
5
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=PVvWBKJZXQRG2-TiFp0lZLd7nOChVh0gIipRXoaVYaM,70829
6
+ haystack_integrations/document_stores/qdrant/filters.py,sha256=e7y-Pqf6S2v1jd-1jCNdzD4sVGGI4x7f3Q16lP21NrQ,11790
7
+ haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=o66D6VaDEtz_zFYmZw_jsbBTXb9MGX15JnfAzyo7Wq0,4979
8
+ qdrant_haystack-9.1.2.dist-info/METADATA,sha256=WMLyOW2a5Xt6TlC_YuOf7iBnkYG2NHQucvMD7NGuRo0,1923
9
+ qdrant_haystack-9.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ qdrant_haystack-9.1.2.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
+ qdrant_haystack-9.1.2.dist-info/RECORD,,