qdrant-haystack 9.1.3__py3-none-any.whl → 9.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. More details were linked from the original page; the link target is not preserved in this text export.

File without changes
@@ -8,6 +8,11 @@ from qdrant_client.http import models
8
8
 
9
9
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
10
10
 
11
+ FILTER_POLICY_MERGE_ERROR_MESSAGE = (
12
+ "Native Qdrant filters cannot be used with filter_policy set to MERGE. "
13
+ "Set filter_policy to REPLACE or use Haystack filters instead."
14
+ )
15
+
11
16
 
12
17
  @component
13
18
  class QdrantEmbeddingRetriever:
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
153
158
  :returns:
154
159
  The retrieved documents.
155
160
 
161
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
156
162
  """
157
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
163
+ if self._filter_policy == FilterPolicy.MERGE and (
164
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
165
+ ):
166
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
167
+
168
+ # Replacing filters works with native Qdrant filters even if the type is wrong
169
+ filters = apply_filter_policy(
170
+ filter_policy=self._filter_policy,
171
+ init_filters=self._filters, # type: ignore[arg-type]
172
+ runtime_filters=filters, # type: ignore[arg-type]
173
+ )
158
174
 
159
175
  docs = self._document_store._query_by_embedding(
160
176
  query_embedding=query_embedding,
@@ -197,8 +213,19 @@ class QdrantEmbeddingRetriever:
197
213
  :returns:
198
214
  The retrieved documents.
199
215
 
216
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
200
217
  """
201
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
218
+ if self._filter_policy == FilterPolicy.MERGE and (
219
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
220
+ ):
221
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
222
+
223
+ # Replacing filters works with native Qdrant filters even if the type is wrong
224
+ filters = apply_filter_policy(
225
+ filter_policy=self._filter_policy,
226
+ init_filters=self._filters, # type: ignore[arg-type]
227
+ runtime_filters=filters, # type: ignore[arg-type]
228
+ )
202
229
 
203
230
  docs = await self._document_store._query_by_embedding_async(
204
231
  query_embedding=query_embedding,
@@ -364,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
364
391
  :returns:
365
392
  The retrieved documents.
366
393
 
394
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
367
395
  """
368
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
396
+ if self._filter_policy == FilterPolicy.MERGE and (
397
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
398
+ ):
399
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
400
+
401
+ # Replacing filters works with native Qdrant filters even if the type is wrong
402
+ filters = apply_filter_policy(
403
+ filter_policy=self._filter_policy,
404
+ init_filters=self._filters, # type: ignore[arg-type]
405
+ runtime_filters=filters, # type: ignore[arg-type]
406
+ )
369
407
 
370
408
  docs = self._document_store._query_by_sparse(
371
409
  query_sparse_embedding=query_sparse_embedding,
@@ -413,8 +451,19 @@ class QdrantSparseEmbeddingRetriever:
413
451
  :returns:
414
452
  The retrieved documents.
415
453
 
454
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
416
455
  """
417
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
456
+ if self._filter_policy == FilterPolicy.MERGE and (
457
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
458
+ ):
459
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
460
+
461
+ # Replacing filters works with native Qdrant filters even if the type is wrong
462
+ filters = apply_filter_policy(
463
+ filter_policy=self._filter_policy,
464
+ init_filters=self._filters, # type: ignore[arg-type]
465
+ runtime_filters=filters, # type: ignore[arg-type]
466
+ )
418
467
 
419
468
  docs = await self._document_store._query_by_sparse_async(
420
469
  query_sparse_embedding=query_sparse_embedding,
@@ -579,8 +628,19 @@ class QdrantHybridRetriever:
579
628
  :returns:
580
629
  The retrieved documents.
581
630
 
631
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
582
632
  """
583
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
633
+ if self._filter_policy == FilterPolicy.MERGE and (
634
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
635
+ ):
636
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
637
+
638
+ # Replacing filters works with native Qdrant filters even if the type is wrong
639
+ filters = apply_filter_policy(
640
+ filter_policy=self._filter_policy,
641
+ init_filters=self._filters, # type: ignore[arg-type]
642
+ runtime_filters=filters, # type: ignore[arg-type]
643
+ )
584
644
 
585
645
  docs = self._document_store._query_hybrid(
586
646
  query_embedding=query_embedding,
@@ -628,8 +688,19 @@ class QdrantHybridRetriever:
628
688
  :returns:
629
689
  The retrieved documents.
630
690
 
691
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
631
692
  """
632
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
693
+ if self._filter_policy == FilterPolicy.MERGE and (
694
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
695
+ ):
696
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
697
+
698
+ # Replacing filters works with native Qdrant filters even if the type is wrong
699
+ filters = apply_filter_policy(
700
+ filter_policy=self._filter_policy,
701
+ init_filters=self._filters, # type: ignore[arg-type]
702
+ runtime_filters=filters, # type: ignore[arg-type]
703
+ )
633
704
 
634
705
  docs = await self._document_store._query_hybrid_async(
635
706
  query_embedding=query_embedding,
File without changes
@@ -37,7 +37,7 @@ def convert_haystack_documents_to_qdrant_points(
37
37
 
38
38
  else:
39
39
  vector = payload.pop("embedding") or {}
40
- _id = convert_id(payload.get("id"))
40
+ _id = convert_id(document.id)
41
41
 
42
42
  point = rest.PointStruct(
43
43
  payload=payload,
@@ -62,19 +62,21 @@ QdrantPoint = Union[rest.ScoredPoint, rest.Record]
62
62
 
63
63
 
64
64
  def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
65
- payload = {**point.payload}
65
+ payload = point.payload or {}
66
66
  payload["score"] = point.score if hasattr(point, "score") else None
67
67
 
68
68
  if not use_sparse_embeddings:
69
69
  payload["embedding"] = point.vector if hasattr(point, "vector") else None
70
- elif hasattr(point, "vector") and point.vector is not None:
70
+ elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
71
71
  payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
72
72
 
73
73
  if SPARSE_VECTORS_NAME in point.vector:
74
- parse_vector_dict = {
75
- "indices": point.vector[SPARSE_VECTORS_NAME].indices,
76
- "values": point.vector[SPARSE_VECTORS_NAME].values,
77
- }
78
- payload["sparse_embedding"] = parse_vector_dict
74
+ sparse_vector = point.vector[SPARSE_VECTORS_NAME]
75
+ if isinstance(sparse_vector, rest.SparseVector):
76
+ sparse_vector_dict = {
77
+ "indices": sparse_vector.indices,
78
+ "values": sparse_vector.values,
79
+ }
80
+ payload["sparse_embedding"] = sparse_vector_dict
79
81
 
80
82
  return Document.from_dict(payload)
@@ -2,7 +2,6 @@ import inspect
2
2
  from itertools import islice
3
3
  from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Tuple, Union
4
4
 
5
- import numpy as np
6
5
  import qdrant_client
7
6
  from haystack import default_from_dict, default_to_dict, logging
8
7
  from haystack.dataclasses import Document
@@ -10,6 +9,7 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
10
9
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
11
10
  from haystack.document_stores.types import DuplicatePolicy
12
11
  from haystack.utils import Secret, deserialize_secrets_inplace
12
+ from numpy import exp
13
13
  from qdrant_client import grpc
14
14
  from qdrant_client.http import models as rest
15
15
  from qdrant_client.http.exceptions import UnexpectedResponse
@@ -18,7 +18,6 @@ from tqdm import tqdm
18
18
  from .converters import (
19
19
  DENSE_VECTORS_NAME,
20
20
  SPARSE_VECTORS_NAME,
21
- QdrantPoint,
22
21
  convert_haystack_documents_to_qdrant_points,
23
22
  convert_id,
24
23
  convert_qdrant_point_to_haystack_document,
@@ -27,6 +26,12 @@ from .filters import convert_filters_to_qdrant
27
26
 
28
27
  logger = logging.getLogger(__name__)
29
28
 
29
+ # Default group size to apply when using group_by
30
+ # - Our methods use None as the default for optional group_size parameter.
31
+ # - Qdrant expects an integer and internally defaults to 3 when performing grouped queries.
32
+ # - When group_by is specified but group_size is None, we use this value instead of passing None.
33
+ DEFAULT_GROUP_SIZE = 3
34
+
30
35
 
31
36
  class QdrantStoreError(DocumentStoreError):
32
37
  pass
@@ -85,7 +90,7 @@ class QdrantDocumentStore:
85
90
  ```
86
91
  """
87
92
 
88
- SIMILARITY: ClassVar[Dict[str, str]] = {
93
+ SIMILARITY: ClassVar[Dict[str, rest.Distance]] = {
89
94
  "cosine": rest.Distance.COSINE,
90
95
  "dot_product": rest.Distance.DOT,
91
96
  "l2": rest.Distance.EUCLID,
@@ -216,8 +221,8 @@ class QdrantDocumentStore:
216
221
  List of payload fields to index.
217
222
  """
218
223
 
219
- self._client = None
220
- self._async_client = None
224
+ self._client: Optional[qdrant_client.QdrantClient] = None
225
+ self._async_client: Optional[qdrant_client.AsyncQdrantClient] = None
221
226
 
222
227
  # Store the Qdrant client specific attributes
223
228
  self.location = location
@@ -575,8 +580,8 @@ class QdrantDocumentStore:
575
580
  with_vectors=True,
576
581
  )
577
582
  stop_scrolling = next_offset is None or (
578
- isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
579
- )
583
+ isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == "" # type: ignore[union-attr]
584
+ ) # grpc.PointId always has num and uuid
580
585
 
581
586
  for record in records:
582
587
  yield convert_qdrant_point_to_haystack_document(
@@ -612,8 +617,8 @@ class QdrantDocumentStore:
612
617
  with_vectors=True,
613
618
  )
614
619
  stop_scrolling = next_offset is None or (
615
- isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
616
- )
620
+ isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == "" # type: ignore[union-attr]
621
+ ) # grpc.PointId always has num and uuid
617
622
 
618
623
  for record in records:
619
624
  yield convert_qdrant_point_to_haystack_document(
@@ -739,7 +744,7 @@ class QdrantDocumentStore:
739
744
  query_filter=qdrant_filters,
740
745
  limit=top_k,
741
746
  group_by=group_by,
742
- group_size=group_size,
747
+ group_size=group_size or DEFAULT_GROUP_SIZE,
743
748
  with_vectors=return_embedding,
744
749
  score_threshold=score_threshold,
745
750
  ).groups
@@ -801,7 +806,7 @@ class QdrantDocumentStore:
801
806
  query_filter=qdrant_filters,
802
807
  limit=top_k,
803
808
  group_by=group_by,
804
- group_size=group_size,
809
+ group_size=group_size or DEFAULT_GROUP_SIZE,
805
810
  with_vectors=return_embedding,
806
811
  score_threshold=score_threshold,
807
812
  ).groups
@@ -893,7 +898,7 @@ class QdrantDocumentStore:
893
898
  query=rest.FusionQuery(fusion=rest.Fusion.RRF),
894
899
  limit=top_k,
895
900
  group_by=group_by,
896
- group_size=group_size,
901
+ group_size=group_size or DEFAULT_GROUP_SIZE,
897
902
  score_threshold=score_threshold,
898
903
  with_payload=True,
899
904
  with_vectors=return_embedding,
@@ -990,14 +995,14 @@ class QdrantDocumentStore:
990
995
  query_filter=qdrant_filters,
991
996
  limit=top_k,
992
997
  group_by=group_by,
993
- group_size=group_size,
998
+ group_size=group_size or DEFAULT_GROUP_SIZE,
994
999
  with_vectors=return_embedding,
995
1000
  score_threshold=score_threshold,
996
1001
  )
997
1002
  groups = response.groups
998
1003
  return self._process_group_results(groups)
999
1004
  else:
1000
- response = await self._async_client.query_points(
1005
+ query_response = await self._async_client.query_points(
1001
1006
  collection_name=self.index,
1002
1007
  query=rest.SparseVector(
1003
1008
  indices=query_indices,
@@ -1009,7 +1014,7 @@ class QdrantDocumentStore:
1009
1014
  with_vectors=return_embedding,
1010
1015
  score_threshold=score_threshold,
1011
1016
  )
1012
- points = response.points
1017
+ points = query_response.points
1013
1018
  return self._process_query_point_results(points, scale_score=scale_score)
1014
1019
 
1015
1020
  async def _query_by_embedding_async(
@@ -1054,14 +1059,14 @@ class QdrantDocumentStore:
1054
1059
  query_filter=qdrant_filters,
1055
1060
  limit=top_k,
1056
1061
  group_by=group_by,
1057
- group_size=group_size,
1062
+ group_size=group_size or DEFAULT_GROUP_SIZE,
1058
1063
  with_vectors=return_embedding,
1059
1064
  score_threshold=score_threshold,
1060
1065
  )
1061
1066
  groups = response.groups
1062
1067
  return self._process_group_results(groups)
1063
1068
  else:
1064
- response = await self._async_client.query_points(
1069
+ query_response = await self._async_client.query_points(
1065
1070
  collection_name=self.index,
1066
1071
  query=query_embedding,
1067
1072
  using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
@@ -1070,7 +1075,7 @@ class QdrantDocumentStore:
1070
1075
  with_vectors=return_embedding,
1071
1076
  score_threshold=score_threshold,
1072
1077
  )
1073
- points = response.points
1078
+ points = query_response.points
1074
1079
  return self._process_query_point_results(points, scale_score=scale_score)
1075
1080
 
1076
1081
  async def _query_hybrid_async(
@@ -1145,14 +1150,14 @@ class QdrantDocumentStore:
1145
1150
  query=rest.FusionQuery(fusion=rest.Fusion.RRF),
1146
1151
  limit=top_k,
1147
1152
  group_by=group_by,
1148
- group_size=group_size,
1153
+ group_size=group_size or DEFAULT_GROUP_SIZE,
1149
1154
  score_threshold=score_threshold,
1150
1155
  with_payload=True,
1151
1156
  with_vectors=return_embedding,
1152
1157
  )
1153
1158
  groups = response.groups
1154
1159
  else:
1155
- response = await self._async_client.query_points(
1160
+ query_response = await self._async_client.query_points(
1156
1161
  collection_name=self.index,
1157
1162
  prefetch=[
1158
1163
  rest.Prefetch(
@@ -1175,7 +1180,7 @@ class QdrantDocumentStore:
1175
1180
  with_payload=True,
1176
1181
  with_vectors=return_embedding,
1177
1182
  )
1178
- points = response.points
1183
+ points = query_response.points
1179
1184
 
1180
1185
  except Exception as e:
1181
1186
  msg = "Error during hybrid search"
@@ -1233,7 +1238,6 @@ class QdrantDocumentStore:
1233
1238
  """
1234
1239
  if payload_fields_to_index is not None:
1235
1240
  for payload_index in payload_fields_to_index:
1236
-
1237
1241
  # self._async_client is initialized at this point
1238
1242
  # since _initialize_async_client() is called before this method is executed
1239
1243
  assert self._async_client is not None
@@ -1359,7 +1363,7 @@ class QdrantDocumentStore:
1359
1363
  def recreate_collection(
1360
1364
  self,
1361
1365
  collection_name: str,
1362
- distance,
1366
+ distance: rest.Distance,
1363
1367
  embedding_dim: int,
1364
1368
  on_disk: Optional[bool] = None,
1365
1369
  use_sparse_embeddings: Optional[bool] = None,
@@ -1402,7 +1406,7 @@ class QdrantDocumentStore:
1402
1406
  async def recreate_collection_async(
1403
1407
  self,
1404
1408
  collection_name: str,
1405
- distance,
1409
+ distance: rest.Distance,
1406
1410
  embedding_dim: int,
1407
1411
  on_disk: Optional[bool] = None,
1408
1412
  use_sparse_embeddings: Optional[bool] = None,
@@ -1445,7 +1449,7 @@ class QdrantDocumentStore:
1445
1449
  def _handle_duplicate_documents(
1446
1450
  self,
1447
1451
  documents: List[Document],
1448
- policy: DuplicatePolicy = None,
1452
+ policy: Optional[DuplicatePolicy] = None,
1449
1453
  ) -> List[Document]:
1450
1454
  """
1451
1455
  Checks whether any of the passed documents is already existing in the chosen index and returns a list of
@@ -1472,7 +1476,7 @@ class QdrantDocumentStore:
1472
1476
  async def _handle_duplicate_documents_async(
1473
1477
  self,
1474
1478
  documents: List[Document],
1475
- policy: DuplicatePolicy = None,
1479
+ policy: Optional[DuplicatePolicy] = None,
1476
1480
  ) -> List[Document]:
1477
1481
  """
1478
1482
  Asynchronously checks whether any of the passed documents is already existing
@@ -1561,11 +1565,11 @@ class QdrantDocumentStore:
1561
1565
  def _prepare_collection_config(
1562
1566
  self,
1563
1567
  embedding_dim: int,
1564
- distance,
1568
+ distance: rest.Distance,
1565
1569
  on_disk: Optional[bool] = None,
1566
1570
  use_sparse_embeddings: Optional[bool] = None,
1567
1571
  sparse_idf: bool = False,
1568
- ) -> Tuple[Dict[str, rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
1572
+ ) -> Tuple[Union[Dict[str, rest.VectorParams], rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
1569
1573
  """
1570
1574
  Prepares the configuration for creating or recreating a Qdrant collection.
1571
1575
 
@@ -1577,12 +1581,14 @@ class QdrantDocumentStore:
1577
1581
  use_sparse_embeddings = self.use_sparse_embeddings
1578
1582
 
1579
1583
  # dense vectors configuration
1580
- vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
1581
- sparse_vectors_config = None
1584
+ base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
1585
+ vectors_config: Union[rest.VectorParams, Dict[str, rest.VectorParams]] = base_vectors_config
1586
+
1587
+ sparse_vectors_config: Optional[Dict[str, rest.SparseVectorParams]] = None
1582
1588
 
1583
1589
  if use_sparse_embeddings:
1584
1590
  # in this case, we need to define named vectors
1585
- vectors_config = {DENSE_VECTORS_NAME: vectors_config}
1591
+ vectors_config = {DENSE_VECTORS_NAME: base_vectors_config}
1586
1592
 
1587
1593
  sparse_vectors_config = {
1588
1594
  SPARSE_VECTORS_NAME: rest.SparseVectorParams(
@@ -1610,7 +1616,9 @@ class QdrantDocumentStore:
1610
1616
  msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
1611
1617
  raise ValueError(msg)
1612
1618
 
1613
- def _process_query_point_results(self, results: List[QdrantPoint], scale_score: bool = False) -> List[Document]:
1619
+ def _process_query_point_results(
1620
+ self, results: List[rest.ScoredPoint], scale_score: bool = False
1621
+ ) -> List[Document]:
1614
1622
  """
1615
1623
  Processes query results from Qdrant.
1616
1624
  """
@@ -1622,10 +1630,12 @@ class QdrantDocumentStore:
1622
1630
  if scale_score:
1623
1631
  for document in documents:
1624
1632
  score = document.score
1633
+ if score is None:
1634
+ continue
1625
1635
  if self.similarity == "cosine":
1626
1636
  score = (score + 1) / 2
1627
1637
  else:
1628
- score = float(1 / (1 + np.exp(-score / 100)))
1638
+ score = float(1 / (1 + exp(-score / 100)))
1629
1639
  document.score = score
1630
1640
 
1631
1641
  return documents
@@ -1647,16 +1657,22 @@ class QdrantDocumentStore:
1647
1657
  def _validate_collection_compatibility(
1648
1658
  self,
1649
1659
  collection_name: str,
1650
- collection_info,
1651
- distance,
1660
+ collection_info: rest.CollectionInfo,
1661
+ distance: rest.Distance,
1652
1662
  embedding_dim: int,
1653
1663
  ) -> None:
1654
1664
  """
1655
1665
  Validates that an existing collection is compatible with the current configuration.
1656
1666
  """
1657
- has_named_vectors = isinstance(collection_info.config.params.vectors, dict)
1667
+ vectors_config = collection_info.config.params.vectors
1658
1668
 
1659
- if has_named_vectors and DENSE_VECTORS_NAME not in collection_info.config.params.vectors:
1669
+ if vectors_config is None:
1670
+ msg = f"Collection '{collection_name}' has no vector configuration."
1671
+ raise QdrantStoreError(msg)
1672
+
1673
+ has_named_vectors = isinstance(vectors_config, dict)
1674
+
1675
+ if has_named_vectors and DENSE_VECTORS_NAME not in vectors_config:
1660
1676
  msg = (
1661
1677
  f"Collection '{collection_name}' already exists in Qdrant, "
1662
1678
  f"but it has been originally created outside of Haystack and is not supported. "
@@ -1688,11 +1704,20 @@ class QdrantDocumentStore:
1688
1704
 
1689
1705
  # Get current distance and vector size based on collection configuration
1690
1706
  if self.use_sparse_embeddings:
1691
- current_distance = collection_info.config.params.vectors[DENSE_VECTORS_NAME].distance
1692
- current_vector_size = collection_info.config.params.vectors[DENSE_VECTORS_NAME].size
1707
+ if not isinstance(vectors_config, dict):
1708
+ msg = f"Collection '{collection_name}' has invalid vector configuration for sparse embeddings."
1709
+ raise QdrantStoreError(msg)
1710
+
1711
+ dense_vector_config = vectors_config[DENSE_VECTORS_NAME]
1712
+ current_distance = dense_vector_config.distance
1713
+ current_vector_size = dense_vector_config.size
1693
1714
  else:
1694
- current_distance = collection_info.config.params.vectors.distance
1695
- current_vector_size = collection_info.config.params.vectors.size
1715
+ if isinstance(vectors_config, dict):
1716
+ msg = f"Collection '{collection_name}' has invalid vector configuration for dense embeddings only."
1717
+ raise QdrantStoreError(msg)
1718
+
1719
+ current_distance = vectors_config.distance
1720
+ current_vector_size = vectors_config.size
1696
1721
 
1697
1722
  # Validate distance metric
1698
1723
  if current_distance != distance:
@@ -1,198 +1,115 @@
1
1
  from datetime import datetime
2
- from typing import List, Optional, Union
2
+ from typing import Any, Callable, Dict, List, Optional, Union
3
3
 
4
4
  from haystack.utils.filters import COMPARISON_OPERATORS, LOGICAL_OPERATORS, FilterError
5
5
  from qdrant_client.http import models
6
6
 
7
- COMPARISON_OPERATORS = COMPARISON_OPERATORS.keys()
8
- LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
9
-
10
7
 
11
8
  def convert_filters_to_qdrant(
12
- filter_term: Optional[Union[List[dict], dict, models.Filter]] = None, is_parent_call: bool = True
13
- ) -> Optional[Union[models.Filter, List[models.Filter], List[models.Condition]]]:
9
+ filter_term: Optional[Union[List[Dict[str, Any]], Dict[str, Any], models.Filter]] = None,
10
+ ) -> Optional[models.Filter]:
14
11
  """Converts Haystack filters to the format used by Qdrant.
15
12
 
16
13
  :param filter_term: the haystack filter to be converted to qdrant.
17
- :param is_parent_call: indicates if this is the top-level call to the function. If True, the function returns
18
- a single models.Filter object; if False, it may return a list of filters or conditions for further processing.
19
-
20
- :returns: a single Qdrant Filter in the parent call or a list of such Filters in recursive calls.
21
-
22
- :raises FilterError: If the invalid filter criteria is provided or if an unknown operator is encountered.
23
-
14
+ :returns: a single Qdrant Filter or None.
15
+ :raises FilterError: If invalid filter criteria is provided.
24
16
  """
25
-
26
17
  if isinstance(filter_term, models.Filter):
27
18
  return filter_term
28
19
  if not filter_term:
29
20
  return None
30
21
 
31
- must_clauses: List[models.Filter] = []
32
- should_clauses: List[models.Filter] = []
33
- must_not_clauses: List[models.Filter] = []
34
- # Indicates if there are multiple same LOGICAL OPERATORS on each level
35
- # and prevents them from being combined
36
- same_operator_flag = False
37
- conditions, qdrant_filter, current_level_operators = (
38
- [],
39
- [],
40
- [],
41
- )
42
-
43
22
  if isinstance(filter_term, dict):
44
23
  filter_term = [filter_term]
45
24
 
46
- # ======== IDENTIFY FILTER ITEMS ON EACH LEVEL ========
25
+ conditions = _process_filter_items(filter_term)
26
+
27
+ return _build_final_filter(conditions)
47
28
 
48
- for item in filter_term:
49
- operator = item.get("operator")
50
29
 
51
- # Check for repeated similar operators on each level
52
- same_operator_flag = operator in current_level_operators and operator in LOGICAL_OPERATORS
53
- if not same_operator_flag:
54
- current_level_operators.append(operator)
30
+ def _process_filter_items(filter_items: List[Dict[str, Any]]) -> List[models.Condition]:
31
+ """Process a list of filter items and return all conditions."""
32
+ all_conditions: List[models.Condition] = []
55
33
 
34
+ for item in filter_items:
35
+ operator = item.get("operator")
56
36
  if operator is None:
57
37
  msg = "Operator not found in filters"
58
38
  raise FilterError(msg)
59
39
 
60
- if operator in LOGICAL_OPERATORS and "conditions" not in item:
61
- msg = f"'conditions' not found for '{operator}'"
40
+ if operator in LOGICAL_OPERATORS:
41
+ condition = _process_logical_operator(item)
42
+ if condition:
43
+ all_conditions.append(condition)
44
+ elif operator in COMPARISON_OPERATORS:
45
+ condition = _process_comparison_operator(item)
46
+ if condition:
47
+ all_conditions.append(condition)
48
+ else:
49
+ msg = f"Unknown operator {operator} used in filters"
62
50
  raise FilterError(msg)
63
51
 
64
- if operator in LOGICAL_OPERATORS:
65
- # Recursively process nested conditions
66
- current_filter = convert_filters_to_qdrant(item.get("conditions", []), is_parent_call=False) or []
67
-
68
- # When same_operator_flag is set to True,
69
- # ensure each clause is appended as an independent list to avoid merging distinct clauses.
70
- if operator == "AND":
71
- must_clauses = [must_clauses, current_filter] if same_operator_flag else must_clauses + current_filter
72
- elif operator == "OR":
73
- should_clauses = (
74
- [should_clauses, current_filter] if same_operator_flag else should_clauses + current_filter
75
- )
76
- elif operator == "NOT":
77
- must_not_clauses = (
78
- [must_not_clauses, current_filter] if same_operator_flag else must_not_clauses + current_filter
79
- )
52
+ return all_conditions
80
53
 
81
- elif operator in COMPARISON_OPERATORS:
82
- field = item.get("field")
83
- value = item.get("value")
84
- if field is None or value is None:
85
- msg = f"'field' or 'value' not found for '{operator}'"
86
- raise FilterError(msg)
87
54
 
88
- parsed_conditions = _parse_comparison_operation(comparison_operation=operator, key=field, value=value)
55
+ def _process_logical_operator(item: Dict[str, Any]) -> Optional[models.Condition]:
56
+ """Process a logical operator (AND, OR, NOT) and return the corresponding condition."""
57
+ operator = item["operator"]
58
+ conditions = item.get("conditions")
89
59
 
90
- # check if the parsed_conditions are models.Filter or models.Condition
91
- for condition in parsed_conditions:
92
- if isinstance(condition, models.Filter):
93
- qdrant_filter.append(condition)
94
- else:
95
- conditions.append(condition)
60
+ if not conditions:
61
+ msg = f"'conditions' not found for '{operator}'"
62
+ raise FilterError(msg)
96
63
 
97
- else:
98
- msg = f"Unknown operator {operator} used in filters"
99
- raise FilterError(msg)
64
+ # Recursively process nested conditions
65
+ nested_conditions = _process_filter_items(conditions)
100
66
 
101
- # ======== PROCESS FILTER ITEMS ON EACH LEVEL ========
102
-
103
- # If same logical operators have separate clauses, create separate filters
104
- if same_operator_flag:
105
- qdrant_filter = build_filters_for_repeated_operators(
106
- must_clauses, should_clauses, must_not_clauses, qdrant_filter
107
- )
108
-
109
- # else append a single Filter for existing clauses
110
- elif must_clauses or should_clauses or must_not_clauses:
111
- qdrant_filter.append(
112
- models.Filter(
113
- must=must_clauses or None,
114
- should=should_clauses or None,
115
- must_not=must_not_clauses or None,
116
- )
117
- )
118
-
119
- # In case of parent call, a single Filter is returned
120
- if is_parent_call:
121
- # If qdrant_filter has just a single Filter in parent call,
122
- # then it might be returned instead.
123
- if len(qdrant_filter) == 1 and isinstance(qdrant_filter[0], models.Filter):
124
- return qdrant_filter[0]
125
- else:
126
- must_clauses.extend(conditions)
127
- return models.Filter(
128
- must=must_clauses or None,
129
- should=should_clauses or None,
130
- must_not=must_not_clauses or None,
131
- )
67
+ if not nested_conditions:
68
+ return None
132
69
 
133
- # Store conditions of each level in output of the loop
134
- elif conditions:
135
- qdrant_filter.extend(conditions)
70
+ # Build the appropriate filter based on operator
71
+ if operator == "AND":
72
+ return models.Filter(must=nested_conditions)
73
+ elif operator == "OR":
74
+ return models.Filter(should=nested_conditions)
75
+ elif operator == "NOT":
76
+ return models.Filter(must_not=nested_conditions)
136
77
 
137
- return qdrant_filter
78
+ return None
138
79
 
139
80
 
140
- def build_filters_for_repeated_operators(
141
- must_clauses: List,
142
- should_clauses: List,
143
- must_not_clauses: List,
144
- qdrant_filter: List[models.Filter],
145
- ) -> List[models.Filter]:
146
- """
147
- Flattens the nested lists of clauses by creating separate Filters for each clause of a logical operator.
81
+ def _process_comparison_operator(item: Dict[str, Any]) -> Optional[models.Condition]:
82
+ """Process a comparison operator and return the corresponding condition."""
83
+ operator = item["operator"]
84
+ field = item.get("field")
85
+ value = item.get("value")
148
86
 
149
- :param must_clauses: a nested list of must clauses or an empty list.
150
- :param should_clauses: a nested list of should clauses or an empty list.
151
- :param must_not_clauses: a nested list of must_not clauses or an empty list.
152
- :param qdrant_filter: a list where the generated Filter objects will be appended.
153
- This list will be modified in-place.
87
+ if field is None or value is None:
88
+ msg = f"'field' or 'value' not found for '{operator}'"
89
+ raise FilterError(msg)
154
90
 
91
+ return _build_comparison_condition(operator, field, value)
155
92
 
156
- :returns: the modified `qdrant_filter` list with appended generated Filter objects.
157
- """
158
93
 
159
- if any(isinstance(i, list) for i in must_clauses):
160
- for i in must_clauses:
161
- qdrant_filter.append(
162
- models.Filter(
163
- must=i or None,
164
- should=should_clauses or None,
165
- must_not=must_not_clauses or None,
166
- )
167
- )
168
- if any(isinstance(i, list) for i in should_clauses):
169
- for i in should_clauses:
170
- qdrant_filter.append(
171
- models.Filter(
172
- must=must_clauses or None,
173
- should=i or None,
174
- must_not=must_not_clauses or None,
175
- )
176
- )
177
- if any(isinstance(i, list) for i in must_not_clauses):
178
- for i in must_clauses:
179
- qdrant_filter.append(
180
- models.Filter(
181
- must=must_clauses or None,
182
- should=should_clauses or None,
183
- must_not=i or None,
184
- )
185
- )
94
+ def _build_final_filter(conditions: List[models.Condition]) -> Optional[models.Filter]:
95
+ """Build the final filter from a list of conditions."""
96
+ if not conditions:
97
+ return None
186
98
 
187
- return qdrant_filter
99
+ if len(conditions) == 1:
100
+ # If single condition and it's already a Filter, return it
101
+ if isinstance(conditions[0], models.Filter):
102
+ return conditions[0]
103
+ # Otherwise wrap it in a Filter
104
+ return models.Filter(must=[conditions[0]])
188
105
 
106
+ # Multiple conditions - combine with AND logic
107
+ return models.Filter(must=conditions)
189
108
 
190
- def _parse_comparison_operation(
191
- comparison_operation: str, key: str, value: Union[dict, List, str, float]
192
- ) -> List[models.Condition]:
193
- conditions: List[models.Condition] = []
194
109
 
195
- condition_builder_mapping = {
110
+ def _build_comparison_condition(operator: str, key: str, value: Any) -> models.Condition:
111
+ """Build a comparison condition based on operator, key, and value."""
112
+ condition_builders: Dict[str, Callable[[str, Any], models.Condition]] = {
196
113
  "==": _build_eq_condition,
197
114
  "in": _build_in_condition,
198
115
  "!=": _build_ne_condition,
@@ -203,15 +120,12 @@ def _parse_comparison_operation(
203
120
  "<=": _build_lte_condition,
204
121
  }
205
122
 
206
- condition_builder = condition_builder_mapping.get(comparison_operation)
207
-
208
- if condition_builder is None:
209
- msg = f"Unknown operator {comparison_operation} used in filters"
210
- raise ValueError(msg)
211
-
212
- conditions.append(condition_builder(key, value))
123
+ builder = condition_builders.get(operator)
124
+ if builder is None:
125
+ msg = f"Unknown operator {operator} used in filters"
126
+ raise FilterError(msg)
213
127
 
214
- return conditions
128
+ return builder(key, value)
215
129
 
216
130
 
217
131
  def _build_eq_condition(key: str, value: models.ValueVariants) -> models.Condition:
@@ -266,7 +180,8 @@ def _build_nin_condition(key: str, value: List[models.ValueVariants]) -> models.
266
180
 
267
181
  def _build_lt_condition(key: str, value: Union[str, float, int]) -> models.Condition:
268
182
  if isinstance(value, str) and is_datetime_string(value):
269
- return models.FieldCondition(key=key, range=models.DatetimeRange(lt=value))
183
+ dt_value = datetime.fromisoformat(value)
184
+ return models.FieldCondition(key=key, range=models.DatetimeRange(lt=dt_value))
270
185
 
271
186
  if isinstance(value, (int, float)):
272
187
  return models.FieldCondition(key=key, range=models.Range(lt=value))
@@ -277,7 +192,8 @@ def _build_lt_condition(key: str, value: Union[str, float, int]) -> models.Condi
277
192
 
278
193
  def _build_lte_condition(key: str, value: Union[str, float, int]) -> models.Condition:
279
194
  if isinstance(value, str) and is_datetime_string(value):
280
- return models.FieldCondition(key=key, range=models.DatetimeRange(lte=value))
195
+ dt_value = datetime.fromisoformat(value)
196
+ return models.FieldCondition(key=key, range=models.DatetimeRange(lte=dt_value))
281
197
 
282
198
  if isinstance(value, (int, float)):
283
199
  return models.FieldCondition(key=key, range=models.Range(lte=value))
@@ -288,7 +204,8 @@ def _build_lte_condition(key: str, value: Union[str, float, int]) -> models.Cond
288
204
 
289
205
  def _build_gt_condition(key: str, value: Union[str, float, int]) -> models.Condition:
290
206
  if isinstance(value, str) and is_datetime_string(value):
291
- return models.FieldCondition(key=key, range=models.DatetimeRange(gt=value))
207
+ dt_value = datetime.fromisoformat(value)
208
+ return models.FieldCondition(key=key, range=models.DatetimeRange(gt=dt_value))
292
209
 
293
210
  if isinstance(value, (int, float)):
294
211
  return models.FieldCondition(key=key, range=models.Range(gt=value))
@@ -299,7 +216,8 @@ def _build_gt_condition(key: str, value: Union[str, float, int]) -> models.Condi
299
216
 
300
217
  def _build_gte_condition(key: str, value: Union[str, float, int]) -> models.Condition:
301
218
  if isinstance(value, str) and is_datetime_string(value):
302
- return models.FieldCondition(key=key, range=models.DatetimeRange(gte=value))
219
+ dt_value = datetime.fromisoformat(value)
220
+ return models.FieldCondition(key=key, range=models.DatetimeRange(gte=dt_value))
303
221
 
304
222
  if isinstance(value, (int, float)):
305
223
  return models.FieldCondition(key=key, range=models.Range(gte=value))
@@ -1,17 +1,19 @@
1
- import logging as python_logging
1
+ # mypy: disable-error-code="assignment, arg-type"
2
+
3
+ import logging
2
4
  import time
3
5
 
4
- from haystack import logging
5
6
  from qdrant_client.http import models
6
7
 
7
8
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
8
9
 
10
+ # using Haystack logging is problematic here
9
11
  logger = logging.getLogger(__name__)
10
- logger.addHandler(python_logging.StreamHandler())
11
- logger.setLevel(python_logging.INFO)
12
+ logger.addHandler(logging.StreamHandler())
13
+ logger.setLevel(logging.INFO)
12
14
 
13
15
 
14
- def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str) -> None:
16
+ def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str) -> None: # type: ignore
15
17
  """
16
18
  Utility function to migrate an existing `QdrantDocumentStore` to a new one with support for sparse embeddings.
17
19
 
@@ -61,8 +63,10 @@ def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore
61
63
  init_parameters["recreate_index"] = True
62
64
 
63
65
  new_document_store = QdrantDocumentStore(**init_parameters)
66
+ new_document_store._initialize_client()
67
+ assert new_document_store._client is not None
64
68
 
65
- client = new_document_store.client
69
+ client = new_document_store._client
66
70
 
67
71
  original_indexing_threshold = client.get_collection(
68
72
  collection_name=new_index
@@ -115,7 +119,7 @@ def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore
115
119
 
116
120
  message = (
117
121
  f"Points transmitted: {points_transmitted}/{total_points}\n"
118
- f"Percent done {points_transmitted/total_points*100:.2f}%\n"
122
+ f"Percent done {points_transmitted / total_points * 100:.2f}%\n"
119
123
  f"Time elapsed: {time.time() - start:.2f} seconds\n"
120
124
  f"Time remaining: {(((time.time() - start) / points_transmitted) * points_remaining) / 60:.2f} minutes\n"
121
125
  f"Current offset: {next_page_offset}\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qdrant-haystack
3
- Version: 9.1.3
3
+ Version: 9.2.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -45,7 +45,7 @@ pip install qdrant-haystack
45
45
  The test suites use Qdrant's in-memory instance. No additional steps required.
46
46
 
47
47
  ```console
48
- hatch run test
48
+ hatch run test:all
49
49
  ```
50
50
 
51
51
  ## License
@@ -0,0 +1,13 @@
1
+ haystack_integrations/components/retrievers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=AE1hdw4sqb0rTSqfAxKCRUOZVE8gbHdQ1wDccdN86hc,313
3
+ haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=2NcFyZQrM7USrdclYqwhFjc6xOxpxlGWrDECrQapUf4,32850
4
+ haystack_integrations/document_stores/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
6
+ haystack_integrations/document_stores/qdrant/converters.py,sha256=oAgBXAjIrcjWRr8SlhcNZwUeF34cviKeJnUFqhFBNBo,2642
7
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=jOFDwEVvUFBmSdwVDhYNCq3uwVXjP9ERemWFr32OuKQ,72454
8
+ haystack_integrations/document_stores/qdrant/filters.py,sha256=s5Y3ISe2yGfKijiiPMYA51-R54HMz2YWHXXJgQ-TL8c,8361
9
+ haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=DcQ9_Ilx1fgXOzjsUDpuaM9TKh2utmVm2hYFn-V2CkQ,5129
10
+ qdrant_haystack-9.2.0.dist-info/METADATA,sha256=iTc-OW227ceEtgKyP9Z9UBUf5NA4y4wTi1DzYJcSAw0,1927
11
+ qdrant_haystack-9.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ qdrant_haystack-9.2.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
13
+ qdrant_haystack-9.2.0.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=AE1hdw4sqb0rTSqfAxKCRUOZVE8gbHdQ1wDccdN86hc,313
2
- haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=Ni_aWM_JslmrWdfUl0TenOOxT15YJmMpSSSRAP3qPYs,29298
3
- haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
4
- haystack_integrations/document_stores/qdrant/converters.py,sha256=iVhAZ7wdRxRjfLVMHB1JdAhn7LpU5bwza1obGmEePWU,2506
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=PVvWBKJZXQRG2-TiFp0lZLd7nOChVh0gIipRXoaVYaM,70829
6
- haystack_integrations/document_stores/qdrant/filters.py,sha256=e7y-Pqf6S2v1jd-1jCNdzD4sVGGI4x7f3Q16lP21NrQ,11790
7
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=o66D6VaDEtz_zFYmZw_jsbBTXb9MGX15JnfAzyo7Wq0,4979
8
- qdrant_haystack-9.1.3.dist-info/METADATA,sha256=IWKyGxGKrMNSBGMUttB-28RaO_dLExuhAxn0LMJHnOc,1923
9
- qdrant_haystack-9.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- qdrant_haystack-9.1.3.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
- qdrant_haystack-9.1.3.dist-info/RECORD,,