qdrant-haystack 4.0.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -2,6 +2,8 @@ from typing import Any, Dict, List, Optional, Union
2
2
 
3
3
  from haystack import Document, component, default_from_dict, default_to_dict
4
4
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
5
+ from haystack.document_stores.types import FilterPolicy
6
+ from haystack.document_stores.types.filter_policy import apply_filter_policy
5
7
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
6
8
  from qdrant_client.http import models
7
9
 
@@ -39,6 +41,8 @@ class QdrantEmbeddingRetriever:
39
41
  top_k: int = 10,
40
42
  scale_score: bool = False,
41
43
  return_embedding: bool = False,
44
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
45
+ score_threshold: Optional[float] = None,
42
46
  ):
43
47
  """
44
48
  Create a QdrantEmbeddingRetriever component.
@@ -48,6 +52,11 @@ class QdrantEmbeddingRetriever:
48
52
  :param top_k: The maximum number of documents to retrieve.
49
53
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
50
54
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
55
+ :param filter_policy: Policy to determine how filters are applied.
56
+ :param score_threshold: A minimal score threshold for the result.
57
+ Score of the returned result might be higher or smaller than the threshold
58
+ depending on the `similarity` function specified in the Document Store.
59
+ E.g. for cosine similarity only higher scores will be returned.
51
60
 
52
61
  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
53
62
  """
@@ -61,6 +70,10 @@ class QdrantEmbeddingRetriever:
61
70
  self._top_k = top_k
62
71
  self._scale_score = scale_score
63
72
  self._return_embedding = return_embedding
73
+ self._filter_policy = (
74
+ filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
75
+ )
76
+ self._score_threshold = score_threshold
64
77
 
65
78
  def to_dict(self) -> Dict[str, Any]:
66
79
  """
@@ -74,8 +87,10 @@ class QdrantEmbeddingRetriever:
74
87
  document_store=self._document_store,
75
88
  filters=self._filters,
76
89
  top_k=self._top_k,
90
+ filter_policy=self._filter_policy.value,
77
91
  scale_score=self._scale_score,
78
92
  return_embedding=self._return_embedding,
93
+ score_threshold=self._score_threshold,
79
94
  )
80
95
  d["init_parameters"]["document_store"] = self._document_store.to_dict()
81
96
 
@@ -93,6 +108,7 @@ class QdrantEmbeddingRetriever:
93
108
  """
94
109
  document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
95
110
  data["init_parameters"]["document_store"] = document_store
111
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
96
112
  return default_from_dict(cls, data)
97
113
 
98
114
  @component.output_types(documents=List[Document])
@@ -103,6 +119,7 @@ class QdrantEmbeddingRetriever:
103
119
  top_k: Optional[int] = None,
104
120
  scale_score: Optional[bool] = None,
105
121
  return_embedding: Optional[bool] = None,
122
+ score_threshold: Optional[float] = None,
106
123
  ):
107
124
  """
108
125
  Run the Embedding Retriever on the given input data.
@@ -112,16 +129,20 @@ class QdrantEmbeddingRetriever:
112
129
  :param top_k: The maximum number of documents to return.
113
130
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
114
131
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
132
+ :param score_threshold: A minimal score threshold for the result.
115
133
  :returns:
116
134
  The retrieved documents.
117
135
 
118
136
  """
137
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
138
+
119
139
  docs = self._document_store._query_by_embedding(
120
140
  query_embedding=query_embedding,
121
- filters=filters or self._filters,
141
+ filters=filters,
122
142
  top_k=top_k or self._top_k,
123
143
  scale_score=scale_score or self._scale_score,
124
144
  return_embedding=return_embedding or self._return_embedding,
145
+ score_threshold=score_threshold or self._score_threshold,
125
146
  )
126
147
 
127
148
  return {"documents": docs}
@@ -161,6 +182,8 @@ class QdrantSparseEmbeddingRetriever:
161
182
  top_k: int = 10,
162
183
  scale_score: bool = False,
163
184
  return_embedding: bool = False,
185
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
186
+ score_threshold: Optional[float] = None,
164
187
  ):
165
188
  """
166
189
  Create a QdrantSparseEmbeddingRetriever component.
@@ -170,6 +193,11 @@ class QdrantSparseEmbeddingRetriever:
170
193
  :param top_k: The maximum number of documents to retrieve.
171
194
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
172
195
  :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
196
+ :param filter_policy: Policy to determine how filters are applied. Defaults to "replace".
197
+ :param score_threshold: A minimal score threshold for the result.
198
+ Score of the returned result might be higher or smaller than the threshold
199
+ depending on the Distance function used.
200
+ E.g. for cosine similarity only higher scores will be returned.
173
201
 
174
202
  :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
175
203
  """
@@ -183,6 +211,10 @@ class QdrantSparseEmbeddingRetriever:
183
211
  self._top_k = top_k
184
212
  self._scale_score = scale_score
185
213
  self._return_embedding = return_embedding
214
+ self._filter_policy = (
215
+ filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
216
+ )
217
+ self._score_threshold = score_threshold
186
218
 
187
219
  def to_dict(self) -> Dict[str, Any]:
188
220
  """
@@ -197,7 +229,9 @@ class QdrantSparseEmbeddingRetriever:
197
229
  filters=self._filters,
198
230
  top_k=self._top_k,
199
231
  scale_score=self._scale_score,
232
+ filter_policy=self._filter_policy.value,
200
233
  return_embedding=self._return_embedding,
234
+ score_threshold=self._score_threshold,
201
235
  )
202
236
  d["init_parameters"]["document_store"] = self._document_store.to_dict()
203
237
 
@@ -215,6 +249,7 @@ class QdrantSparseEmbeddingRetriever:
215
249
  """
216
250
  document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
217
251
  data["init_parameters"]["document_store"] = document_store
252
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
218
253
  return default_from_dict(cls, data)
219
254
 
220
255
  @component.output_types(documents=List[Document])
@@ -225,25 +260,35 @@ class QdrantSparseEmbeddingRetriever:
225
260
  top_k: Optional[int] = None,
226
261
  scale_score: Optional[bool] = None,
227
262
  return_embedding: Optional[bool] = None,
263
+ score_threshold: Optional[float] = None,
228
264
  ):
229
265
  """
230
266
  Run the Sparse Embedding Retriever on the given input data.
231
267
 
232
268
  :param query_sparse_embedding: Sparse Embedding of the query.
233
- :param filters: A dictionary with filters to narrow down the search space.
269
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
270
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
271
+ details.
234
272
  :param top_k: The maximum number of documents to return.
235
273
  :param scale_score: Whether to scale the scores of the retrieved documents or not.
236
274
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
275
+ :param score_threshold: A minimal score threshold for the result.
276
+ Score of the returned result might be higher or smaller than the threshold
277
+ depending on the Distance function used.
278
+ E.g. for cosine similarity only higher scores will be returned.
237
279
  :returns:
238
280
  The retrieved documents.
239
281
 
240
282
  """
283
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
284
+
241
285
  docs = self._document_store._query_by_sparse(
242
286
  query_sparse_embedding=query_sparse_embedding,
243
- filters=filters or self._filters,
287
+ filters=filters,
244
288
  top_k=top_k or self._top_k,
245
289
  scale_score=scale_score or self._scale_score,
246
290
  return_embedding=return_embedding or self._return_embedding,
291
+ score_threshold=score_threshold or self._score_threshold,
247
292
  )
248
293
 
249
294
  return {"documents": docs}
@@ -288,6 +333,8 @@ class QdrantHybridRetriever:
288
333
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
289
334
  top_k: int = 10,
290
335
  return_embedding: bool = False,
336
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
337
+ score_threshold: Optional[float] = None,
291
338
  ):
292
339
  """
293
340
  Create a QdrantHybridRetriever component.
@@ -296,6 +343,11 @@ class QdrantHybridRetriever:
296
343
  :param filters: A dictionary with filters to narrow down the search space.
297
344
  :param top_k: The maximum number of documents to retrieve.
298
345
  :param return_embedding: Whether to return the embeddings of the retrieved Documents.
346
+ :param filter_policy: Policy to determine how filters are applied.
347
+ :param score_threshold: A minimal score threshold for the result.
348
+ Score of the returned result might be higher or smaller than the threshold
349
+ depending on the Distance function used.
350
+ E.g. for cosine similarity only higher scores will be returned.
299
351
 
300
352
  :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
301
353
  """
@@ -308,6 +360,10 @@ class QdrantHybridRetriever:
308
360
  self._filters = filters
309
361
  self._top_k = top_k
310
362
  self._return_embedding = return_embedding
363
+ self._filter_policy = (
364
+ filter_policy if isinstance(filter_policy, FilterPolicy) else FilterPolicy.from_str(filter_policy)
365
+ )
366
+ self._score_threshold = score_threshold
311
367
 
312
368
  def to_dict(self) -> Dict[str, Any]:
313
369
  """
@@ -321,7 +377,9 @@ class QdrantHybridRetriever:
321
377
  document_store=self._document_store.to_dict(),
322
378
  filters=self._filters,
323
379
  top_k=self._top_k,
380
+ filter_policy=self._filter_policy.value,
324
381
  return_embedding=self._return_embedding,
382
+ score_threshold=self._score_threshold,
325
383
  )
326
384
 
327
385
  @classmethod
@@ -336,6 +394,7 @@ class QdrantHybridRetriever:
336
394
  """
337
395
  document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
338
396
  data["init_parameters"]["document_store"] = document_store
397
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
339
398
  return default_from_dict(cls, data)
340
399
 
341
400
  @component.output_types(documents=List[Document])
@@ -346,25 +405,35 @@ class QdrantHybridRetriever:
346
405
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
347
406
  top_k: Optional[int] = None,
348
407
  return_embedding: Optional[bool] = None,
408
+ score_threshold: Optional[float] = None,
349
409
  ):
350
410
  """
351
411
  Run the Sparse Embedding Retriever on the given input data.
352
412
 
353
413
  :param query_embedding: Dense embedding of the query.
354
414
  :param query_sparse_embedding: Sparse embedding of the query.
355
- :param filters: A dictionary with filters to narrow down the search space.
415
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
416
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
417
+ details.
356
418
  :param top_k: The maximum number of documents to return.
357
419
  :param return_embedding: Whether to return the embedding of the retrieved Documents.
420
+ :param score_threshold: A minimal score threshold for the result.
421
+ Score of the returned result might be higher or smaller than the threshold
422
+ depending on the Distance function used.
423
+ E.g. for cosine similarity only higher scores will be returned.
358
424
  :returns:
359
425
  The retrieved documents.
360
426
 
361
427
  """
428
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
429
+
362
430
  docs = self._document_store._query_hybrid(
363
431
  query_embedding=query_embedding,
364
432
  query_sparse_embedding=query_sparse_embedding,
365
- filters=filters or self._filters,
433
+ filters=filters,
366
434
  top_k=top_k or self._top_k,
367
435
  return_embedding=return_embedding or self._return_embedding,
436
+ score_threshold=score_threshold or self._score_threshold,
368
437
  )
369
438
 
370
439
  return {"documents": docs}
@@ -111,6 +111,7 @@ class QdrantDocumentStore:
111
111
  embedding_dim: int = 768,
112
112
  on_disk: bool = False,
113
113
  use_sparse_embeddings: bool = False,
114
+ sparse_idf: bool = False,
114
115
  similarity: str = "cosine",
115
116
  return_embedding: bool = False,
116
117
  progress_bar: bool = True,
@@ -168,6 +169,9 @@ class QdrantDocumentStore:
168
169
  Whether to store the collection on disk.
169
170
  :param use_sparse_embedding:
170
171
  If set to `True`, enables support for sparse embeddings.
172
+ :param sparse_idf:
173
+ If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
174
+ It is required to use techniques like BM42. It is ignored if `use_sparse_embeddings` is `False`.
171
175
  :param similarity:
172
176
  The similarity metric to use.
173
177
  :param return_embedding:
@@ -246,6 +250,7 @@ class QdrantDocumentStore:
246
250
  self.recreate_index = recreate_index
247
251
  self.payload_fields_to_index = payload_fields_to_index
248
252
  self.use_sparse_embeddings = use_sparse_embeddings
253
+ self.sparse_idf = use_sparse_embeddings and sparse_idf
249
254
  self.embedding_dim = embedding_dim
250
255
  self.on_disk = on_disk
251
256
  self.similarity = similarity
@@ -280,6 +285,7 @@ class QdrantDocumentStore:
280
285
  self.recreate_index,
281
286
  self.similarity,
282
287
  self.use_sparse_embeddings,
288
+ self.sparse_idf,
283
289
  self.on_disk,
284
290
  self.payload_fields_to_index,
285
291
  )
@@ -347,7 +353,9 @@ class QdrantDocumentStore:
347
353
  if not isinstance(doc, Document):
348
354
  msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
349
355
  raise ValueError(msg)
350
- self._set_up_collection(self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings)
356
+ self._set_up_collection(
357
+ self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings, self.sparse_idf
358
+ )
351
359
 
352
360
  if len(documents) == 0:
353
361
  logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
@@ -498,6 +506,7 @@ class QdrantDocumentStore:
498
506
  top_k: int = 10,
499
507
  scale_score: bool = False,
500
508
  return_embedding: bool = False,
509
+ score_threshold: Optional[float] = None,
501
510
  ) -> List[Document]:
502
511
  """
503
512
  Queries Qdrant using a sparse embedding and returns the most relevant documents.
@@ -507,6 +516,10 @@ class QdrantDocumentStore:
507
516
  :param top_k: Maximum number of documents to return.
508
517
  :param scale_score: Whether to scale the scores of the retrieved documents.
509
518
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
519
+ :param score_threshold: A minimal score threshold for the result.
520
+ Score of the returned result might be higher or smaller than the threshold
521
+ depending on the Distance function used.
522
+ E.g. for cosine similarity only higher scores will be returned.
510
523
 
511
524
  :returns: List of documents that are most similar to `query_sparse_embedding`.
512
525
 
@@ -536,6 +549,7 @@ class QdrantDocumentStore:
536
549
  query_filter=qdrant_filters,
537
550
  limit=top_k,
538
551
  with_vectors=return_embedding,
552
+ score_threshold=score_threshold,
539
553
  )
540
554
  results = [
541
555
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -555,6 +569,7 @@ class QdrantDocumentStore:
555
569
  top_k: int = 10,
556
570
  scale_score: bool = False,
557
571
  return_embedding: bool = False,
572
+ score_threshold: Optional[float] = None,
558
573
  ) -> List[Document]:
559
574
  """
560
575
  Queries Qdrant using a dense embedding and returns the most relevant documents.
@@ -564,6 +579,10 @@ class QdrantDocumentStore:
564
579
  :param top_k: Maximum number of documents to return.
565
580
  :param scale_score: Whether to scale the scores of the retrieved documents.
566
581
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
582
+ :param score_threshold: A minimal score threshold for the result.
583
+ Score of the returned result might be higher or smaller than the threshold
584
+ depending on the Distance function used.
585
+ E.g. for cosine similarity only higher scores will be returned.
567
586
 
568
587
  :returns: List of documents that are most similar to `query_embedding`.
569
588
  """
@@ -578,6 +597,7 @@ class QdrantDocumentStore:
578
597
  query_filter=qdrant_filters,
579
598
  limit=top_k,
580
599
  with_vectors=return_embedding,
600
+ score_threshold=score_threshold,
581
601
  )
582
602
  results = [
583
603
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -600,6 +620,7 @@ class QdrantDocumentStore:
600
620
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
601
621
  top_k: int = 10,
602
622
  return_embedding: bool = False,
623
+ score_threshold: Optional[float] = None,
603
624
  ) -> List[Document]:
604
625
  """
605
626
  Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
@@ -612,6 +633,10 @@ class QdrantDocumentStore:
612
633
  :param filters: Filters applied to the retrieved documents.
613
634
  :param top_k: Maximum number of documents to return.
614
635
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
636
+ :param score_threshold: A minimal score threshold for the result.
637
+ Score of the returned result might be higher or smaller than the threshold
638
+ depending on the Distance function used.
639
+ E.g. for cosine similarity only higher scores will be returned.
615
640
 
616
641
  :returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
617
642
 
@@ -642,6 +667,7 @@ class QdrantDocumentStore:
642
667
  limit=top_k,
643
668
  with_payload=True,
644
669
  with_vector=return_embedding,
670
+ score_threshold=score_threshold,
645
671
  )
646
672
 
647
673
  dense_request = rest.SearchRequest(
@@ -714,6 +740,7 @@ class QdrantDocumentStore:
714
740
  recreate_collection: bool,
715
741
  similarity: str,
716
742
  use_sparse_embeddings: bool,
743
+ sparse_idf: bool,
717
744
  on_disk: bool = False,
718
745
  payload_fields_to_index: Optional[List[dict]] = None,
719
746
  ):
@@ -729,6 +756,8 @@ class QdrantDocumentStore:
729
756
  The similarity measure to use.
730
757
  :param use_sparse_embeddings:
731
758
  Whether to use sparse embeddings.
759
+ :param sparse_idf:
760
+ Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
732
761
  :param on_disk:
733
762
  Whether to store the collection on disk.
734
763
  :param payload_fields_to_index:
@@ -745,7 +774,9 @@ class QdrantDocumentStore:
745
774
  if recreate_collection or not self.client.collection_exists(collection_name):
746
775
  # There is no need to verify the current configuration of that
747
776
  # collection. It might be just recreated again or does not exist yet.
748
- self.recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
777
+ self.recreate_collection(
778
+ collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings, sparse_idf
779
+ )
749
780
  # Create Payload index if payload_fields_to_index is provided
750
781
  self._create_payload_index(collection_name, payload_fields_to_index)
751
782
  return
@@ -808,6 +839,7 @@ class QdrantDocumentStore:
808
839
  embedding_dim: int,
809
840
  on_disk: Optional[bool] = None,
810
841
  use_sparse_embeddings: Optional[bool] = None,
842
+ sparse_idf: bool = False,
811
843
  ):
812
844
  """
813
845
  Recreates the Qdrant collection with the specified parameters.
@@ -822,6 +854,8 @@ class QdrantDocumentStore:
822
854
  Whether to store the collection on disk.
823
855
  :param use_sparse_embeddings:
824
856
  Whether to use sparse embeddings.
857
+ :param sparse_idf:
858
+ Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
825
859
  """
826
860
  if on_disk is None:
827
861
  on_disk = self.on_disk
@@ -840,7 +874,8 @@ class QdrantDocumentStore:
840
874
  SPARSE_VECTORS_NAME: rest.SparseVectorParams(
841
875
  index=rest.SparseIndexParams(
842
876
  on_disk=on_disk,
843
- )
877
+ ),
878
+ modifier=rest.Modifier.IDF if sparse_idf else None,
844
879
  ),
845
880
  }
846
881
 
@@ -4,28 +4,55 @@ from typing import List, Optional, Union
4
4
  from haystack.utils.filters import COMPARISON_OPERATORS, LOGICAL_OPERATORS, FilterError
5
5
  from qdrant_client.http import models
6
6
 
7
- from .converters import convert_id
8
-
9
7
  COMPARISON_OPERATORS = COMPARISON_OPERATORS.keys()
10
8
  LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
11
9
 
12
10
 
13
11
  def convert_filters_to_qdrant(
14
- filter_term: Optional[Union[List[dict], dict, models.Filter]] = None,
15
- ) -> Optional[models.Filter]:
16
- """Converts Haystack filters to the format used by Qdrant."""
12
+ filter_term: Optional[Union[List[dict], dict, models.Filter]] = None, is_parent_call: bool = True
13
+ ) -> Optional[Union[models.Filter, List[models.Filter], List[models.Condition]]]:
14
+ """Converts Haystack filters to the format used by Qdrant.
15
+
16
+ :param filter_term: the haystack filter to be converted to qdrant.
17
+ :param is_parent_call: indicates if this is the top-level call to the function. If True, the function returns
18
+ a single models.Filter object; if False, it may return a list of filters or conditions for further processing.
19
+
20
+ :returns: a single Qdrant Filter in the parent call or a list of such Filters in recursive calls.
21
+
22
+ :raises FilterError: If the invalid filter criteria is provided or if an unknown operator is encountered.
23
+
24
+ """
25
+
17
26
  if isinstance(filter_term, models.Filter):
18
27
  return filter_term
19
28
  if not filter_term:
20
29
  return None
21
30
 
22
- must_clauses, should_clauses, must_not_clauses = [], [], []
31
+ must_clauses: List[models.Filter] = []
32
+ should_clauses: List[models.Filter] = []
33
+ must_not_clauses: List[models.Filter] = []
34
+ # Indicates if there are multiple same LOGICAL OPERATORS on each level
35
+ # and prevents them from being combined
36
+ same_operator_flag = False
37
+ conditions, qdrant_filter, current_level_operators = (
38
+ [],
39
+ [],
40
+ [],
41
+ )
23
42
 
24
43
  if isinstance(filter_term, dict):
25
44
  filter_term = [filter_term]
26
45
 
46
+ # ======== IDENTIFY FILTER ITEMS ON EACH LEVEL ========
47
+
27
48
  for item in filter_term:
28
49
  operator = item.get("operator")
50
+
51
+ # Check for repeated similar operators on each level
52
+ same_operator_flag = operator in current_level_operators and operator in LOGICAL_OPERATORS
53
+ if not same_operator_flag:
54
+ current_level_operators.append(operator)
55
+
29
56
  if operator is None:
30
57
  msg = "Operator not found in filters"
31
58
  raise FilterError(msg)
@@ -34,12 +61,23 @@ def convert_filters_to_qdrant(
34
61
  msg = f"'conditions' not found for '{operator}'"
35
62
  raise FilterError(msg)
36
63
 
37
- if operator == "AND":
38
- must_clauses.append(convert_filters_to_qdrant(item.get("conditions", [])))
39
- elif operator == "OR":
40
- should_clauses.append(convert_filters_to_qdrant(item.get("conditions", [])))
41
- elif operator == "NOT":
42
- must_not_clauses.append(convert_filters_to_qdrant(item.get("conditions", [])))
64
+ if operator in LOGICAL_OPERATORS:
65
+ # Recursively process nested conditions
66
+ current_filter = convert_filters_to_qdrant(item.get("conditions", []), is_parent_call=False) or []
67
+
68
+ # When same_operator_flag is set to True,
69
+ # ensure each clause is appended as an independent list to avoid merging distinct clauses.
70
+ if operator == "AND":
71
+ must_clauses = [must_clauses, current_filter] if same_operator_flag else must_clauses + current_filter
72
+ elif operator == "OR":
73
+ should_clauses = (
74
+ [should_clauses, current_filter] if same_operator_flag else should_clauses + current_filter
75
+ )
76
+ elif operator == "NOT":
77
+ must_not_clauses = (
78
+ [must_not_clauses, current_filter] if same_operator_flag else must_not_clauses + current_filter
79
+ )
80
+
43
81
  elif operator in COMPARISON_OPERATORS:
44
82
  field = item.get("field")
45
83
  value = item.get("value")
@@ -47,20 +85,106 @@ def convert_filters_to_qdrant(
47
85
  msg = f"'field' or 'value' not found for '{operator}'"
48
86
  raise FilterError(msg)
49
87
 
50
- must_clauses.extend(_parse_comparison_operation(comparison_operation=operator, key=field, value=value))
88
+ parsed_conditions = _parse_comparison_operation(comparison_operation=operator, key=field, value=value)
89
+
90
+ # check if the parsed_conditions are models.Filter or models.Condition
91
+ for condition in parsed_conditions:
92
+ if isinstance(condition, models.Filter):
93
+ qdrant_filter.append(condition)
94
+ else:
95
+ conditions.append(condition)
96
+
51
97
  else:
52
98
  msg = f"Unknown operator {operator} used in filters"
53
99
  raise FilterError(msg)
54
100
 
55
- payload_filter = models.Filter(
56
- must=must_clauses or None,
57
- should=should_clauses or None,
58
- must_not=must_not_clauses or None,
59
- )
101
+ # ======== PROCESS FILTER ITEMS ON EACH LEVEL ========
102
+
103
+ # If same logical operators have separate clauses, create separate filters
104
+ if same_operator_flag:
105
+ qdrant_filter = build_filters_for_repeated_operators(
106
+ must_clauses, should_clauses, must_not_clauses, qdrant_filter
107
+ )
108
+
109
+ # else append a single Filter for existing clauses
110
+ elif must_clauses or should_clauses or must_not_clauses:
111
+ qdrant_filter.append(
112
+ models.Filter(
113
+ must=must_clauses or None,
114
+ should=should_clauses or None,
115
+ must_not=must_not_clauses or None,
116
+ )
117
+ )
118
+
119
+ # In case of parent call, a single Filter is returned
120
+ if is_parent_call:
121
+ # If qdrant_filter has just a single Filter in parent call,
122
+ # then it might be returned instead.
123
+ if len(qdrant_filter) == 1 and isinstance(qdrant_filter[0], models.Filter):
124
+ return qdrant_filter[0]
125
+ else:
126
+ must_clauses.extend(conditions)
127
+ return models.Filter(
128
+ must=must_clauses or None,
129
+ should=should_clauses or None,
130
+ must_not=must_not_clauses or None,
131
+ )
132
+
133
+ # Store conditions of each level in output of the loop
134
+ elif conditions:
135
+ qdrant_filter.extend(conditions)
136
+
137
+ return qdrant_filter
138
+
139
+
140
def build_filters_for_repeated_operators(
    must_clauses,
    should_clauses,
    must_not_clauses,
    qdrant_filter,
) -> List[models.Filter]:
    """
    Flattens the nested lists of clauses by creating separate Filters for each clause of a logical operator.

    A clause list is treated as "nested" when at least one of its elements is itself a list;
    in that case one Filter is emitted per element of that list, while the other two clause
    lists are passed through unchanged to every emitted Filter.

    :param must_clauses: a nested list of must clauses or an empty list.
    :param should_clauses: a nested list of should clauses or an empty list.
    :param must_not_clauses: a nested list of must_not clauses or an empty list.
    :param qdrant_filter: a list where the generated Filter objects will be appended.
        This list will be modified in-place.

    :returns: the modified `qdrant_filter` list with appended generated Filter objects.
    """

    if any(isinstance(i, list) for i in must_clauses):
        for i in must_clauses:
            qdrant_filter.append(
                models.Filter(
                    must=i or None,
                    should=should_clauses or None,
                    must_not=must_not_clauses or None,
                )
            )
    if any(isinstance(i, list) for i in should_clauses):
        for i in should_clauses:
            qdrant_filter.append(
                models.Filter(
                    must=must_clauses or None,
                    should=i or None,
                    must_not=must_not_clauses or None,
                )
            )
    if any(isinstance(i, list) for i in must_not_clauses):
        # Iterate the must_not groups themselves. Looping over `must_clauses` here
        # silently dropped every repeated NOT group when there were no must clauses,
        # and otherwise emitted the must conditions as must_not conditions.
        for i in must_not_clauses:
            qdrant_filter.append(
                models.Filter(
                    must=must_clauses or None,
                    should=should_clauses or None,
                    must_not=i or None,
                )
            )

    return qdrant_filter
64
188
 
65
189
 
66
190
  def _parse_comparison_operation(
@@ -92,7 +216,7 @@ def _parse_comparison_operation(
92
216
 
93
217
  def _build_eq_condition(key: str, value: models.ValueVariants) -> models.Condition:
94
218
  if isinstance(value, str) and " " in value:
95
- models.FieldCondition(key=key, match=models.MatchText(text=value))
219
+ return models.FieldCondition(key=key, match=models.MatchText(text=value))
96
220
  return models.FieldCondition(key=key, match=models.MatchValue(value=value))
97
221
 
98
222
 
@@ -184,52 +308,6 @@ def _build_gte_condition(key: str, value: Union[str, float, int]) -> models.Cond
184
308
  raise FilterError(msg)
185
309
 
186
310
 
187
- def _build_has_id_condition(id_values: List[models.ExtendedPointId]) -> models.HasIdCondition:
188
- return models.HasIdCondition(
189
- has_id=[
190
- # Ids are converted into their internal representation
191
- convert_id(item)
192
- for item in id_values
193
- ]
194
- )
195
-
196
-
197
- def _squeeze_filter(payload_filter: models.Filter) -> models.Filter:
198
- """
199
- Simplify given payload filter, if the nested structure might be unnested.
200
- That happens if there is a single clause in that filter.
201
- :param payload_filter:
202
- :returns:
203
- """
204
- filter_parts = {
205
- "must": payload_filter.must,
206
- "should": payload_filter.should,
207
- "must_not": payload_filter.must_not,
208
- }
209
-
210
- total_clauses = sum(len(x) for x in filter_parts.values() if x is not None)
211
- if total_clauses == 0 or total_clauses > 1:
212
- return payload_filter
213
-
214
- # Payload filter has just a single clause provided (either must, should
215
- # or must_not). If that single clause is also of a models.Filter type,
216
- # then it might be returned instead.
217
- for part_name, filter_part in filter_parts.items():
218
- if not filter_part:
219
- continue
220
-
221
- subfilter = filter_part[0]
222
- if not isinstance(subfilter, models.Filter):
223
- # The inner statement is a simple condition like models.FieldCondition
224
- # so it cannot be simplified.
225
- continue
226
-
227
- if subfilter.must:
228
- return models.Filter(**{part_name: subfilter.must})
229
-
230
- return payload_filter
231
-
232
-
233
311
  def is_datetime_string(value: str) -> bool:
234
312
  try:
235
313
  datetime.fromisoformat(value)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 4.0.0
3
+ Version: 4.1.1
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -18,8 +18,8 @@ Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
19
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
20
  Requires-Python: >=3.8
21
- Requires-Dist: haystack-ai>=2.0.1
22
- Requires-Dist: qdrant-client
21
+ Requires-Dist: haystack-ai
22
+ Requires-Dist: qdrant-client>=1.10.0
23
23
  Description-Content-Type: text/markdown
24
24
 
25
25
  # qdrant-haystack
@@ -1,11 +1,11 @@
1
1
  haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
2
- haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=_6noYJ0M71shgoTOywIgSuGQtB-CBhwRW_zUFiYIOTw,13465
2
+ haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=55IY5bmNvFe62abNBfDOhuo1I38-ue713c8gMNgkfuY,17947
3
3
  haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
4
4
  haystack_integrations/document_stores/qdrant/converters.py,sha256=2hcuI3kty1dVHzX1WGXxEtlrnZ9E8TAG56XATCFa6Pw,2491
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=mjzv6Z3iE9oFRil_PVLjmEq-vX7a7ULpT5afGsU7iSU,36088
6
- haystack_integrations/document_stores/qdrant/filters.py,sha256=0w70Wa3Za1fNdbJ5O95sZDIpXfblJG_sBBUv0JTQ0-o,8337
5
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=eLw4P1h8GCj40R-BIlQOvJG9MpDzvtmQ7Hpb3AZhMSo,38117
6
+ haystack_integrations/document_stores/qdrant/filters.py,sha256=Nv_eKIYKwUWvldJfa0omfFQ0kgqi6L3DUFeMuIWziOY,11751
7
7
  haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
8
- qdrant_haystack-4.0.0.dist-info/METADATA,sha256=wHvVJIDCQDPFLX8fL_d11zNMZul4U6r02bVhhCdmitk,1862
9
- qdrant_haystack-4.0.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
- qdrant_haystack-4.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
- qdrant_haystack-4.0.0.dist-info/RECORD,,
8
+ qdrant_haystack-4.1.1.dist-info/METADATA,sha256=BQcRpx4WyYIvccTCEowd4jnc43zoApKx-Ql_BnUqZwM,1863
9
+ qdrant_haystack-4.1.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
+ qdrant_haystack-4.1.1.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
11
+ qdrant_haystack-4.1.1.dist-info/RECORD,,