qdrant-haystack 9.1.1__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
File without changes
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List, Optional, Union
1
+ from typing import Any
2
2
 
3
3
  from haystack import Document, component, default_from_dict, default_to_dict
4
4
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
@@ -8,6 +8,11 @@ from qdrant_client.http import models
8
8
 
9
9
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
10
10
 
11
+ FILTER_POLICY_MERGE_ERROR_MESSAGE = (
12
+ "Native Qdrant filters cannot be used with filter_policy set to MERGE. "
13
+ "Set filter_policy to REPLACE or use Haystack filters instead."
14
+ )
15
+
11
16
 
12
17
  @component
13
18
  class QdrantEmbeddingRetriever:
@@ -38,15 +43,15 @@ class QdrantEmbeddingRetriever:
38
43
  def __init__(
39
44
  self,
40
45
  document_store: QdrantDocumentStore,
41
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
46
+ filters: dict[str, Any] | models.Filter | None = None,
42
47
  top_k: int = 10,
43
48
  scale_score: bool = False,
44
49
  return_embedding: bool = False,
45
- filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
46
- score_threshold: Optional[float] = None,
47
- group_by: Optional[str] = None,
48
- group_size: Optional[int] = None,
49
- ):
50
+ filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
51
+ score_threshold: float | None = None,
52
+ group_by: str | None = None,
53
+ group_size: int | None = None,
54
+ ) -> None:
50
55
  """
51
56
  Create a QdrantEmbeddingRetriever component.
52
57
 
@@ -84,7 +89,7 @@ class QdrantEmbeddingRetriever:
84
89
  self._group_by = group_by
85
90
  self._group_size = group_size
86
91
 
87
- def to_dict(self) -> Dict[str, Any]:
92
+ def to_dict(self) -> dict[str, Any]:
88
93
  """
89
94
  Serializes the component to a dictionary.
90
95
 
@@ -108,7 +113,7 @@ class QdrantEmbeddingRetriever:
108
113
  return d
109
114
 
110
115
  @classmethod
111
- def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
116
+ def from_dict(cls, data: dict[str, Any]) -> "QdrantEmbeddingRetriever":
112
117
  """
113
118
  Deserializes the component from a dictionary.
114
119
 
@@ -125,18 +130,18 @@ class QdrantEmbeddingRetriever:
125
130
  data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
126
131
  return default_from_dict(cls, data)
127
132
 
128
- @component.output_types(documents=List[Document])
133
+ @component.output_types(documents=list[Document])
129
134
  def run(
130
135
  self,
131
- query_embedding: List[float],
132
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
133
- top_k: Optional[int] = None,
134
- scale_score: Optional[bool] = None,
135
- return_embedding: Optional[bool] = None,
136
- score_threshold: Optional[float] = None,
137
- group_by: Optional[str] = None,
138
- group_size: Optional[int] = None,
139
- ):
136
+ query_embedding: list[float],
137
+ filters: dict[str, Any] | models.Filter | None = None,
138
+ top_k: int | None = None,
139
+ scale_score: bool | None = None,
140
+ return_embedding: bool | None = None,
141
+ score_threshold: float | None = None,
142
+ group_by: str | None = None,
143
+ group_size: int | None = None,
144
+ ) -> dict[str, list[Document]]:
140
145
  """
141
146
  Run the Embedding Retriever on the given input data.
142
147
 
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
153
158
  :returns:
154
159
  The retrieved documents.
155
160
 
161
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
156
162
  """
157
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
163
+ if self._filter_policy == FilterPolicy.MERGE and (
164
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
165
+ ):
166
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
167
+
168
+ # Replacing filters works with native Qdrant filters even if the type is wrong
169
+ filters = apply_filter_policy(
170
+ filter_policy=self._filter_policy,
171
+ init_filters=self._filters, # type: ignore[arg-type]
172
+ runtime_filters=filters, # type: ignore[arg-type]
173
+ )
158
174
 
159
175
  docs = self._document_store._query_by_embedding(
160
176
  query_embedding=query_embedding,
@@ -169,18 +185,18 @@ class QdrantEmbeddingRetriever:
169
185
 
170
186
  return {"documents": docs}
171
187
 
172
- @component.output_types(documents=List[Document])
188
+ @component.output_types(documents=list[Document])
173
189
  async def run_async(
174
190
  self,
175
- query_embedding: List[float],
176
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
177
- top_k: Optional[int] = None,
178
- scale_score: Optional[bool] = None,
179
- return_embedding: Optional[bool] = None,
180
- score_threshold: Optional[float] = None,
181
- group_by: Optional[str] = None,
182
- group_size: Optional[int] = None,
183
- ):
191
+ query_embedding: list[float],
192
+ filters: dict[str, Any] | models.Filter | None = None,
193
+ top_k: int | None = None,
194
+ scale_score: bool | None = None,
195
+ return_embedding: bool | None = None,
196
+ score_threshold: float | None = None,
197
+ group_by: str | None = None,
198
+ group_size: int | None = None,
199
+ ) -> dict[str, list[Document]]:
184
200
  """
185
201
  Asynchronously run the Embedding Retriever on the given input data.
186
202
 
@@ -197,8 +213,19 @@ class QdrantEmbeddingRetriever:
197
213
  :returns:
198
214
  The retrieved documents.
199
215
 
216
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
200
217
  """
201
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
218
+ if self._filter_policy == FilterPolicy.MERGE and (
219
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
220
+ ):
221
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
222
+
223
+ # Replacing filters works with native Qdrant filters even if the type is wrong
224
+ filters = apply_filter_policy(
225
+ filter_policy=self._filter_policy,
226
+ init_filters=self._filters, # type: ignore[arg-type]
227
+ runtime_filters=filters, # type: ignore[arg-type]
228
+ )
202
229
 
203
230
  docs = await self._document_store._query_by_embedding_async(
204
231
  query_embedding=query_embedding,
@@ -244,15 +271,15 @@ class QdrantSparseEmbeddingRetriever:
244
271
  def __init__(
245
272
  self,
246
273
  document_store: QdrantDocumentStore,
247
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
274
+ filters: dict[str, Any] | models.Filter | None = None,
248
275
  top_k: int = 10,
249
276
  scale_score: bool = False,
250
277
  return_embedding: bool = False,
251
- filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
252
- score_threshold: Optional[float] = None,
253
- group_by: Optional[str] = None,
254
- group_size: Optional[int] = None,
255
- ):
278
+ filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
279
+ score_threshold: float | None = None,
280
+ group_by: str | None = None,
281
+ group_size: int | None = None,
282
+ ) -> None:
256
283
  """
257
284
  Create a QdrantSparseEmbeddingRetriever component.
258
285
 
@@ -290,7 +317,7 @@ class QdrantSparseEmbeddingRetriever:
290
317
  self._group_by = group_by
291
318
  self._group_size = group_size
292
319
 
293
- def to_dict(self) -> Dict[str, Any]:
320
+ def to_dict(self) -> dict[str, Any]:
294
321
  """
295
322
  Serializes the component to a dictionary.
296
323
 
@@ -314,7 +341,7 @@ class QdrantSparseEmbeddingRetriever:
314
341
  return d
315
342
 
316
343
  @classmethod
317
- def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
344
+ def from_dict(cls, data: dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
318
345
  """
319
346
  Deserializes the component from a dictionary.
320
347
 
@@ -331,18 +358,18 @@ class QdrantSparseEmbeddingRetriever:
331
358
  data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
332
359
  return default_from_dict(cls, data)
333
360
 
334
- @component.output_types(documents=List[Document])
361
+ @component.output_types(documents=list[Document])
335
362
  def run(
336
363
  self,
337
364
  query_sparse_embedding: SparseEmbedding,
338
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
339
- top_k: Optional[int] = None,
340
- scale_score: Optional[bool] = None,
341
- return_embedding: Optional[bool] = None,
342
- score_threshold: Optional[float] = None,
343
- group_by: Optional[str] = None,
344
- group_size: Optional[int] = None,
345
- ):
365
+ filters: dict[str, Any] | models.Filter | None = None,
366
+ top_k: int | None = None,
367
+ scale_score: bool | None = None,
368
+ return_embedding: bool | None = None,
369
+ score_threshold: float | None = None,
370
+ group_by: str | None = None,
371
+ group_size: int | None = None,
372
+ ) -> dict[str, list[Document]]:
346
373
  """
347
374
  Run the Sparse Embedding Retriever on the given input data.
348
375
 
@@ -364,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
364
391
  :returns:
365
392
  The retrieved documents.
366
393
 
394
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
367
395
  """
368
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
396
+ if self._filter_policy == FilterPolicy.MERGE and (
397
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
398
+ ):
399
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
400
+
401
+ # Replacing filters works with native Qdrant filters even if the type is wrong
402
+ filters = apply_filter_policy(
403
+ filter_policy=self._filter_policy,
404
+ init_filters=self._filters, # type: ignore[arg-type]
405
+ runtime_filters=filters, # type: ignore[arg-type]
406
+ )
369
407
 
370
408
  docs = self._document_store._query_by_sparse(
371
409
  query_sparse_embedding=query_sparse_embedding,
@@ -380,18 +418,18 @@ class QdrantSparseEmbeddingRetriever:
380
418
 
381
419
  return {"documents": docs}
382
420
 
383
- @component.output_types(documents=List[Document])
421
+ @component.output_types(documents=list[Document])
384
422
  async def run_async(
385
423
  self,
386
424
  query_sparse_embedding: SparseEmbedding,
387
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
388
- top_k: Optional[int] = None,
389
- scale_score: Optional[bool] = None,
390
- return_embedding: Optional[bool] = None,
391
- score_threshold: Optional[float] = None,
392
- group_by: Optional[str] = None,
393
- group_size: Optional[int] = None,
394
- ):
425
+ filters: dict[str, Any] | models.Filter | None = None,
426
+ top_k: int | None = None,
427
+ scale_score: bool | None = None,
428
+ return_embedding: bool | None = None,
429
+ score_threshold: float | None = None,
430
+ group_by: str | None = None,
431
+ group_size: int | None = None,
432
+ ) -> dict[str, list[Document]]:
395
433
  """
396
434
  Asynchronously run the Sparse Embedding Retriever on the given input data.
397
435
 
@@ -413,8 +451,19 @@ class QdrantSparseEmbeddingRetriever:
413
451
  :returns:
414
452
  The retrieved documents.
415
453
 
454
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
416
455
  """
417
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
456
+ if self._filter_policy == FilterPolicy.MERGE and (
457
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
458
+ ):
459
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
460
+
461
+ # Replacing filters works with native Qdrant filters even if the type is wrong
462
+ filters = apply_filter_policy(
463
+ filter_policy=self._filter_policy,
464
+ init_filters=self._filters, # type: ignore[arg-type]
465
+ runtime_filters=filters, # type: ignore[arg-type]
466
+ )
418
467
 
419
468
  docs = await self._document_store._query_by_sparse_async(
420
469
  query_sparse_embedding=query_sparse_embedding,
@@ -466,14 +515,14 @@ class QdrantHybridRetriever:
466
515
  def __init__(
467
516
  self,
468
517
  document_store: QdrantDocumentStore,
469
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
518
+ filters: dict[str, Any] | models.Filter | None = None,
470
519
  top_k: int = 10,
471
520
  return_embedding: bool = False,
472
- filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
473
- score_threshold: Optional[float] = None,
474
- group_by: Optional[str] = None,
475
- group_size: Optional[int] = None,
476
- ):
521
+ filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
522
+ score_threshold: float | None = None,
523
+ group_by: str | None = None,
524
+ group_size: int | None = None,
525
+ ) -> None:
477
526
  """
478
527
  Create a QdrantHybridRetriever component.
479
528
 
@@ -509,7 +558,7 @@ class QdrantHybridRetriever:
509
558
  self._group_by = group_by
510
559
  self._group_size = group_size
511
560
 
512
- def to_dict(self) -> Dict[str, Any]:
561
+ def to_dict(self) -> dict[str, Any]:
513
562
  """
514
563
  Serializes the component to a dictionary.
515
564
 
@@ -529,7 +578,7 @@ class QdrantHybridRetriever:
529
578
  )
530
579
 
531
580
  @classmethod
532
- def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever":
581
+ def from_dict(cls, data: dict[str, Any]) -> "QdrantHybridRetriever":
533
582
  """
534
583
  Deserializes the component from a dictionary.
535
584
 
@@ -546,18 +595,18 @@ class QdrantHybridRetriever:
546
595
  data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
547
596
  return default_from_dict(cls, data)
548
597
 
549
- @component.output_types(documents=List[Document])
598
+ @component.output_types(documents=list[Document])
550
599
  def run(
551
600
  self,
552
- query_embedding: List[float],
601
+ query_embedding: list[float],
553
602
  query_sparse_embedding: SparseEmbedding,
554
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
555
- top_k: Optional[int] = None,
556
- return_embedding: Optional[bool] = None,
557
- score_threshold: Optional[float] = None,
558
- group_by: Optional[str] = None,
559
- group_size: Optional[int] = None,
560
- ):
603
+ filters: dict[str, Any] | models.Filter | None = None,
604
+ top_k: int | None = None,
605
+ return_embedding: bool | None = None,
606
+ score_threshold: float | None = None,
607
+ group_by: str | None = None,
608
+ group_size: int | None = None,
609
+ ) -> dict[str, list[Document]]:
561
610
  """
562
611
  Run the Sparse Embedding Retriever on the given input data.
563
612
 
@@ -579,8 +628,19 @@ class QdrantHybridRetriever:
579
628
  :returns:
580
629
  The retrieved documents.
581
630
 
631
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
582
632
  """
583
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
633
+ if self._filter_policy == FilterPolicy.MERGE and (
634
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
635
+ ):
636
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
637
+
638
+ # Replacing filters works with native Qdrant filters even if the type is wrong
639
+ filters = apply_filter_policy(
640
+ filter_policy=self._filter_policy,
641
+ init_filters=self._filters, # type: ignore[arg-type]
642
+ runtime_filters=filters, # type: ignore[arg-type]
643
+ )
584
644
 
585
645
  docs = self._document_store._query_hybrid(
586
646
  query_embedding=query_embedding,
@@ -595,18 +655,18 @@ class QdrantHybridRetriever:
595
655
 
596
656
  return {"documents": docs}
597
657
 
598
- @component.output_types(documents=List[Document])
658
+ @component.output_types(documents=list[Document])
599
659
  async def run_async(
600
660
  self,
601
- query_embedding: List[float],
661
+ query_embedding: list[float],
602
662
  query_sparse_embedding: SparseEmbedding,
603
- filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
604
- top_k: Optional[int] = None,
605
- return_embedding: Optional[bool] = None,
606
- score_threshold: Optional[float] = None,
607
- group_by: Optional[str] = None,
608
- group_size: Optional[int] = None,
609
- ):
663
+ filters: dict[str, Any] | models.Filter | None = None,
664
+ top_k: int | None = None,
665
+ return_embedding: bool | None = None,
666
+ score_threshold: float | None = None,
667
+ group_by: str | None = None,
668
+ group_size: int | None = None,
669
+ ) -> dict[str, list[Document]]:
610
670
  """
611
671
  Asynchronously run the Sparse Embedding Retriever on the given input data.
612
672
 
@@ -628,8 +688,19 @@ class QdrantHybridRetriever:
628
688
  :returns:
629
689
  The retrieved documents.
630
690
 
691
+ :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
631
692
  """
632
- filters = apply_filter_policy(self._filter_policy, self._filters, filters)
693
+ if self._filter_policy == FilterPolicy.MERGE and (
694
+ isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
695
+ ):
696
+ raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
697
+
698
+ # Replacing filters works with native Qdrant filters even if the type is wrong
699
+ filters = apply_filter_policy(
700
+ filter_policy=self._filter_policy,
701
+ init_filters=self._filters, # type: ignore[arg-type]
702
+ runtime_filters=filters, # type: ignore[arg-type]
703
+ )
633
704
 
634
705
  docs = await self._document_store._query_hybrid_async(
635
706
  query_embedding=query_embedding,
File without changes
@@ -1,5 +1,4 @@
1
1
  import uuid
2
- from typing import List, Union
3
2
 
4
3
  from haystack import logging
5
4
  from haystack.dataclasses import Document
@@ -15,10 +14,10 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
15
14
 
16
15
 
17
16
  def convert_haystack_documents_to_qdrant_points(
18
- documents: List[Document],
17
+ documents: list[Document],
19
18
  *,
20
19
  use_sparse_embeddings: bool,
21
- ) -> List[rest.PointStruct]:
20
+ ) -> list[rest.PointStruct]:
22
21
  points = []
23
22
  for document in documents:
24
23
  payload = document.to_dict(flatten=False)
@@ -37,7 +36,7 @@ def convert_haystack_documents_to_qdrant_points(
37
36
 
38
37
  else:
39
38
  vector = payload.pop("embedding") or {}
40
- _id = convert_id(payload.get("id"))
39
+ _id = convert_id(document.id)
41
40
 
42
41
  point = rest.PointStruct(
43
42
  payload=payload,
@@ -58,23 +57,25 @@ def convert_id(_id: str) -> str:
58
57
  return uuid.uuid5(UUID_NAMESPACE, _id).hex
59
58
 
60
59
 
61
- QdrantPoint = Union[rest.ScoredPoint, rest.Record]
60
+ QdrantPoint = rest.ScoredPoint | rest.Record
62
61
 
63
62
 
64
63
  def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
65
- payload = {**point.payload}
64
+ payload = point.payload or {}
66
65
  payload["score"] = point.score if hasattr(point, "score") else None
67
66
 
68
67
  if not use_sparse_embeddings:
69
68
  payload["embedding"] = point.vector if hasattr(point, "vector") else None
70
- elif hasattr(point, "vector") and point.vector is not None:
69
+ elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
71
70
  payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
72
71
 
73
72
  if SPARSE_VECTORS_NAME in point.vector:
74
- parse_vector_dict = {
75
- "indices": point.vector[SPARSE_VECTORS_NAME].indices,
76
- "values": point.vector[SPARSE_VECTORS_NAME].values,
77
- }
78
- payload["sparse_embedding"] = parse_vector_dict
73
+ sparse_vector = point.vector[SPARSE_VECTORS_NAME]
74
+ if isinstance(sparse_vector, rest.SparseVector):
75
+ sparse_vector_dict = {
76
+ "indices": sparse_vector.indices,
77
+ "values": sparse_vector.values,
78
+ }
79
+ payload["sparse_embedding"] = sparse_vector_dict
79
80
 
80
81
  return Document.from_dict(payload)