qdrant-haystack 8.1.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -169,6 +169,50 @@ class QdrantEmbeddingRetriever:
169
169
 
170
170
  return {"documents": docs}
171
171
 
172
+ @component.output_types(documents=List[Document])
173
+ async def run_async(
174
+ self,
175
+ query_embedding: List[float],
176
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
177
+ top_k: Optional[int] = None,
178
+ scale_score: Optional[bool] = None,
179
+ return_embedding: Optional[bool] = None,
180
+ score_threshold: Optional[float] = None,
181
+ group_by: Optional[str] = None,
182
+ group_size: Optional[int] = None,
183
+ ):
184
+ """
185
+ Asynchronously run the Embedding Retriever on the given input data.
186
+
187
+ :param query_embedding: Embedding of the query.
188
+ :param filters: A dictionary with filters to narrow down the search space.
189
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
190
+ groups to return.
191
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
192
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
193
+ :param score_threshold: A minimal score threshold for the result.
194
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
195
+ value, all values will be used for grouping. One point can be in multiple groups.
196
+ :param group_size: Maximum number of points to return per group. Default is 3.
197
+ :returns:
198
+ The retrieved documents.
199
+
200
+ """
201
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
202
+
203
+ docs = await self._document_store._query_by_embedding_async(
204
+ query_embedding=query_embedding,
205
+ filters=filters,
206
+ top_k=top_k or self._top_k,
207
+ scale_score=scale_score or self._scale_score,
208
+ return_embedding=return_embedding or self._return_embedding,
209
+ score_threshold=score_threshold or self._score_threshold,
210
+ group_by=group_by or self._group_by,
211
+ group_size=group_size or self._group_size,
212
+ )
213
+
214
+ return {"documents": docs}
215
+
172
216
 
173
217
  @component
174
218
  class QdrantSparseEmbeddingRetriever:
@@ -336,6 +380,55 @@ class QdrantSparseEmbeddingRetriever:
336
380
 
337
381
  return {"documents": docs}
338
382
 
383
+ @component.output_types(documents=List[Document])
384
+ async def run_async(
385
+ self,
386
+ query_sparse_embedding: SparseEmbedding,
387
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
388
+ top_k: Optional[int] = None,
389
+ scale_score: Optional[bool] = None,
390
+ return_embedding: Optional[bool] = None,
391
+ score_threshold: Optional[float] = None,
392
+ group_by: Optional[str] = None,
393
+ group_size: Optional[int] = None,
394
+ ):
395
+ """
396
+ Asynchronously run the Sparse Embedding Retriever on the given input data.
397
+
398
+ :param query_sparse_embedding: Sparse Embedding of the query.
399
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
400
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
401
+ details.
402
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
403
+ groups to return.
404
+ :param scale_score: Whether to scale the scores of the retrieved documents or not.
405
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
406
+ :param score_threshold: A minimal score threshold for the result.
407
+ Score of the returned result might be higher or lower than the threshold
408
+ depending on the Distance function used.
409
+ E.g. for cosine similarity only higher scores will be returned.
410
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
411
+ value, all values will be used for grouping. One point can be in multiple groups.
412
+ :param group_size: Maximum number of points to return per group. Default is 3.
413
+ :returns:
414
+ The retrieved documents.
415
+
416
+ """
417
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
418
+
419
+ docs = await self._document_store._query_by_sparse_async(
420
+ query_sparse_embedding=query_sparse_embedding,
421
+ filters=filters,
422
+ top_k=top_k or self._top_k,
423
+ scale_score=scale_score or self._scale_score,
424
+ return_embedding=return_embedding or self._return_embedding,
425
+ score_threshold=score_threshold or self._score_threshold,
426
+ group_by=group_by or self._group_by,
427
+ group_size=group_size or self._group_size,
428
+ )
429
+
430
+ return {"documents": docs}
431
+
339
432
 
340
433
  @component
341
434
  class QdrantHybridRetriever:
@@ -501,3 +594,52 @@ class QdrantHybridRetriever:
501
594
  )
502
595
 
503
596
  return {"documents": docs}
597
+
598
+ @component.output_types(documents=List[Document])
599
+ async def run_async(
600
+ self,
601
+ query_embedding: List[float],
602
+ query_sparse_embedding: SparseEmbedding,
603
+ filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
604
+ top_k: Optional[int] = None,
605
+ return_embedding: Optional[bool] = None,
606
+ score_threshold: Optional[float] = None,
607
+ group_by: Optional[str] = None,
608
+ group_size: Optional[int] = None,
609
+ ):
610
+ """
611
+ Asynchronously run the Hybrid Retriever on the given input data.
612
+
613
+ :param query_embedding: Dense embedding of the query.
614
+ :param query_sparse_embedding: Sparse embedding of the query.
615
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
616
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
617
+ details.
618
+ :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
619
+ groups to return.
620
+ :param return_embedding: Whether to return the embedding of the retrieved Documents.
621
+ :param score_threshold: A minimal score threshold for the result.
622
+ Score of the returned result might be higher or lower than the threshold
623
+ depending on the Distance function used.
624
+ E.g. for cosine similarity only higher scores will be returned.
625
+ :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
626
+ value, all values will be used for grouping. One point can be in multiple groups.
627
+ :param group_size: Maximum number of points to return per group. Default is 3.
628
+ :returns:
629
+ The retrieved documents.
630
+
631
+ """
632
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
633
+
634
+ docs = await self._document_store._query_hybrid_async(
635
+ query_embedding=query_embedding,
636
+ query_sparse_embedding=query_sparse_embedding,
637
+ filters=filters,
638
+ top_k=top_k or self._top_k,
639
+ return_embedding=return_embedding or self._return_embedding,
640
+ score_threshold=score_threshold or self._score_threshold,
641
+ group_by=group_by or self._group_by,
642
+ group_size=group_size or self._group_size,
643
+ )
644
+
645
+ return {"documents": docs}
@@ -1,7 +1,7 @@
1
- import logging
2
1
  import uuid
3
2
  from typing import List, Union
4
3
 
4
+ from haystack import logging
5
5
  from haystack.dataclasses import Document
6
6
  from qdrant_client.http import models as rest
7
7
 
@@ -23,14 +23,6 @@ def convert_haystack_documents_to_qdrant_points(
23
23
  for document in documents:
24
24
  payload = document.to_dict(flatten=False)
25
25
 
26
- if payload.pop("dataframe", None):
27
- logger.warning(
28
- "Document %s has the `dataframe` field set,"
29
- "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
30
- "The `dataframe` field will soon be removed from Haystack Document.",
31
- document.id,
32
- )
33
-
34
26
  if use_sparse_embeddings:
35
27
  vector = {}
36
28
 
@@ -73,14 +65,6 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
73
65
  payload = {**point.payload}
74
66
  payload["score"] = point.score if hasattr(point, "score") else None
75
67
 
76
- if payload.pop("dataframe", None):
77
- logger.warning(
78
- "Document %s has the `dataframe` field set,"
79
- "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
80
- "The `dataframe` field will soon be removed from Haystack Document.",
81
- payload["id"],
82
- )
83
-
84
68
  if not use_sparse_embeddings:
85
69
  payload["embedding"] = point.vector if hasattr(point, "vector") else None
86
70
  elif hasattr(point, "vector") and point.vector is not None: