PyPI - qdrant-haystack - Versions diffs - 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl - Mend

qdrant-haystack 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

haystack_integrations/components/retrievers/py.typed ADDED Viewed

File without changes

haystack_integrations/components/retrievers/qdrant/__init__.py CHANGED Viewed

@@ -4,4 +4,4 @@
 from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
-__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
+__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")

haystack_integrations/components/retrievers/qdrant/retriever.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any
 from haystack import Document, component, default_from_dict, default_to_dict
 from haystack.dataclasses.sparse_embedding import SparseEmbedding
@@ -8,6 +8,11 @@ from qdrant_client.http import models
 from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
+FILTER_POLICY_MERGE_ERROR_MESSAGE = (
+    "Native Qdrant filters cannot be used with filter_policy set to MERGE. "
+    "Set filter_policy to REPLACE or use Haystack filters instead."
+)
 @component
 class QdrantEmbeddingRetriever:
@@ -38,15 +43,15 @@ class QdrantEmbeddingRetriever:
     def __init__(
         self,
         document_store: QdrantDocumentStore,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
+        filters: dict[str, Any] | models.Filter | None = None,
         top_k: int = 10,
         scale_score: bool = False,
         return_embedding: bool = False,
-        filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> None:
         """
         Create a QdrantEmbeddingRetriever component.
@@ -84,7 +89,7 @@ class QdrantEmbeddingRetriever:
         self._group_by = group_by
         self._group_size = group_size
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
@@ -108,7 +113,7 @@ class QdrantEmbeddingRetriever:
         return d
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "QdrantEmbeddingRetriever":
         """
         Deserializes the component from a dictionary.
@@ -125,18 +130,18 @@ class QdrantEmbeddingRetriever:
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)
-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
-        query_embedding: List[float],
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
-        top_k: Optional[int] = None,
-        scale_score: Optional[bool] = None,
-        return_embedding: Optional[bool] = None,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        query_embedding: list[float],
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        scale_score: bool | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
         """
         Run the Embedding Retriever on the given input data.
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
         :returns:
             The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
         """
-        filters = apply_filter_policy(self._filter_policy, self._filters, filters)
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
         docs = self._document_store._query_by_embedding(
             query_embedding=query_embedding,
@@ -169,6 +185,61 @@ class QdrantEmbeddingRetriever:
         return {"documents": docs}
+    @component.output_types(documents=list[Document])
+    async def run_async(
+        self,
+        query_embedding: list[float],
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        scale_score: bool | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
+        """
+        Asynchronously run the Embedding Retriever on the given input data.
+        :param query_embedding: Embedding of the query.
+        :param filters: A dictionary with filters to narrow down the search space.
+        :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
+             groups to return.
+        :param scale_score: Whether to scale the scores of the retrieved documents or not.
+        :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+        :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
+            value, all values will be used for grouping. One point can be in multiple groups.
+        :param group_size: Maximum amount of points to return per group. Default is 3.
+        :returns:
+            The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
+        """
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
+        docs = await self._document_store._query_by_embedding_async(
+            query_embedding=query_embedding,
+            filters=filters,
+            top_k=top_k or self._top_k,
+            scale_score=scale_score or self._scale_score,
+            return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
+            group_by=group_by or self._group_by,
+            group_size=group_size or self._group_size,
+        )
+        return {"documents": docs}
 @component
 class QdrantSparseEmbeddingRetriever:
@@ -200,15 +271,15 @@ class QdrantSparseEmbeddingRetriever:
     def __init__(
         self,
         document_store: QdrantDocumentStore,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
+        filters: dict[str, Any] | models.Filter | None = None,
         top_k: int = 10,
         scale_score: bool = False,
         return_embedding: bool = False,
-        filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> None:
         """
         Create a QdrantSparseEmbeddingRetriever component.
@@ -246,7 +317,7 @@ class QdrantSparseEmbeddingRetriever:
         self._group_by = group_by
         self._group_size = group_size
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
@@ -270,7 +341,7 @@ class QdrantSparseEmbeddingRetriever:
         return d
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "QdrantSparseEmbeddingRetriever":
         """
         Deserializes the component from a dictionary.
@@ -287,18 +358,18 @@ class QdrantSparseEmbeddingRetriever:
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)
-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
         query_sparse_embedding: SparseEmbedding,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
-        top_k: Optional[int] = None,
-        scale_score: Optional[bool] = None,
-        return_embedding: Optional[bool] = None,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        scale_score: bool | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
         """
         Run the Sparse Embedding Retriever on the given input data.
@@ -320,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
         :returns:
             The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
         """
-        filters = apply_filter_policy(self._filter_policy, self._filters, filters)
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
         docs = self._document_store._query_by_sparse(
             query_sparse_embedding=query_sparse_embedding,
@@ -336,6 +418,66 @@ class QdrantSparseEmbeddingRetriever:
         return {"documents": docs}
+    @component.output_types(documents=list[Document])
+    async def run_async(
+        self,
+        query_sparse_embedding: SparseEmbedding,
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        scale_score: bool | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
+        """
+        Asynchronously run the Sparse Embedding Retriever on the given input data.
+        :param query_sparse_embedding: Sparse Embedding of the query.
+        :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
+                        the `filter_policy` chosen at retriever initialization. See init method docstring for more
+                        details.
+        :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
+             groups to return.
+        :param scale_score: Whether to scale the scores of the retrieved documents or not.
+        :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or smaller than the threshold
+             depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
+        :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
+            value, all values will be used for grouping. One point can be in multiple groups.
+        :param group_size: Maximum amount of points to return per group. Default is 3.
+        :returns:
+            The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
+        """
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
+        docs = await self._document_store._query_by_sparse_async(
+            query_sparse_embedding=query_sparse_embedding,
+            filters=filters,
+            top_k=top_k or self._top_k,
+            scale_score=scale_score or self._scale_score,
+            return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
+            group_by=group_by or self._group_by,
+            group_size=group_size or self._group_size,
+        )
+        return {"documents": docs}
 @component
 class QdrantHybridRetriever:
@@ -373,14 +515,14 @@ class QdrantHybridRetriever:
     def __init__(
         self,
         document_store: QdrantDocumentStore,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
+        filters: dict[str, Any] | models.Filter | None = None,
         top_k: int = 10,
         return_embedding: bool = False,
-        filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        filter_policy: str | FilterPolicy = FilterPolicy.REPLACE,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> None:
         """
         Create a QdrantHybridRetriever component.
@@ -416,7 +558,7 @@ class QdrantHybridRetriever:
         self._group_by = group_by
         self._group_size = group_size
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         """
         Serializes the component to a dictionary.
@@ -436,7 +578,7 @@ class QdrantHybridRetriever:
         )
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever":
+    def from_dict(cls, data: dict[str, Any]) -> "QdrantHybridRetriever":
         """
         Deserializes the component from a dictionary.
@@ -453,18 +595,18 @@ class QdrantHybridRetriever:
             data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
         return default_from_dict(cls, data)
-    @component.output_types(documents=List[Document])
+    @component.output_types(documents=list[Document])
     def run(
         self,
-        query_embedding: List[float],
+        query_embedding: list[float],
         query_sparse_embedding: SparseEmbedding,
-        filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
-        top_k: Optional[int] = None,
-        return_embedding: Optional[bool] = None,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ):
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
         """
         Run the Sparse Embedding Retriever on the given input data.
@@ -486,8 +628,19 @@ class QdrantHybridRetriever:
         :returns:
             The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
         """
-        filters = apply_filter_policy(self._filter_policy, self._filters, filters)
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
         docs = self._document_store._query_hybrid(
             query_embedding=query_embedding,
@@ -501,3 +654,63 @@ class QdrantHybridRetriever:
         )
         return {"documents": docs}
+    @component.output_types(documents=list[Document])
+    async def run_async(
+        self,
+        query_embedding: list[float],
+        query_sparse_embedding: SparseEmbedding,
+        filters: dict[str, Any] | models.Filter | None = None,
+        top_k: int | None = None,
+        return_embedding: bool | None = None,
+        score_threshold: float | None = None,
+        group_by: str | None = None,
+        group_size: int | None = None,
+    ) -> dict[str, list[Document]]:
+        """
+        Asynchronously run the Sparse Embedding Retriever on the given input data.
+        :param query_embedding: Dense embedding of the query.
+        :param query_sparse_embedding: Sparse embedding of the query.
+        :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
+                        the `filter_policy` chosen at retriever initialization. See init method docstring for more
+                        details.
+        :param top_k: The maximum number of documents to return. If using `group_by` parameters, maximum number of
+             groups to return.
+        :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or smaller than the threshold
+             depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
+        :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
+             value, all values will be used for grouping. One point can be in multiple groups.
+        :param group_size: Maximum amount of points to return per group. Default is 3.
+        :returns:
+            The retrieved documents.
+        :raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
+        """
+        if self._filter_policy == FilterPolicy.MERGE and (
+            isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
+        ):
+            raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
+        # Replacing filters works with native Qdrant filters even if the type is wrong
+        filters = apply_filter_policy(
+            filter_policy=self._filter_policy,
+            init_filters=self._filters,  # type: ignore[arg-type]
+            runtime_filters=filters,  # type: ignore[arg-type]
+        )
+        docs = await self._document_store._query_hybrid_async(
+            query_embedding=query_embedding,
+            query_sparse_embedding=query_sparse_embedding,
+            filters=filters,
+            top_k=top_k or self._top_k,
+            return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
+            group_by=group_by or self._group_by,
+            group_size=group_size or self._group_size,
+        )
+        return {"documents": docs}

haystack_integrations/document_stores/py.typed ADDED Viewed

File without changes

haystack_integrations/document_stores/qdrant/converters.py CHANGED Viewed

@@ -1,7 +1,6 @@
-import logging
 import uuid
-from typing import List, Union
+from haystack import logging
 from haystack.dataclasses import Document
 from qdrant_client.http import models as rest
@@ -15,13 +14,14 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
 def convert_haystack_documents_to_qdrant_points(
-    documents: List[Document],
+    documents: list[Document],
     *,
     use_sparse_embeddings: bool,
-) -> List[rest.PointStruct]:
+) -> list[rest.PointStruct]:
     points = []
     for document in documents:
         payload = document.to_dict(flatten=False)
         if use_sparse_embeddings:
             vector = {}
@@ -36,7 +36,7 @@ def convert_haystack_documents_to_qdrant_points(
         else:
             vector = payload.pop("embedding") or {}
-        _id = convert_id(payload.get("id"))
+        _id = convert_id(document.id)
         point = rest.PointStruct(
             payload=payload,
@@ -57,23 +57,25 @@ def convert_id(_id: str) -> str:
     return uuid.uuid5(UUID_NAMESPACE, _id).hex
-QdrantPoint = Union[rest.ScoredPoint, rest.Record]
+QdrantPoint = rest.ScoredPoint | rest.Record
 def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
-    payload = {**point.payload}
+    payload = point.payload or {}
     payload["score"] = point.score if hasattr(point, "score") else None
     if not use_sparse_embeddings:
         payload["embedding"] = point.vector if hasattr(point, "vector") else None
-    elif hasattr(point, "vector") and point.vector is not None:
+    elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
         payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
         if SPARSE_VECTORS_NAME in point.vector:
-            parse_vector_dict = {
-                "indices": point.vector[SPARSE_VECTORS_NAME].indices,
-                "values": point.vector[SPARSE_VECTORS_NAME].values,
-            }
-            payload["sparse_embedding"] = parse_vector_dict
+            sparse_vector = point.vector[SPARSE_VECTORS_NAME]
+            if isinstance(sparse_vector, rest.SparseVector):
+                sparse_vector_dict = {
+                    "indices": sparse_vector.indices,
+                    "values": sparse_vector.values,
+                }
+                payload["sparse_embedding"] = sparse_vector_dict
     return Document.from_dict(payload)

qdrant-haystack 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

qdrant-haystack 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl