qdrant-haystack 6.0.0__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- haystack_integrations/components/retrievers/py.typed +0 -0
- haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
- haystack_integrations/components/retrievers/qdrant/retriever.py +269 -56
- haystack_integrations/document_stores/py.typed +0 -0
- haystack_integrations/document_stores/qdrant/converters.py +15 -13
- haystack_integrations/document_stores/qdrant/document_store.py +1802 -355
- haystack_integrations/document_stores/qdrant/filters.py +87 -168
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +8 -3
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/METADATA +12 -27
- qdrant_haystack-10.2.0.dist-info/RECORD +13 -0
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/WHEEL +1 -1
- qdrant_haystack-6.0.0.dist-info/RECORD +0 -11
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-10.2.0.dist-info}/licenses/LICENSE.txt +0 -0
--- a/haystack_integrations/document_stores/qdrant/document_store.py
+++ b/haystack_integrations/document_stores/qdrant/document_store.py
@@ -1,17 +1,16 @@
 import inspect
-import
+from collections.abc import AsyncGenerator, Generator
 from itertools import islice
-from typing import Any, ClassVar,
+from typing import Any, ClassVar, cast
 
-import numpy as np
 import qdrant_client
-from haystack import default_from_dict, default_to_dict
+from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
 from haystack.dataclasses.sparse_embedding import SparseEmbedding
 from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
 from haystack.utils import Secret, deserialize_secrets_inplace
-from
+from numpy import exp
 from qdrant_client.http import models as rest
 from qdrant_client.http.exceptions import UnexpectedResponse
 from tqdm import tqdm
@@ -27,15 +26,21 @@ from .filters import convert_filters_to_qdrant
 
 logger = logging.getLogger(__name__)
 
+# Default group size to apply when using group_by
+# - Our methods use None as the default for optional group_size parameter.
+# - Qdrant expects an integer and internally defaults to 3 when performing grouped queries.
+# - When group_by is specified but group_size is None, we use this value instead of passing None.
+DEFAULT_GROUP_SIZE = 3
+
 
 class QdrantStoreError(DocumentStoreError):
     pass
 
 
-FilterType =
+FilterType = dict[str, dict[str, Any] | list[Any] | str | int | float | bool]
 
 
-def get_batches_from_generator(iterable, n):
+def get_batches_from_generator(iterable: list, n: int) -> Generator:
     """
     Batch elements of an iterable into fixed-length chunks or blocks.
     """
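The hunk re-types `get_batches_from_generator` but its body sits outside the diff. For orientation, a minimal `islice`-based batcher matching the new signature could look like the following (an illustration, not the package's verbatim body; `itertools.islice` is already imported at module top):

```python
from collections.abc import Generator
from itertools import islice


def get_batches_from_generator(iterable: list, n: int) -> Generator:
    """
    Batch elements of an iterable into fixed-length chunks or blocks.
    """
    it = iter(iterable)
    batch = tuple(islice(it, n))
    while batch:
        yield batch  # the final batch may hold fewer than n elements
        batch = tuple(islice(it, n))
```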
@@ -48,9 +53,8 @@ def get_batches_from_generator(iterable, n):
 
 class QdrantDocumentStore:
     """
-    QdrantDocumentStore
-
-    and Qdrant Cloud Cluster deployments.
+    A QdrantDocumentStore implementation that you can use with any Qdrant instance: in-memory, disk-persisted,
+    Docker-based, and Qdrant Cloud Cluster deployments.
 
     Usage example by creating an in-memory instance:
 
@@ -60,7 +64,8 @@ class QdrantDocumentStore:
 
     document_store = QdrantDocumentStore(
         ":memory:",
-        recreate_index=True
+        recreate_index=True,
+        embedding_dim=5
     )
     document_store.write_documents([
         Document(content="This is first", embedding=[0.0]*5),
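Read together, the context and added lines of this hunk give the updated docstring example; the constructor now pins `embedding_dim` explicitly so it matches the 5-dimensional embeddings written below. Reassembled as a runnable snippet (the import lines are assumed, as they sit outside the hunk):

```python
from haystack.dataclasses import Document
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

document_store = QdrantDocumentStore(
    ":memory:",
    recreate_index=True,
    embedding_dim=5,
)
document_store.write_documents([
    Document(content="This is first", embedding=[0.0] * 5),
])
```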
@@ -85,7 +90,7 @@ class QdrantDocumentStore:
     ```
     """
 
-    SIMILARITY: ClassVar[
+    SIMILARITY: ClassVar[dict[str, rest.Distance]] = {
         "cosine": rest.Distance.COSINE,
         "dot_product": rest.Distance.DOT,
         "l2": rest.Distance.EUCLID,
@@ -93,17 +98,17 @@ class QdrantDocumentStore:
 
     def __init__(
         self,
-        location:
-        url:
+        location: str | None = None,
+        url: str | None = None,
         port: int = 6333,
         grpc_port: int = 6334,
         prefer_grpc: bool = False,
-        https:
-        api_key:
-        prefix:
-        timeout:
-        host:
-        path:
+        https: bool | None = None,
+        api_key: Secret | None = None,
+        prefix: str | None = None,
+        timeout: int | None = None,
+        host: str | None = None,
+        path: str | None = None,
         force_disable_check_same_thread: bool = False,
         index: str = "Document",
         embedding_dim: int = 768,
@@ -114,24 +119,25 @@ class QdrantDocumentStore:
         return_embedding: bool = False,
         progress_bar: bool = True,
         recreate_index: bool = False,
-        shard_number:
-        replication_factor:
-        write_consistency_factor:
-        on_disk_payload:
-        hnsw_config:
-        optimizers_config:
-        wal_config:
-        quantization_config:
-        init_from: Optional[dict] = None,
+        shard_number: int | None = None,
+        replication_factor: int | None = None,
+        write_consistency_factor: int | None = None,
+        on_disk_payload: bool | None = None,
+        hnsw_config: dict | None = None,
+        optimizers_config: dict | None = None,
+        wal_config: dict | None = None,
+        quantization_config: dict | None = None,
         wait_result_from_api: bool = True,
-        metadata:
+        metadata: dict | None = None,
         write_batch_size: int = 100,
         scroll_size: int = 10_000,
-        payload_fields_to_index:
-    ):
+        payload_fields_to_index: list[dict] | None = None,
+    ) -> None:
         """
+        Initializes a QdrantDocumentStore.
+
         :param location:
-            If `memory` - use in-memory Qdrant instance.
+            If `":memory:"` - use in-memory Qdrant instance.
            If `str` - use it as a URL parameter.
            If `None` - use default values for host and port.
        :param url:
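One pattern runs through the whole `__init__` rewrite: `Optional[X]` annotations become PEP 604 unions (`X | None`) and `typing` generics give way to built-ins (`dict`, `list[dict]`). The two spellings are equivalent, but the union syntax only works at runtime on Python 3.10+, so the change implies a raised interpreter floor:

```python
from typing import Optional


def old_style(timeout: Optional[int] = None) -> None:  # spelling used in 6.0.0
    ...


def new_style(timeout: int | None = None) -> None:  # spelling used in 10.2.0; Python >= 3.10
    ...
```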
@@ -165,7 +171,7 @@ class QdrantDocumentStore:
             Dimension of the embeddings.
         :param on_disk:
             Whether to store the collection on disk.
-        :param
+        :param use_sparse_embeddings:
             If set to `True`, enables support for sparse embeddings.
         :param sparse_idf:
             If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
@@ -202,8 +208,6 @@ class QdrantDocumentStore:
             Params for Write-Ahead-Log.
         :param quantization_config:
             Params for quantization. If `None`, quantization will be disabled.
-        :param init_from:
-            Use data stored in another collection to initialize this collection.
         :param wait_result_from_api:
             Whether to wait for the result from the API after each request.
         :param metadata:
@@ -216,7 +220,8 @@ class QdrantDocumentStore:
             List of payload fields to index.
         """
 
-        self._client = None
+        self._client: qdrant_client.QdrantClient | None = None
+        self._async_client: qdrant_client.AsyncQdrantClient | None = None
 
         # Store the Qdrant client specific attributes
         self.location = location
@@ -232,7 +237,6 @@ class QdrantDocumentStore:
         self.path = path
         self.force_disable_check_same_thread = force_disable_check_same_thread
         self.metadata = metadata or {}
-        self.api_key = api_key
 
         # Store the Qdrant collection specific attributes
         self.shard_number = shard_number
@@ -243,7 +247,6 @@ class QdrantDocumentStore:
         self.optimizers_config = optimizers_config
         self.wal_config = wal_config
         self.quantization_config = quantization_config
-        self.init_from = init_from
         self.wait_result_from_api = wait_result_from_api
         self.recreate_index = recreate_index
         self.payload_fields_to_index = payload_fields_to_index
@@ -258,24 +261,11 @@ class QdrantDocumentStore:
         self.write_batch_size = write_batch_size
         self.scroll_size = scroll_size
 
-
-
-
-
-
-            url=self.url,
-            port=self.port,
-            grpc_port=self.grpc_port,
-            prefer_grpc=self.prefer_grpc,
-            https=self.https,
-            api_key=self.api_key.resolve_value() if self.api_key else None,
-            prefix=self.prefix,
-            timeout=self.timeout,
-            host=self.host,
-            path=self.path,
-            metadata=self.metadata,
-            force_disable_check_same_thread=self.force_disable_check_same_thread,
-        )
+    def _initialize_client(self) -> None:
+        if self._client is None:
+            client_params = self._prepare_client_params()
+            # This step adds the api-key and User-Agent to metadata
+            self._client = qdrant_client.QdrantClient(**client_params)
             # Make sure the collection is properly set up
             self._set_up_collection(
                 self.index,
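The deleted block shows the client being built eagerly in `__init__`; the replacement defers construction to `_initialize_client()`, which every public method now calls first. `_prepare_client_params` itself is not part of this diff; judging by the kwargs the old constructor passed straight to `qdrant_client.QdrantClient`, it plausibly assembles something like the following (an illustrative reconstruction, not the package's verbatim helper):

```python
from typing import Any


def _prepare_client_params(self) -> dict[str, Any]:
    # Reconstructed from the kwargs visible in the deleted block above;
    # the real helper lives outside this hunk.
    return {
        "location": self.location,
        "url": self.url,
        "port": self.port,
        "grpc_port": self.grpc_port,
        "prefer_grpc": self.prefer_grpc,
        "https": self.https,
        "api_key": self.api_key.resolve_value() if self.api_key else None,
        "prefix": self.prefix,
        "timeout": self.timeout,
        "host": self.host,
        "path": self.path,
        "metadata": self.metadata,
        "force_disable_check_same_thread": self.force_disable_check_same_thread,
    }
```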
@@ -287,14 +277,52 @@ class QdrantDocumentStore:
                 self.on_disk,
                 self.payload_fields_to_index,
             )
-
+
+    async def _initialize_async_client(self) -> None:
+        """
+        Returns the asynchronous Qdrant client, initializing it if necessary.
+        """
+        if self._async_client is None:
+            client_params = self._prepare_client_params()
+            self._async_client = qdrant_client.AsyncQdrantClient(
+                **client_params,
+            )
+            await self._set_up_collection_async(
+                self.index,
+                self.embedding_dim,
+                self.recreate_index,
+                self.similarity,
+                self.use_sparse_embeddings,
+                self.sparse_idf,
+                self.on_disk,
+                self.payload_fields_to_index,
+            )
 
     def count_documents(self) -> int:
         """
         Returns the number of documents present in the Document Store.
         """
+        self._initialize_client()
+        assert self._client is not None
+        try:
+            response = self._client.count(
+                collection_name=self.index,
+            )
+            return response.count
+        except (UnexpectedResponse, ValueError):
+            # Qdrant local raises ValueError if the collection is not found, but
+            # with the remote server UnexpectedResponse is raised. Until that's unified,
+            # we need to catch both.
+            return 0
+
+    async def count_documents_async(self) -> int:
+        """
+        Asynchronously returns the number of documents present in the document store.
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
         try:
-            response = self.
+            response = await self._async_client.count(
                 collection_name=self.index,
             )
             return response.count
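Both counters swallow the collection-not-found case and report zero instead of raising; note that the sync and async clients are initialized independently, so with `":memory:"` each owns its own local storage. A minimal async usage sketch:

```python
import asyncio

from haystack_integrations.document_stores.qdrant import QdrantDocumentStore


async def main() -> None:
    store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
    # First call lazily creates the AsyncQdrantClient and sets up the collection.
    print(await store.count_documents_async())  # 0 on a fresh collection


asyncio.run(main())
```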
@@ -306,8 +334,8 @@ class QdrantDocumentStore:
 
     def filter_documents(
         self,
-        filters:
-    ) ->
+        filters: dict[str, Any] | rest.Filter | None = None,
+    ) -> list[Document]:
         """
         Returns the documents that match the provided filters.
 
@@ -317,22 +345,32 @@ class QdrantDocumentStore:
         :param filters: The filters to apply to the document list.
         :returns: A list of documents that match the given filters.
         """
-
-
-            raise ValueError(msg)
+        # No need to initialize client here as _get_documents_generator
+        # will handle client initialization internally
 
-
-            msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
-            raise ValueError(msg)
+        QdrantDocumentStore._validate_filters(filters)
         return list(
-            self.
+            self._get_documents_generator(
                 filters,
             )
         )
 
+    async def filter_documents_async(
+        self,
+        filters: dict[str, Any] | rest.Filter | None = None,
+    ) -> list[Document]:
+        """
+        Asynchronously returns the documents that match the provided filters.
+        """
+        # No need to initialize client here as _get_documents_generator_async
+        # will handle client initialization internally
+
+        QdrantDocumentStore._validate_filters(filters)
+        return [doc async for doc in self._get_documents_generator_async(filters)]
+
     def write_documents(
         self,
-        documents:
+        documents: list[Document],
         policy: DuplicatePolicy = DuplicatePolicy.FAIL,
     ) -> int:
         """
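Both filter methods now route validation through a shared `_validate_filters` static method whose body sits outside these hunks. Judging from the inline checks the old code performed (see the deleted `msg` lines), it presumably looks something like this sketch (illustrative only):

```python
@staticmethod
def _validate_filters(filters: dict[str, Any] | rest.Filter | None = None) -> None:
    # Sketch: the real body is not part of this diff.
    if filters and not isinstance(filters, (dict, rest.Filter)):
        msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
        raise ValueError(msg)
    if isinstance(filters, dict) and "operator" not in filters:
        msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
        raise ValueError(msg)
```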
@@ -348,13 +386,14 @@ class QdrantDocumentStore:
 
         :returns: The number of documents written to the document store.
         """
+
+        self._initialize_client()
+        assert self._client is not None
+
         for doc in documents:
             if not isinstance(doc, Document):
                 msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
                 raise ValueError(msg)
-        self._set_up_collection(
-            self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings, self.sparse_idf
-        )
 
         if len(documents) == 0:
             logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
@@ -362,7 +401,6 @@ class QdrantDocumentStore:
 
         document_objects = self._handle_duplicate_documents(
             documents=documents,
-            index=self.index,
             policy=policy,
         )
 
@@ -374,7 +412,61 @@ class QdrantDocumentStore:
                     use_sparse_embeddings=self.use_sparse_embeddings,
                 )
 
-                self.
+                self._client.upsert(
+                    collection_name=self.index,
+                    points=batch,
+                    wait=self.wait_result_from_api,
+                )
+
+                progress_bar.update(self.write_batch_size)
+        return len(document_objects)
+
+    async def write_documents_async(
+        self,
+        documents: list[Document],
+        policy: DuplicatePolicy = DuplicatePolicy.FAIL,
+    ) -> int:
+        """
+        Asynchronously writes documents to Qdrant using the specified policy.
+        The QdrantDocumentStore can handle duplicate documents based on the given policy.
+        The available policies are:
+        - `FAIL`: The operation will raise an error if any document already exists.
+        - `OVERWRITE`: Existing documents will be overwritten with the new ones.
+        - `SKIP`: Existing documents will be skipped, and only new documents will be added.
+
+        :param documents: A list of Document objects to write to Qdrant.
+        :param policy: The policy for handling duplicate documents.
+
+        :returns: The number of documents written to the document store.
+        """
+
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        for doc in documents:
+            if not isinstance(doc, Document):
+                msg = f"""DocumentStore.write_documents_async() expects a list of
+                Documents but got an element of {type(doc)}."""
+                raise ValueError(msg)
+
+        if len(documents) == 0:
+            logger.warning("Calling QdrantDocumentStore.write_documents_async() with empty list")
+            return 0
+
+        document_objects = await self._handle_duplicate_documents_async(
+            documents=documents,
+            policy=policy,
+        )
+
+        batched_documents = get_batches_from_generator(document_objects, self.write_batch_size)
+        with tqdm(total=len(document_objects), disable=not self.progress_bar) as progress_bar:
+            for document_batch in batched_documents:
+                batch = convert_haystack_documents_to_qdrant_points(
+                    document_batch,
+                    use_sparse_embeddings=self.use_sparse_embeddings,
+                )
+
+                await self._async_client.upsert(
                     collection_name=self.index,
                     points=batch,
                     wait=self.wait_result_from_api,
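`write_documents_async` mirrors the sync path end to end: type check, empty-list guard, async duplicate handling, then batched `upsert` calls sized by `write_batch_size`. Usage sketch:

```python
import asyncio

from haystack.dataclasses import Document
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore


async def main() -> None:
    store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
    docs = [Document(content="This is first", embedding=[0.0] * 5)]
    written = await store.write_documents_async(docs, policy=DuplicatePolicy.OVERWRITE)
    print(written)  # 1


asyncio.run(main())
```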
@@ -383,17 +475,20 @@ class QdrantDocumentStore:
                 progress_bar.update(self.write_batch_size)
         return len(document_objects)
 
-    def delete_documents(self, document_ids:
+    def delete_documents(self, document_ids: list[str]) -> None:
         """
         Deletes documents that match the provided `document_ids` from the document store.
 
         :param document_ids: the document ids to delete
         """
-
+
+        self._initialize_client()
+        assert self._client is not None
+
         try:
-            self.
+            self._client.delete(
                 collection_name=self.index,
-                points_selector=
+                points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
                 wait=self.wait_result_from_api,
             )
         except KeyError:
@@ -401,149 +496,987 @@
                 "Called QdrantDocumentStore.delete_documents() on a non-existing ID",
             )
 
-
-    def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore":
+    async def delete_documents_async(self, document_ids: list[str]) -> None:
         """
-
+        Asynchronously deletes documents that match the provided `document_ids` from the document store.
 
-        :param
-            The dictionary to deserialize from.
-        :returns:
-            The deserialized component.
+        :param document_ids: the document ids to delete
         """
-        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
-        return default_from_dict(cls, data)
 
-
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        try:
+            await self._async_client.delete(
+                collection_name=self.index,
+                points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
+                wait=self.wait_result_from_api,
+            )
+        except KeyError:
+            logger.warning(
+                "Called QdrantDocumentStore.delete_documents_async() on a non-existing ID",
+            )
+
+    def delete_by_filter(self, filters: dict[str, Any]) -> int:
         """
-
+        Deletes all documents that match the provided filters.
+
+        :param filters: The filters to apply to select documents for deletion.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
 
         :returns:
-
+            The number of documents deleted.
         """
-
-
-        # Set as init_params without default values
-        init_params = {k: getattr(self, k) for k in params}
-        init_params["api_key"] = self.api_key.to_dict() if self.api_key else None
-        return default_to_dict(
-            self,
-            **init_params,
-        )
+        self._initialize_client()
+        assert self._client is not None
 
-
-
-
-
+        try:
+            qdrant_filter = convert_filters_to_qdrant(filters)
+            if qdrant_filter is None:
+                return 0
+
+            count_response = self._client.count(
+                collection_name=self.index,
+                count_filter=qdrant_filter,
+            )
+            deleted_count = count_response.count
+
+            self._client.delete(
+                collection_name=self.index,
+                points_selector=rest.FilterSelector(filter=qdrant_filter),
+                wait=self.wait_result_from_api,
+            )
+            return deleted_count
+
+        except Exception as e:
+            msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
+            raise QdrantStoreError(msg) from e
+
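`delete_by_filter` counts the matching points and only then deletes them, so the returned number can drift if concurrent writes land between the two calls; like the update methods further down, it is not atomic. A usage sketch (the filter syntax follows Haystack's metadata-filtering convention):

```python
from haystack.dataclasses import Document
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
store.write_documents([
    Document(content="breaking", embedding=[0.1] * 5, meta={"category": "news"}),
    Document(content="analysis", embedding=[0.2] * 5, meta={"category": "opinion"}),
])

deleted = store.delete_by_filter(
    filters={"field": "meta.category", "operator": "==", "value": "news"}
)
print(deleted)  # 1
```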
+    async def delete_by_filter_async(self, filters: dict[str, Any]) -> int:
         """
-
+        Asynchronously deletes all documents that match the provided filters.
 
-        :param filters:
-
+        :param filters: The filters to apply to select documents for deletion.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+
+        :returns:
+            The number of documents deleted.
         """
+        await self._initialize_async_client()
+        assert self._async_client is not None
 
-
-
+        try:
+            qdrant_filter = convert_filters_to_qdrant(filters)
+            if qdrant_filter is None:
+                return 0
 
-
-
-
-            records, next_offset = self.client.scroll(
-                collection_name=index,
-                scroll_filter=qdrant_filters,
-                limit=self.scroll_size,
-                offset=next_offset,
-                with_payload=True,
-                with_vectors=True,
+            count_response = await self._async_client.count(
+                collection_name=self.index,
+                count_filter=qdrant_filter,
             )
-
-
+            deleted_count = count_response.count
+
+            await self._async_client.delete(
+                collection_name=self.index,
+                points_selector=rest.FilterSelector(filter=qdrant_filter),
+                wait=self.wait_result_from_api,
             )
+            return deleted_count
 
-
-
-
-            )
+        except Exception as e:
+            msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
+            raise QdrantStoreError(msg) from e
 
-
-
-        ids: List[str],
-        index: Optional[str] = None,
-    ) -> List[Document]:
+    @staticmethod
+    def _check_stop_scrolling(next_offset: Any) -> bool:
         """
-
+        Checks if scrolling should stop based on the next_offset value.
 
-        :param
-
-        :param index:
-            The name of the index to retrieve documents from.
-        :returns:
-            A list of documents.
+        :param next_offset: The offset returned from the scroll operation.
+        :returns: True if scrolling should stop, False otherwise.
         """
-
+        return next_offset is None or (
+            hasattr(next_offset, "num")
+            and hasattr(next_offset, "uuid")
+            and next_offset.num == 0
+            and next_offset.uuid == ""
+        )
 
-
+    @staticmethod
+    def _metadata_fields_info_from_schema(payload_schema: dict[str, Any]) -> dict[str, str]:
+        """Build field name -> type dict from Qdrant payload_schema. Used by get_metadata_fields_info (sync/async)."""
+        fields_info: dict[str, str] = {}
+        for field_name, field_config in payload_schema.items():
+            if hasattr(field_config, "data_type"):
+                fields_info[field_name] = str(field_config.data_type)
+            else:
+                fields_info[field_name] = "unknown"
+        return fields_info
+
+    @staticmethod
+    def _process_records_min_max(
+        records: list[Any], metadata_field: str, min_value: Any, max_value: Any
+    ) -> tuple[Any, Any]:
+        """Update min/max from a batch of Qdrant records. Used by get_metadata_field_min_max (sync/async)."""
+        for record in records:
+            if record.payload and "meta" in record.payload:
+                meta = record.payload["meta"]
+                if metadata_field in meta:
+                    value = meta[metadata_field]
+                    if value is not None:
+                        if min_value is None or value < min_value:
+                            min_value = value
+                        if max_value is None or value > max_value:
+                            max_value = value
+        return min_value, max_value
+
+    @staticmethod
+    def _process_records_count_unique(
+        records: list[Any], metadata_fields: list[str], unique_values_by_field: dict[str, set[Any]]
+    ) -> None:
+        """
+        Update unique_values_by_field from a batch of Qdrant records.
 
-
-
-
-
-
-
+        Used by count_unique_metadata_by_filter (sync/async).
+        """
+        for record in records:
+            if record.payload and "meta" in record.payload:
+                meta = record.payload["meta"]
+                for field in metadata_fields:
+                    if field in meta:
+                        value = meta[field]
+                        if value is not None:
+                            if isinstance(value, (list, dict)):
+                                unique_values_by_field[field].add(str(value))
+                            else:
+                                unique_values_by_field[field].add(value)
+
+    @staticmethod
+    def _process_records_unique_values(
+        records: list[Any],
+        metadata_field: str,
+        unique_values: list[Any],
+        unique_values_set: set[Any],
+        offset: int,
+        limit: int,
+    ) -> bool:
+        """Collect unique values from a batch of records. Returns True when len(unique_values) >= offset + limit."""
+        for record in records:
+            if record.payload and "meta" in record.payload:
+                meta = record.payload["meta"]
+                if metadata_field in meta:
+                    value = meta[metadata_field]
+                    if value is not None:
+                        hashable_value = str(value) if isinstance(value, (list, dict)) else value
+                        if hashable_value not in unique_values_set:
+                            unique_values_set.add(hashable_value)
+                            unique_values.append(value)
+                            if len(unique_values) >= offset + limit:
+                                return True
+        return False
+
+    @staticmethod
+    def _create_updated_point_from_record(record: Any, meta: dict[str, Any]) -> rest.PointStruct:
+        """
+        Creates an updated PointStruct from a Qdrant record with merged metadata.
+
+        :param record: The Qdrant record to update.
+        :param meta: The metadata fields to merge with existing metadata.
+        :returns: A PointStruct with updated metadata and preserved vectors.
+        """
+        # merge existing payload with new metadata
+        # Metadata is stored under the "meta" key in the payload
+        updated_payload = dict(record.payload or {})
+        if "meta" not in updated_payload:
+            updated_payload["meta"] = {}
+        updated_payload["meta"].update(meta)
+
+        # create updated point preserving vectors
+        # Type cast needed because record.vector type doesn't include all PointStruct vector types
+        vector_value = record.vector if record.vector is not None else {}
+        return rest.PointStruct(
+            id=record.id,
+            vector=cast(Any, vector_value),
+            payload=updated_payload,
         )
 
-
-
-
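`_check_stop_scrolling` centralizes the loop-exit test shared by every scroll-based method below: Qdrant's scroll API hands back the offset of the next page, and either `None` or a zero-valued point id (`num == 0` and `uuid == ""`) means the scan is done. A self-contained sketch of the same pattern against a bare `qdrant_client` (names here are illustrative, not from the package):

```python
from typing import Any

from qdrant_client import QdrantClient


def scroll_all_payloads(client: QdrantClient, collection: str, page_size: int = 10_000) -> list[dict[str, Any]]:
    """Drain a collection page by page, using the same stop test as _check_stop_scrolling."""
    payloads: list[dict[str, Any]] = []
    next_offset = None
    while True:
        records, next_offset = client.scroll(
            collection_name=collection,
            limit=page_size,
            offset=next_offset,
            with_payload=True,
            with_vectors=False,
        )
        payloads.extend(r.payload or {} for r in records)
        stop = next_offset is None or (
            hasattr(next_offset, "num")
            and hasattr(next_offset, "uuid")
            and next_offset.num == 0
            and next_offset.uuid == ""
        )
        if stop:
            break
    return payloads
```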
+    def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
+        """
+        Updates the metadata of all documents that match the provided filters.
+
+        **Note**: This operation is not atomic. Documents matching the filter are fetched first,
+        then updated. If documents are modified between the fetch and update operations,
+        those changes may be lost.
+
+        :param filters: The filters to apply to select documents for updating.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param meta: The metadata fields to update. This will be merged with existing metadata.
+
+        :returns:
+            The number of documents updated.
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        try:
+            qdrant_filter = convert_filters_to_qdrant(filters)
+            if qdrant_filter is None:
+                return 0
+
+            # get all matching documents using scroll
+            updated_points = []
+            next_offset = None
+
+            while True:
+                records, next_offset = self._client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=True,
+                )
+
+                # update payload for each record
+                for record in records:
+                    updated_points.append(self._create_updated_point_from_record(record, meta))
+
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            if not updated_points:
+                return 0
+
+            # upsert updated points back in batches
+            for batch in get_batches_from_generator(updated_points, self.write_batch_size):
+                self._client.upsert(
+                    collection_name=self.index,
+                    points=list(batch),
+                    wait=self.wait_result_from_api,
+                )
+
+            logger.info(
+                "Updated {n_docs} documents in collection '{name}' using filters.",
+                n_docs=len(updated_points),
+                name=self.index,
             )
-
+            return len(updated_points)
+        except Exception as e:
+            msg = f"Failed to update documents by filter in Qdrant: {e!s}"
+            raise QdrantStoreError(msg) from e
 
-    def
-        self,
-        query_sparse_embedding: SparseEmbedding,
-        filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
-        top_k: int = 10,
-        scale_score: bool = False,
-        return_embedding: bool = False,
-        score_threshold: Optional[float] = None,
-        group_by: Optional[str] = None,
-        group_size: Optional[int] = None,
-    ) -> List[Document]:
+    async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
         """
-
+        Asynchronously updates the metadata of all documents that match the provided filters.
 
-
-
-
-            groups to return.
-        :param scale_score: Whether to scale the scores of the retrieved documents.
-        :param return_embedding: Whether to return the embeddings of the retrieved documents.
-        :param score_threshold: A minimal score threshold for the result.
-            Score of the returned result might be higher or smaller than the threshold
-            depending on the Distance function used.
-            E.g. for cosine similarity only higher scores will be returned.
-        :param group_by: Payload field to group by, must be a string or number field. If the field contains more than 1
-            value, all values will be used for grouping. One point can be in multiple groups.
-        :param group_size: Maximum amount of points to return per group. Default is 3.
+        **Note**: This operation is not atomic. Documents matching the filter are fetched first,
+        then updated. If documents are modified between the fetch and update operations,
+        those changes may be lost.
 
-        :
+        :param filters: The filters to apply to select documents for updating.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param meta: The metadata fields to update. This will be merged with existing metadata.
 
-        :
-
+        :returns:
+            The number of documents updated.
         """
+        await self._initialize_async_client()
+        assert self._async_client is not None
 
-
-
-
-
+        try:
+            qdrant_filter = convert_filters_to_qdrant(filters)
+            if qdrant_filter is None:
+                return 0
+
+            updated_points = []
+            next_offset = None
+
+            while True:
+                records, next_offset = await self._async_client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=True,
+                )
+
+                # update payload for each record
+                for record in records:
+                    updated_points.append(self._create_updated_point_from_record(record, meta))
+
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            if not updated_points:
+                return 0
+
+            # upsert updated points back in batches
+            for batch in get_batches_from_generator(updated_points, self.write_batch_size):
+                await self._async_client.upsert(
+                    collection_name=self.index,
+                    points=list(batch),
+                    wait=self.wait_result_from_api,
+                )
+
+            logger.info(
+                "Updated {n_docs} documents in collection '{name}' using filters.",
+                n_docs=len(updated_points),
+                name=self.index,
             )
-
+            return len(updated_points)
+        except Exception as e:
+            msg = f"Failed to update documents by filter in Qdrant: {e!s}"
+            raise QdrantStoreError(msg) from e
+
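`update_by_filter` merges `meta` into each matching point's existing `meta` payload via `_create_updated_point_from_record`, so keys you don't mention survive; and, as its docstring warns, the scroll-then-upsert sequence is not atomic. Usage sketch:

```python
from haystack.dataclasses import Document
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
store.write_documents([
    Document(content="breaking", embedding=[0.1] * 5, meta={"category": "news"}),
])

updated = store.update_by_filter(
    filters={"field": "meta.category", "operator": "==", "value": "news"},
    meta={"reviewed": True},  # merged into existing meta; "category" is kept
)
print(updated)  # 1
```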
+    def delete_all_documents(self, recreate_index: bool = False) -> None:
+        """
+        Deletes all documents from the document store.
+
+        :param recreate_index: Whether to recreate the index after deleting all documents.
+        """
+
+        self._initialize_client()
+        assert self._client is not None
+
+        if recreate_index:
+            # get current collection config as json
+            collection_info = self._client.get_collection(collection_name=self.index)
+            info_json = collection_info.model_dump()
+
+            # deal with the Optional use_sparse_embeddings
+            sparse_vectors = info_json["config"]["params"]["sparse_vectors"]
+            use_sparse_embeddings = True if sparse_vectors else False
+
+            # deal with the Optional sparse_idf
+            hnsw_config = info_json["config"]["params"]["vectors"].get("config", {}).get("hnsw_config", None)
+            sparse_idf = True if use_sparse_embeddings and hnsw_config else False
+
+            # recreate collection
+            self._set_up_collection(
+                collection_name=self.index,
+                embedding_dim=info_json["config"]["params"]["vectors"]["size"],
+                recreate_collection=True,
+                similarity=info_json["config"]["params"]["vectors"]["distance"].lower(),
+                use_sparse_embeddings=use_sparse_embeddings,
+                sparse_idf=sparse_idf,
+                on_disk=info_json["config"]["hnsw_config"]["on_disk"],
+                payload_fields_to_index=info_json["payload_schema"],
+            )
+
+        else:
+            try:
+                self._client.delete(
+                    collection_name=self.index,
+                    points_selector=rest.FilterSelector(
+                        filter=rest.Filter(
+                            must=[],
+                        )
+                    ),
+                    wait=self.wait_result_from_api,
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Error {e} when calling QdrantDocumentStore.delete_all_documents()",
+                )
+
+    async def delete_all_documents_async(self, recreate_index: bool = False) -> None:
+        """
+        Asynchronously deletes all documents from the document store.
+
+        :param recreate_index: Whether to recreate the index after deleting all documents.
+        """
+
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        if recreate_index:
+            # get current collection config as json
+            collection_info = await self._async_client.get_collection(collection_name=self.index)
+            info_json = collection_info.model_dump()
+
+            # deal with the Optional use_sparse_embeddings
+            sparse_vectors = info_json["config"]["params"]["sparse_vectors"]
+            use_sparse_embeddings = True if sparse_vectors else False
+
+            # deal with the Optional sparse_idf
+            hnsw_config = info_json["config"]["params"]["vectors"].get("config", {}).get("hnsw_config", None)
+            sparse_idf = True if use_sparse_embeddings and hnsw_config else False
+
+            # recreate collection
+            await self._set_up_collection_async(
+                collection_name=self.index,
+                embedding_dim=info_json["config"]["params"]["vectors"]["size"],
+                recreate_collection=True,
+                similarity=info_json["config"]["params"]["vectors"]["distance"].lower(),
+                use_sparse_embeddings=use_sparse_embeddings,
+                sparse_idf=sparse_idf,
+                on_disk=info_json["config"]["hnsw_config"]["on_disk"],
+                payload_fields_to_index=info_json["payload_schema"],
+            )
+
+        else:
+            try:
+                await self._async_client.delete(
+                    collection_name=self.index,
+                    points_selector=rest.FilterSelector(
+                        filter=rest.Filter(
+                            must=[],
+                        )
+                    ),
+                    wait=self.wait_result_from_api,
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Error {e} when calling QdrantDocumentStore.delete_all_documents_async()",
+                )
+
+    def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
+        """
+        Returns the number of documents that match the provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+
+        :returns: The number of documents that match the filters.
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters)
+        try:
+            response = self._client.count(
+                collection_name=self.index,
+                count_filter=qdrant_filter,
+            )
+            return response.count
+        except (UnexpectedResponse, ValueError) as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter()")
+            return 0
+
+    async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
+        """
+        Asynchronously returns the number of documents that match the provided filters.
+
+        :param filters: The filters to apply to select documents for counting.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+
+        :returns:
+            The number of documents that match the filters.
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters)
+        try:
+            response = await self._async_client.count(
+                collection_name=self.index,
+                count_filter=qdrant_filter,
+            )
+            return response.count
+        except (UnexpectedResponse, ValueError) as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter_async()")
+            return 0
+
+    def get_metadata_fields_info(self) -> dict[str, str]:
+        """
+        Returns the information about the fields from the collection.
+
+        :returns:
+            A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        try:
+            collection_info = self._client.get_collection(self.index)
+            payload_schema = collection_info.payload_schema or {}
+            return self._metadata_fields_info_from_schema(payload_schema)
+        except (UnexpectedResponse, ValueError) as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info()")
+            return {}
+
+    async def get_metadata_fields_info_async(self) -> dict[str, str]:
+        """
+        Asynchronously returns the information about the fields from the collection.
+
+        :returns:
+            A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        try:
+            collection_info = await self._async_client.get_collection(self.index)
+            payload_schema = collection_info.payload_schema or {}
+            return self._metadata_fields_info_from_schema(payload_schema)
+        except (UnexpectedResponse, ValueError) as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info_async()")
+            return {}
+
+    def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
+        """
+        Returns the minimum and maximum values for the given metadata field.
+
+        :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
+
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
+            metadata field across all documents. Returns an empty dict if no documents have the field.
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        try:
+            min_value: Any = None
+            max_value: Any = None
+            next_offset = None
+
+            while True:
+                records, next_offset = self._client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=None,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                min_value, max_value = self._process_records_min_max(records, metadata_field, min_value, max_value)
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            if min_value is not None and max_value is not None:
+                return {"min": min_value, "max": max_value}
+            return {}
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max()")
+            return {}
+
+    async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
+        """
+        Asynchronously returns the minimum and maximum values for the given metadata field.
+
+        :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
+
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
+            metadata field across all documents. Returns an empty dict if no documents have the field.
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        try:
+            min_value: Any = None
+            max_value: Any = None
+            next_offset = None
+
+            while True:
+                records, next_offset = await self._async_client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=None,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                min_value, max_value = self._process_records_min_max(records, metadata_field, min_value, max_value)
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            if min_value is not None and max_value is not None:
+                return {"min": min_value, "max": max_value}
+            return {}
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max_async()")
+            return {}
+
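These metadata helpers are computed client-side: they scroll every payload in the collection (`with_vectors=False`) rather than asking Qdrant for an aggregate, so their cost grows linearly with collection size. Usage sketch, assuming documents carry a numeric `year` meta field:

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
bounds = store.get_metadata_field_min_max("year")  # {} on an empty collection
print(bounds.get("min"), bounds.get("max"))
```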
+    def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
+        """
+        Returns the number of unique values for each specified metadata field among documents that match the filters.
+
+        :param filters: The filters to restrict the documents considered.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
+
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
+            documents.
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
+        unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
+
+        try:
+            next_offset = None
+            while True:
+                records, next_offset = self._client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                self._process_records_count_unique(records, metadata_fields, unique_values_by_field)
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            return {field: len(unique_values_by_field[field]) for field in metadata_fields}
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter()")
+            return dict.fromkeys(metadata_fields, 0)
+
+    async def count_unique_metadata_by_filter_async(
+        self, filters: dict[str, Any], metadata_fields: list[str]
+    ) -> dict[str, int]:
+        """
+        Asynchronously returns the number of unique values for each specified metadata field among documents that
+        match the filters.
+
+        :param filters: The filters to restrict the documents considered.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
+
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
+            documents.
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
+        unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
+
+        try:
+            next_offset = None
+            while True:
+                records, next_offset = await self._async_client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                self._process_records_count_unique(records, metadata_fields, unique_values_by_field)
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            return {field: len(unique_values_by_field[field]) for field in metadata_fields}
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter_async()")
+            return dict.fromkeys(metadata_fields, 0)
+
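Because `_process_records_count_unique` stringifies `list` and `dict` values before adding them to the per-field sets, two structurally equal lists count as a single unique value. Usage sketch:

```python
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(":memory:", recreate_index=True, embedding_dim=5)
counts = store.count_unique_metadata_by_filter(
    filters={"field": "meta.category", "operator": "==", "value": "news"},
    metadata_fields=["author", "year"],
)
print(counts)  # e.g. {"author": 12, "year": 3}
```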
+    def get_metadata_field_unique_values(
+        self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
+    ) -> list[Any]:
+        """
+        Returns unique values for a metadata field, with optional filters and offset/limit pagination.
+
+        Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
+
+        :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
+        :param filters: Optional filters to restrict the documents considered.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param limit: Maximum number of unique values to return per page. Defaults to 100.
+        :param offset: Number of unique values to skip (for pagination). Defaults to 0.
+
+        :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
+        """
+        self._initialize_client()
+        assert self._client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
+        unique_values: list[Any] = []
+        unique_values_set: set[Any] = set()
+
+        try:
+            next_offset = None
+            while len(unique_values) < offset + limit:
+                records, next_offset = self._client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                if self._process_records_unique_values(
+                    records, metadata_field, unique_values, unique_values_set, offset, limit
+                ):
+                    break
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            return unique_values[offset : offset + limit]
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values()")
+            return []
+
+    async def get_metadata_field_unique_values_async(
+        self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
+    ) -> list[Any]:
+        """
+        Asynchronously returns unique values for a metadata field, with optional filters and offset/limit pagination.
+
+        Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
+
+        :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
+        :param filters: Optional filters to restrict the documents considered.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param limit: Maximum number of unique values to return per page. Defaults to 100.
+        :param offset: Number of unique values to skip (for pagination). Defaults to 0.
+
+        :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
+        """
+        await self._initialize_async_client()
+        assert self._async_client is not None
+
+        qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
+        unique_values: list[Any] = []
+        unique_values_set: set[Any] = set()
+
+        try:
+            next_offset = None
+            while len(unique_values) < offset + limit:
+                records, next_offset = await self._async_client.scroll(
+                    collection_name=self.index,
+                    scroll_filter=qdrant_filter,
+                    limit=self.scroll_size,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False,
+                )
+                if self._process_records_unique_values(
+                    records, metadata_field, unique_values, unique_values_set, offset, limit
+                ):
+                    break
+                if self._check_stop_scrolling(next_offset):
+                    break
+
+            return unique_values[offset : offset + limit]
+        except Exception as e:
+            logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values_async()")
+            return []
+
@classmethod
|
|
1261
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore":
|
|
1262
|
+
"""
|
|
1263
|
+
Deserializes the component from a dictionary.
|
|
1264
|
+
|
|
1265
|
+
:param data:
|
|
1266
|
+
The dictionary to deserialize from.
|
|
1267
|
+
:returns:
|
|
1268
|
+
The deserialized component.
|
|
1269
|
+
"""
|
|
1270
|
+
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
|
|
1271
|
+
return default_from_dict(cls, data)
|
|
1272
|
+
|
|
1273
|
+
def to_dict(self) -> dict[str, Any]:
|
|
1274
|
+
"""
|
|
1275
|
+
Serializes the component to a dictionary.
|
|
1276
|
+
|
|
1277
|
+
:returns:
|
|
1278
|
+
Dictionary with serialized data.
|
|
1279
|
+
"""
|
|
1280
|
+
params = inspect.signature(self.__init__).parameters # type: ignore
|
|
1281
|
+
# All the __init__ params must be set as attributes
|
|
1282
|
+
# Set as init_params without default values
|
|
1283
|
+
init_params = {k: getattr(self, k) for k in params}
|
|
1284
|
+
init_params["api_key"] = self.api_key.to_dict() if self.api_key else None
|
|
1285
|
+
return default_to_dict(
|
|
1286
|
+
self,
|
|
1287
|
+
**init_params,
|
|
1288
|
+
)
|
|
1289
|
+
|
|
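`to_dict` reads every `__init__` parameter back from a same-named attribute (hence the comment above), and `from_dict` re-wraps the `api_key` secret before delegating to `default_from_dict`. A minimal round-trip sketch, assuming a local in-memory store so no API key is involved:

from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(location=":memory:", index="Document")
data = store.to_dict()
# data == {"type": "haystack_integrations...QdrantDocumentStore", "init_parameters": {...}}
restored = QdrantDocumentStore.from_dict(data)
assert restored.index == store.index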
1290
|
+
def _get_documents_generator(
|
|
1291
|
+
self,
|
|
1292
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1293
|
+
) -> Generator[Document, None, None]:
|
|
1294
|
+
"""
|
|
1295
|
+
Returns a generator that yields documents from Qdrant based on the provided filters.
|
|
1296
|
+
|
|
1297
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1298
|
+
:returns: A generator that yields documents retrieved from Qdrant.
|
|
1299
|
+
"""
|
|
1300
|
+
|
|
1301
|
+
self._initialize_client()
|
|
1302
|
+
assert self._client is not None
|
|
1303
|
+
|
|
1304
|
+
index = self.index
|
|
1305
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
1306
|
+
|
|
1307
|
+
next_offset = None
|
|
1308
|
+
stop_scrolling = False
|
|
1309
|
+
while not stop_scrolling:
|
|
1310
|
+
records, next_offset = self._client.scroll(
|
|
1311
|
+
collection_name=index,
|
|
1312
|
+
scroll_filter=qdrant_filters,
|
|
1313
|
+
limit=self.scroll_size,
|
|
1314
|
+
offset=next_offset,
|
|
1315
|
+
with_payload=True,
|
|
1316
|
+
with_vectors=True,
|
|
1317
|
+
)
|
|
1318
|
+
stop_scrolling = next_offset is None or (
|
|
1319
|
+
hasattr(next_offset, "num")
|
|
1320
|
+
and hasattr(next_offset, "uuid")
|
|
1321
|
+
and next_offset.num == 0
|
|
1322
|
+
and next_offset.uuid == ""
|
|
1323
|
+
) # PointId always has num and uuid
|
|
1324
|
+
|
|
1325
|
+
for record in records:
|
|
1326
|
+
yield convert_qdrant_point_to_haystack_document(
|
|
1327
|
+
record, use_sparse_embeddings=self.use_sparse_embeddings
|
|
1328
|
+
)
|
|
1329
|
+
|
|
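The generator scrolls in pages of `scroll_size` and stops when Qdrant returns no next offset, or when gRPC hands back a zero-valued `PointId` (`num == 0`, `uuid == ""`) instead of `None`. A standalone sketch of the same scroll loop against the raw client, assuming a collection named "Document" already exists:

from qdrant_client import QdrantClient

client = QdrantClient(location=":memory:")
next_offset = None
while True:
    records, next_offset = client.scroll(
        collection_name="Document",
        limit=64,
        offset=next_offset,
        with_payload=True,
        with_vectors=True,
    )
    for record in records:
        ...  # convert each record to a Document here
    if next_offset is None:  # the store additionally treats a zero PointId as "done"
        break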
1330
|
+
async def _get_documents_generator_async(
|
|
1331
|
+
self,
|
|
1332
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1333
|
+
) -> AsyncGenerator[Document, None]:
|
|
1334
|
+
"""
|
|
1335
|
+
Returns an asynchronous generator that yields documents from Qdrant based on the provided filters.
|
|
1336
|
+
|
|
1337
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1338
|
+
:returns: An asynchronous generator that yields documents retrieved from Qdrant.
|
|
1339
|
+
"""
|
|
1340
|
+
|
|
1341
|
+
await self._initialize_async_client()
|
|
1342
|
+
assert self._async_client is not None
|
|
1343
|
+
|
|
1344
|
+
index = self.index
|
|
1345
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
1346
|
+
|
|
1347
|
+
next_offset = None
|
|
1348
|
+
stop_scrolling = False
|
|
1349
|
+
while not stop_scrolling:
|
|
1350
|
+
records, next_offset = await self._async_client.scroll(
|
|
1351
|
+
collection_name=index,
|
|
1352
|
+
scroll_filter=qdrant_filters,
|
|
1353
|
+
limit=self.scroll_size,
|
|
1354
|
+
offset=next_offset,
|
|
1355
|
+
with_payload=True,
|
|
1356
|
+
with_vectors=True,
|
|
1357
|
+
)
|
|
1358
|
+
stop_scrolling = next_offset is None or (
|
|
1359
|
+
hasattr(next_offset, "num")
|
|
1360
|
+
and hasattr(next_offset, "uuid")
|
|
1361
|
+
and next_offset.num == 0
|
|
1362
|
+
and next_offset.uuid == ""
|
|
1363
|
+
) # PointId always has num and uuid
|
|
1364
|
+
|
|
1365
|
+
for record in records:
|
|
1366
|
+
yield convert_qdrant_point_to_haystack_document(
|
|
1367
|
+
record, use_sparse_embeddings=self.use_sparse_embeddings
|
|
1368
|
+
)
|
|
1369
|
+
|
|
1370
|
+
def get_documents_by_id(
|
|
1371
|
+
self,
|
|
1372
|
+
ids: list[str],
|
|
1373
|
+
) -> list[Document]:
|
|
1374
|
+
"""
|
|
1375
|
+
Retrieves documents from Qdrant by their IDs.
|
|
1376
|
+
|
|
1377
|
+
:param ids:
|
|
1378
|
+
A list of document IDs to retrieve.
|
|
1379
|
+
:returns:
|
|
1380
|
+
A list of documents.
|
|
1381
|
+
"""
|
|
1382
|
+
documents: list[Document] = []
|
|
1383
|
+
|
|
1384
|
+
self._initialize_client()
|
|
1385
|
+
assert self._client is not None
|
|
1386
|
+
|
|
1387
|
+
ids = [convert_id(_id) for _id in ids]
|
|
1388
|
+
records = self._client.retrieve(
|
|
1389
|
+
collection_name=self.index,
|
|
1390
|
+
ids=ids,
|
|
1391
|
+
with_payload=True,
|
|
1392
|
+
with_vectors=True,
|
|
1393
|
+
)
|
|
1394
|
+
|
|
1395
|
+
for record in records:
|
|
1396
|
+
documents.append(
|
|
1397
|
+
convert_qdrant_point_to_haystack_document(record, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
1398
|
+
)
|
|
1399
|
+
return documents
|
|
1400
|
+
|
|
1401
|
+
async def get_documents_by_id_async(
|
|
1402
|
+
self,
|
|
1403
|
+
ids: list[str],
|
|
1404
|
+
) -> list[Document]:
|
|
1405
|
+
"""
|
|
1406
|
+
Retrieves documents from Qdrant by their IDs.
|
|
1407
|
+
|
|
1408
|
+
:param ids:
|
|
1409
|
+
A list of document IDs to retrieve.
|
|
1410
|
+
:returns:
|
|
1411
|
+
A list of documents.
|
|
1412
|
+
"""
|
|
1413
|
+
documents: list[Document] = []
|
|
1414
|
+
|
|
1415
|
+
await self._initialize_async_client()
|
|
1416
|
+
assert self._async_client is not None
|
|
1417
|
+
|
|
1418
|
+
ids = [convert_id(_id) for _id in ids]
|
|
1419
|
+
records = await self._async_client.retrieve(
|
|
1420
|
+
collection_name=self.index,
|
|
1421
|
+
ids=ids,
|
|
1422
|
+
with_payload=True,
|
|
1423
|
+
with_vectors=True,
|
|
1424
|
+
)
|
|
1425
|
+
|
|
1426
|
+
for record in records:
|
|
1427
|
+
documents.append(
|
|
1428
|
+
convert_qdrant_point_to_haystack_document(record, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
1429
|
+
)
|
|
1430
|
+
return documents
|
|
1431
|
+
|
|
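Both ID-lookup variants convert Haystack document IDs into Qdrant point IDs via `convert_id` before calling `retrieve`. A minimal sync sketch (the async method mirrors it on the async client):

from haystack.dataclasses import Document
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(location=":memory:", embedding_dim=4)
doc = Document(content="hello", embedding=[0.1, 0.2, 0.3, 0.4])
store.write_documents([doc])

retrieved = store.get_documents_by_id([doc.id])
assert retrieved[0].content == "hello"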
1432
|
+
def _query_by_sparse(
|
|
1433
|
+
self,
|
|
1434
|
+
query_sparse_embedding: SparseEmbedding,
|
|
1435
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1436
|
+
top_k: int = 10,
|
|
1437
|
+
scale_score: bool = False,
|
|
1438
|
+
return_embedding: bool = False,
|
|
1439
|
+
score_threshold: float | None = None,
|
|
1440
|
+
group_by: str | None = None,
|
|
1441
|
+
group_size: int | None = None,
|
|
1442
|
+
) -> list[Document]:
|
|
1443
|
+
"""
|
|
1444
|
+
Queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
1445
|
+
|
|
1446
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
1447
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1448
|
+
:param top_k: Maximum number of documents to return. If the `group_by` parameter is used, the maximum number of
|
|
1449
|
+
groups to return.
|
|
1450
|
+
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
1451
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
1452
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
1453
|
+
Score of the returned result might be higher or lower than the threshold
|
|
1454
|
+
depending on the Distance function used.
|
|
1455
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
1456
|
+
:param group_by: Payload field to group by; it must be a string or number field. If the field contains more than 1
|
|
1457
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
1458
|
+
:param group_size: Maximum number of points to return per group. Default is 3.
|
|
1459
|
+
|
|
1460
|
+
:returns: List of documents that are most similar to `query_sparse_embedding`.
|
|
1461
|
+
|
|
1462
|
+
:raises QdrantStoreError:
|
|
1463
|
+
If the Document Store was initialized with `use_sparse_embeddings=False`.
|
|
1464
|
+
"""
|
|
1465
|
+
self._initialize_client()
|
|
1466
|
+
assert self._client is not None
|
|
1467
|
+
|
|
1468
|
+
if not self.use_sparse_embeddings:
|
|
1469
|
+
message = (
|
|
1470
|
+
"You are trying to query using sparse embeddings, but the Document Store "
|
|
1471
|
+
"was initialized with `use_sparse_embeddings=False`. "
|
|
1472
|
+
)
|
|
1473
|
+
raise QdrantStoreError(message)
|
|
541
1474
|
|
|
542
1475
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
543
1476
|
query_indices = query_sparse_embedding.indices
|
|
544
1477
|
query_values = query_sparse_embedding.values
|
|
545
1478
|
if group_by:
|
|
546
|
-
groups = self.
|
|
1479
|
+
groups = self._client.query_points_groups(
|
|
547
1480
|
collection_name=self.index,
|
|
548
1481
|
query=rest.SparseVector(
|
|
549
1482
|
indices=query_indices,
|
|
@@ -553,21 +1486,13 @@ class QdrantDocumentStore:
|
|
|
553
1486
|
query_filter=qdrant_filters,
|
|
554
1487
|
limit=top_k,
|
|
555
1488
|
group_by=group_by,
|
|
556
|
-
group_size=group_size,
|
|
1489
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
557
1490
|
with_vectors=return_embedding,
|
|
558
1491
|
score_threshold=score_threshold,
|
|
559
1492
|
).groups
|
|
560
|
-
|
|
561
|
-
[
|
|
562
|
-
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
563
|
-
for group in groups
|
|
564
|
-
for point in group.hits
|
|
565
|
-
]
|
|
566
|
-
if groups
|
|
567
|
-
else []
|
|
568
|
-
)
|
|
1493
|
+
return self._process_group_results(groups)
|
|
569
1494
|
else:
|
|
570
|
-
points = self.
|
|
1495
|
+
points = self._client.query_points(
|
|
571
1496
|
collection_name=self.index,
|
|
572
1497
|
query=rest.SparseVector(
|
|
573
1498
|
indices=query_indices,
|
|
@@ -579,28 +1504,19 @@ class QdrantDocumentStore:
|
|
|
579
1504
|
with_vectors=return_embedding,
|
|
580
1505
|
score_threshold=score_threshold,
|
|
581
1506
|
).points
|
|
582
|
-
|
|
583
|
-
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
584
|
-
for point in points
|
|
585
|
-
]
|
|
586
|
-
if scale_score:
|
|
587
|
-
for document in results:
|
|
588
|
-
score = document.score
|
|
589
|
-
score = float(1 / (1 + np.exp(-score / 100)))
|
|
590
|
-
document.score = score
|
|
591
|
-
return results
|
|
1507
|
+
return self._process_query_point_results(points, scale_score=scale_score)
|
|
592
1508
|
|
|
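`_query_by_sparse` is internal (the sparse-embedding retriever calls it), but a sketch shows the two paths it takes. This assumes a `store` created with `use_sparse_embeddings=True`, documents written with sparse embeddings, and a hypothetical `meta.category` payload field:

from haystack.dataclasses.sparse_embedding import SparseEmbedding

sparse_query = SparseEmbedding(indices=[11, 42, 907], values=[0.6, 0.3, 0.1])

flat = store._query_by_sparse(sparse_query, top_k=5)  # 5 best-matching points
grouped = store._query_by_sparse(                     # 2 groups of up to DEFAULT_GROUP_SIZE hits
    sparse_query, top_k=2, group_by="meta.category"
)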
593
1509
|
def _query_by_embedding(
|
|
594
1510
|
self,
|
|
595
|
-
query_embedding:
|
|
596
|
-
filters:
|
|
1511
|
+
query_embedding: list[float],
|
|
1512
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
597
1513
|
top_k: int = 10,
|
|
598
1514
|
scale_score: bool = False,
|
|
599
1515
|
return_embedding: bool = False,
|
|
600
|
-
score_threshold:
|
|
601
|
-
group_by:
|
|
602
|
-
group_size:
|
|
603
|
-
) ->
|
|
1516
|
+
score_threshold: float | None = None,
|
|
1517
|
+
group_by: str | None = None,
|
|
1518
|
+
group_size: int | None = None,
|
|
1519
|
+
) -> list[Document]:
|
|
604
1520
|
"""
|
|
605
1521
|
Queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
606
1522
|
|
|
@@ -620,30 +1536,26 @@ class QdrantDocumentStore:
|
|
|
620
1536
|
|
|
621
1537
|
:returns: List of documents that are most similar to `query_embedding`.
|
|
622
1538
|
"""
|
|
1539
|
+
self._initialize_client()
|
|
1540
|
+
assert self._client is not None
|
|
1541
|
+
|
|
623
1542
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
624
1543
|
if group_by:
|
|
625
|
-
groups = self.
|
|
1544
|
+
groups = self._client.query_points_groups(
|
|
626
1545
|
collection_name=self.index,
|
|
627
1546
|
query=query_embedding,
|
|
628
1547
|
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
629
1548
|
query_filter=qdrant_filters,
|
|
630
1549
|
limit=top_k,
|
|
631
1550
|
group_by=group_by,
|
|
632
|
-
group_size=group_size,
|
|
1551
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
633
1552
|
with_vectors=return_embedding,
|
|
634
1553
|
score_threshold=score_threshold,
|
|
635
1554
|
).groups
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
639
|
-
for group in groups
|
|
640
|
-
for point in group.hits
|
|
641
|
-
]
|
|
642
|
-
if groups
|
|
643
|
-
else []
|
|
644
|
-
)
|
|
1555
|
+
return self._process_group_results(groups)
|
|
1556
|
+
|
|
645
1557
|
else:
|
|
646
|
-
points = self.
|
|
1558
|
+
points = self._client.query_points(
|
|
647
1559
|
collection_name=self.index,
|
|
648
1560
|
query=query_embedding,
|
|
649
1561
|
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
@@ -652,32 +1564,19 @@ class QdrantDocumentStore:
|
|
|
652
1564
|
with_vectors=return_embedding,
|
|
653
1565
|
score_threshold=score_threshold,
|
|
654
1566
|
).points
|
|
655
|
-
|
|
656
|
-
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
657
|
-
for point in points
|
|
658
|
-
]
|
|
659
|
-
|
|
660
|
-
if scale_score:
|
|
661
|
-
for document in results:
|
|
662
|
-
score = document.score
|
|
663
|
-
if self.similarity == "cosine":
|
|
664
|
-
score = (score + 1) / 2
|
|
665
|
-
else:
|
|
666
|
-
score = float(1 / (1 + np.exp(-score / 100)))
|
|
667
|
-
document.score = score
|
|
668
|
-
return results
|
|
1567
|
+
return self._process_query_point_results(points, scale_score=scale_score)
|
|
669
1568
|
|
|
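The dense path mirrors it; `scale_score=True` maps cosine scores from [-1, 1] into [0, 1] inside `_process_query_point_results` (shown further down), while `score_threshold` is applied by Qdrant itself before any scaling. A sketch under the same assumptions as above:

docs = store._query_by_embedding(
    query_embedding=[0.1, 0.2, 0.3, 0.4],  # must match the collection's embedding_dim
    top_k=3,
    scale_score=True,
    score_threshold=0.2,
)
for doc in docs:
    print(doc.id, doc.score)  # cosine scores now lie in [0, 1]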
670
1569
|
def _query_hybrid(
|
|
671
1570
|
self,
|
|
672
|
-
query_embedding:
|
|
1571
|
+
query_embedding: list[float],
|
|
673
1572
|
query_sparse_embedding: SparseEmbedding,
|
|
674
|
-
filters:
|
|
1573
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
675
1574
|
top_k: int = 10,
|
|
676
1575
|
return_embedding: bool = False,
|
|
677
|
-
score_threshold:
|
|
678
|
-
group_by:
|
|
679
|
-
group_size:
|
|
680
|
-
) ->
|
|
1576
|
+
score_threshold: float | None = None,
|
|
1577
|
+
group_by: str | None = None,
|
|
1578
|
+
group_size: int | None = None,
|
|
1579
|
+
) -> list[Document]:
|
|
681
1580
|
"""
|
|
682
1581
|
Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
|
|
683
1582
|
|
|
@@ -706,6 +1605,10 @@ class QdrantDocumentStore:
|
|
|
706
1605
|
|
|
707
1606
|
# This implementation is based on the code from the Python Qdrant client:
|
|
708
1607
|
# https://github.com/qdrant/qdrant-client/blob/8e3ea58f781e4110d11c0a6985b5e6bb66b85d33/qdrant_client/qdrant_fastembed.py#L519
|
|
1608
|
+
|
|
1609
|
+
self._initialize_client()
|
|
1610
|
+
assert self._client is not None
|
|
1611
|
+
|
|
709
1612
|
if not self.use_sparse_embeddings:
|
|
710
1613
|
message = (
|
|
711
1614
|
"You are trying to query using sparse embeddings, but the Document Store "
|
|
@@ -717,7 +1620,7 @@ class QdrantDocumentStore:
|
|
|
717
1620
|
|
|
718
1621
|
try:
|
|
719
1622
|
if group_by:
|
|
720
|
-
groups = self.
|
|
1623
|
+
groups = self._client.query_points_groups(
|
|
721
1624
|
collection_name=self.index,
|
|
722
1625
|
prefetch=[
|
|
723
1626
|
rest.Prefetch(
|
|
@@ -737,13 +1640,13 @@ class QdrantDocumentStore:
|
|
|
737
1640
|
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
738
1641
|
limit=top_k,
|
|
739
1642
|
group_by=group_by,
|
|
740
|
-
group_size=group_size,
|
|
1643
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
741
1644
|
score_threshold=score_threshold,
|
|
742
1645
|
with_payload=True,
|
|
743
1646
|
with_vectors=return_embedding,
|
|
744
1647
|
).groups
|
|
745
1648
|
else:
|
|
746
|
-
points = self.
|
|
1649
|
+
points = self._client.query_points(
|
|
747
1650
|
collection_name=self.index,
|
|
748
1651
|
prefetch=[
|
|
749
1652
|
rest.Prefetch(
|
|
@@ -772,19 +1675,263 @@ class QdrantDocumentStore:
|
|
|
772
1675
|
raise QdrantStoreError(msg) from e
|
|
773
1676
|
|
|
774
1677
|
if group_by:
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
1678
|
+
return self._process_group_results(groups)
|
|
1679
|
+
else:
|
|
1680
|
+
return self._process_query_point_results(points)
|
|
1681
|
+
|
|
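The hybrid path issues a single `query_points` call with two prefetch branches, sparse and dense, fused by Reciprocal Rank Fusion, so a point's final rank comes from its rank in both branches rather than from raw scores. Roughly the same request expressed against the raw client; the vector names are the integration's internal constants and the query values are illustrative:

from qdrant_client import QdrantClient, models

client = QdrantClient(location=":memory:")
response = client.query_points(
    collection_name="Document",  # assumed to exist with named dense and sparse vectors
    prefetch=[
        models.Prefetch(
            query=models.SparseVector(indices=[11, 42], values=[0.6, 0.4]),
            using="text-sparse",  # SPARSE_VECTORS_NAME
        ),
        models.Prefetch(query=[0.1, 0.2, 0.3, 0.4], using="text-dense"),  # DENSE_VECTORS_NAME
    ],
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    limit=10,
)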
1682
|
+
async def _query_by_sparse_async(
|
|
1683
|
+
self,
|
|
1684
|
+
query_sparse_embedding: SparseEmbedding,
|
|
1685
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1686
|
+
top_k: int = 10,
|
|
1687
|
+
scale_score: bool = False,
|
|
1688
|
+
return_embedding: bool = False,
|
|
1689
|
+
score_threshold: float | None = None,
|
|
1690
|
+
group_by: str | None = None,
|
|
1691
|
+
group_size: int | None = None,
|
|
1692
|
+
) -> list[Document]:
|
|
1693
|
+
"""
|
|
1694
|
+
Asynchronously queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
1695
|
+
|
|
1696
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
1697
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1698
|
+
:param top_k: Maximum number of documents to return. If the `group_by` parameter is used, the maximum number of
|
|
1699
|
+
groups to return.
|
|
1700
|
+
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
1701
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
1702
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
1703
|
+
Score of the returned result might be higher or lower than the threshold
|
|
1704
|
+
depending on the Distance function used.
|
|
1705
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
1706
|
+
:param group_by: Payload field to group by; it must be a string or number field. If the field contains more than 1
|
|
1707
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
1708
|
+
:param group_size: Maximum number of points to return per group. Default is 3.
|
|
1709
|
+
|
|
1710
|
+
:returns: List of documents that are most similar to `query_sparse_embedding`.
|
|
1711
|
+
|
|
1712
|
+
:raises QdrantStoreError:
|
|
1713
|
+
If the Document Store was initialized with `use_sparse_embeddings=False`.
|
|
1714
|
+
"""
|
|
1715
|
+
|
|
1716
|
+
await self._initialize_async_client()
|
|
1717
|
+
assert self._async_client is not None
|
|
1718
|
+
|
|
1719
|
+
if not self.use_sparse_embeddings:
|
|
1720
|
+
message = (
|
|
1721
|
+
"You are trying to query using sparse embeddings, but the Document Store "
|
|
1722
|
+
"was initialized with `use_sparse_embeddings=False`. "
|
|
1723
|
+
)
|
|
1724
|
+
raise QdrantStoreError(message)
|
|
1725
|
+
|
|
1726
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
1727
|
+
query_indices = query_sparse_embedding.indices
|
|
1728
|
+
query_values = query_sparse_embedding.values
|
|
1729
|
+
if group_by:
|
|
1730
|
+
response = await self._async_client.query_points_groups(
|
|
1731
|
+
collection_name=self.index,
|
|
1732
|
+
query=rest.SparseVector(
|
|
1733
|
+
indices=query_indices,
|
|
1734
|
+
values=query_values,
|
|
1735
|
+
),
|
|
1736
|
+
using=SPARSE_VECTORS_NAME,
|
|
1737
|
+
query_filter=qdrant_filters,
|
|
1738
|
+
limit=top_k,
|
|
1739
|
+
group_by=group_by,
|
|
1740
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1741
|
+
with_vectors=return_embedding,
|
|
1742
|
+
score_threshold=score_threshold,
|
|
1743
|
+
)
|
|
1744
|
+
groups = response.groups
|
|
1745
|
+
return self._process_group_results(groups)
|
|
1746
|
+
else:
|
|
1747
|
+
query_response = await self._async_client.query_points(
|
|
1748
|
+
collection_name=self.index,
|
|
1749
|
+
query=rest.SparseVector(
|
|
1750
|
+
indices=query_indices,
|
|
1751
|
+
values=query_values,
|
|
1752
|
+
),
|
|
1753
|
+
using=SPARSE_VECTORS_NAME,
|
|
1754
|
+
query_filter=qdrant_filters,
|
|
1755
|
+
limit=top_k,
|
|
1756
|
+
with_vectors=return_embedding,
|
|
1757
|
+
score_threshold=score_threshold,
|
|
1758
|
+
)
|
|
1759
|
+
points = query_response.points
|
|
1760
|
+
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1761
|
+
|
|
1762
|
+
async def _query_by_embedding_async(
|
|
1763
|
+
self,
|
|
1764
|
+
query_embedding: list[float],
|
|
1765
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1766
|
+
top_k: int = 10,
|
|
1767
|
+
scale_score: bool = False,
|
|
1768
|
+
return_embedding: bool = False,
|
|
1769
|
+
score_threshold: float | None = None,
|
|
1770
|
+
group_by: str | None = None,
|
|
1771
|
+
group_size: int | None = None,
|
|
1772
|
+
) -> list[Document]:
|
|
1773
|
+
"""
|
|
1774
|
+
Asynchronously queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
1775
|
+
|
|
1776
|
+
:param query_embedding: Dense embedding of the query.
|
|
1777
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1778
|
+
:param top_k: Maximum number of documents to return. If the `group_by` parameter is used, the maximum number of
|
|
1779
|
+
groups to return.
|
|
1780
|
+
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
1781
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
1782
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
1783
|
+
Score of the returned result might be higher or lower than the threshold
|
|
1784
|
+
depending on the Distance function used.
|
|
1785
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
1786
|
+
:param group_by: Payload field to group by; it must be a string or number field. If the field contains more than 1
|
|
1787
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
1788
|
+
:param group_size: Maximum number of points to return per group. Default is 3.
|
|
1789
|
+
|
|
1790
|
+
:returns: List of documents that are most similar to `query_embedding`.
|
|
1791
|
+
"""
|
|
1792
|
+
await self._initialize_async_client()
|
|
1793
|
+
assert self._async_client is not None
|
|
1794
|
+
|
|
1795
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
1796
|
+
if group_by:
|
|
1797
|
+
response = await self._async_client.query_points_groups(
|
|
1798
|
+
collection_name=self.index,
|
|
1799
|
+
query=query_embedding,
|
|
1800
|
+
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
1801
|
+
query_filter=qdrant_filters,
|
|
1802
|
+
limit=top_k,
|
|
1803
|
+
group_by=group_by,
|
|
1804
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1805
|
+
with_vectors=return_embedding,
|
|
1806
|
+
score_threshold=score_threshold,
|
|
783
1807
|
)
|
|
1808
|
+
groups = response.groups
|
|
1809
|
+
return self._process_group_results(groups)
|
|
784
1810
|
else:
|
|
785
|
-
|
|
1811
|
+
query_response = await self._async_client.query_points(
|
|
1812
|
+
collection_name=self.index,
|
|
1813
|
+
query=query_embedding,
|
|
1814
|
+
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
1815
|
+
query_filter=qdrant_filters,
|
|
1816
|
+
limit=top_k,
|
|
1817
|
+
with_vectors=return_embedding,
|
|
1818
|
+
score_threshold=score_threshold,
|
|
1819
|
+
)
|
|
1820
|
+
points = query_response.points
|
|
1821
|
+
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1822
|
+
|
|
1823
|
+
async def _query_hybrid_async(
|
|
1824
|
+
self,
|
|
1825
|
+
query_embedding: list[float],
|
|
1826
|
+
query_sparse_embedding: SparseEmbedding,
|
|
1827
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1828
|
+
top_k: int = 10,
|
|
1829
|
+
return_embedding: bool = False,
|
|
1830
|
+
score_threshold: float | None = None,
|
|
1831
|
+
group_by: str | None = None,
|
|
1832
|
+
group_size: int | None = None,
|
|
1833
|
+
) -> list[Document]:
|
|
1834
|
+
"""
|
|
1835
|
+
Asynchronously retrieves documents based on dense and sparse embeddings and fuses
|
|
1836
|
+
the results using Reciprocal Rank Fusion.
|
|
1837
|
+
|
|
1838
|
+
This method is not part of the public interface of `QdrantDocumentStore` and shouldn't be used directly.
|
|
1839
|
+
Use the `QdrantHybridRetriever` instead.
|
|
1840
|
+
|
|
1841
|
+
:param query_embedding: Dense embedding of the query.
|
|
1842
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
1843
|
+
:param filters: Filters applied to the retrieved documents.
|
|
1844
|
+
:param top_k: Maximum number of documents to return. If the `group_by` parameter is used, the maximum number of
|
|
1845
|
+
groups to return.
|
|
1846
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
1847
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
1848
|
+
Score of the returned result might be higher or lower than the threshold
|
|
1849
|
+
depending on the Distance function used.
|
|
1850
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
1851
|
+
:param group_by: Payload field to group by; it must be a string or number field. If the field contains more than 1
|
|
1852
|
+
value, all values will be used for grouping. One point can be in multiple groups.
|
|
1853
|
+
:param group_size: Maximum number of points to return per group. Default is 3.
|
|
1854
|
+
|
|
1855
|
+
:returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
|
|
1856
|
+
|
|
1857
|
+
:raises QdrantStoreError:
|
|
1858
|
+
If the Document Store was initialized with `use_sparse_embeddings=False`.
|
|
1859
|
+
"""
|
|
1860
|
+
|
|
1861
|
+
await self._initialize_async_client()
|
|
1862
|
+
assert self._async_client is not None
|
|
1863
|
+
|
|
1864
|
+
if not self.use_sparse_embeddings:
|
|
1865
|
+
message = (
|
|
1866
|
+
"You are trying to query using sparse embeddings, but the Document Store "
|
|
1867
|
+
"was initialized with `use_sparse_embeddings=False`. "
|
|
1868
|
+
)
|
|
1869
|
+
raise QdrantStoreError(message)
|
|
1870
|
+
|
|
1871
|
+
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
786
1872
|
|
|
787
|
-
|
|
1873
|
+
try:
|
|
1874
|
+
if group_by:
|
|
1875
|
+
response = await self._async_client.query_points_groups(
|
|
1876
|
+
collection_name=self.index,
|
|
1877
|
+
prefetch=[
|
|
1878
|
+
rest.Prefetch(
|
|
1879
|
+
query=rest.SparseVector(
|
|
1880
|
+
indices=query_sparse_embedding.indices,
|
|
1881
|
+
values=query_sparse_embedding.values,
|
|
1882
|
+
),
|
|
1883
|
+
using=SPARSE_VECTORS_NAME,
|
|
1884
|
+
filter=qdrant_filters,
|
|
1885
|
+
),
|
|
1886
|
+
rest.Prefetch(
|
|
1887
|
+
query=query_embedding,
|
|
1888
|
+
using=DENSE_VECTORS_NAME,
|
|
1889
|
+
filter=qdrant_filters,
|
|
1890
|
+
),
|
|
1891
|
+
],
|
|
1892
|
+
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
1893
|
+
limit=top_k,
|
|
1894
|
+
group_by=group_by,
|
|
1895
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1896
|
+
score_threshold=score_threshold,
|
|
1897
|
+
with_payload=True,
|
|
1898
|
+
with_vectors=return_embedding,
|
|
1899
|
+
)
|
|
1900
|
+
groups = response.groups
|
|
1901
|
+
else:
|
|
1902
|
+
query_response = await self._async_client.query_points(
|
|
1903
|
+
collection_name=self.index,
|
|
1904
|
+
prefetch=[
|
|
1905
|
+
rest.Prefetch(
|
|
1906
|
+
query=rest.SparseVector(
|
|
1907
|
+
indices=query_sparse_embedding.indices,
|
|
1908
|
+
values=query_sparse_embedding.values,
|
|
1909
|
+
),
|
|
1910
|
+
using=SPARSE_VECTORS_NAME,
|
|
1911
|
+
filter=qdrant_filters,
|
|
1912
|
+
),
|
|
1913
|
+
rest.Prefetch(
|
|
1914
|
+
query=query_embedding,
|
|
1915
|
+
using=DENSE_VECTORS_NAME,
|
|
1916
|
+
filter=qdrant_filters,
|
|
1917
|
+
),
|
|
1918
|
+
],
|
|
1919
|
+
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
1920
|
+
limit=top_k,
|
|
1921
|
+
score_threshold=score_threshold,
|
|
1922
|
+
with_payload=True,
|
|
1923
|
+
with_vectors=return_embedding,
|
|
1924
|
+
)
|
|
1925
|
+
points = query_response.points
|
|
1926
|
+
|
|
1927
|
+
except Exception as e:
|
|
1928
|
+
msg = "Error during hybrid search"
|
|
1929
|
+
raise QdrantStoreError(msg) from e
|
|
1930
|
+
|
|
1931
|
+
if group_by:
|
|
1932
|
+
return self._process_group_results(groups)
|
|
1933
|
+
else:
|
|
1934
|
+
return self._process_query_point_results(points)
|
|
788
1935
|
|
|
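Driving the async variants only requires an event loop. A sketch, assuming a `store` whose collection was set up with sparse support; as the docstring notes, `_query_hybrid_async` is internal and `QdrantHybridRetriever` is the public entry point:

import asyncio

from haystack.dataclasses.sparse_embedding import SparseEmbedding

async def hybrid_search():
    return await store._query_hybrid_async(
        query_embedding=[0.1, 0.2, 0.3, 0.4],
        query_sparse_embedding=SparseEmbedding(indices=[11, 42], values=[0.6, 0.4]),
        top_k=5,
    )

docs = asyncio.run(hybrid_search())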
789
1936
|
def get_distance(self, similarity: str) -> rest.Distance:
|
|
790
1937
|
"""
|
|
@@ -807,14 +1954,39 @@ class QdrantDocumentStore:
|
|
|
807
1954
|
)
|
|
808
1955
|
raise QdrantStoreError(msg) from ke
|
|
809
1956
|
|
|
810
|
-
def _create_payload_index(self, collection_name: str, payload_fields_to_index:
|
|
1957
|
+
def _create_payload_index(self, collection_name: str, payload_fields_to_index: list[dict] | None = None) -> None:
|
|
1958
|
+
"""
|
|
1959
|
+
Create payload index for the collection if payload_fields_to_index is provided.
|
|
1960
|
+
|
|
1961
|
+
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
1962
|
+
"""
|
|
1963
|
+
if payload_fields_to_index is not None:
|
|
1964
|
+
for payload_index in payload_fields_to_index:
|
|
1965
|
+
# self._client is initialized at this point
|
|
1966
|
+
# since _initialize_client() is called before this method is executed
|
|
1967
|
+
|
|
1968
|
+
assert self._client is not None
|
|
1969
|
+
self._client.create_payload_index(
|
|
1970
|
+
collection_name=collection_name,
|
|
1971
|
+
field_name=payload_index["field_name"],
|
|
1972
|
+
field_schema=payload_index["field_schema"],
|
|
1973
|
+
)
|
|
1974
|
+
|
|
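Each entry in `payload_fields_to_index` is a plain dict with `field_name` and `field_schema` keys, forwarded verbatim to `create_payload_index`. A sketch of requesting indexes at construction time; the field names are illustrative:

from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(
    location=":memory:",
    embedding_dim=4,
    payload_fields_to_index=[
        {"field_name": "meta.category", "field_schema": "keyword"},
        {"field_name": "meta.year", "field_schema": "integer"},
    ],
)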
1975
|
+
async def _create_payload_index_async(
|
|
1976
|
+
self, collection_name: str, payload_fields_to_index: list[dict] | None = None
|
|
1977
|
+
) -> None:
|
|
811
1978
|
"""
|
|
812
|
-
|
|
1979
|
+
Asynchronously create payload index for the collection if payload_fields_to_index is provided.
|
|
1980
|
+
|
|
813
1981
|
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
814
1982
|
"""
|
|
815
1983
|
if payload_fields_to_index is not None:
|
|
816
1984
|
for payload_index in payload_fields_to_index:
|
|
817
|
-
self.
|
|
1985
|
+
# self._async_client is initialized at this point
|
|
1986
|
+
# since _initialize_async_client() is called before this method is executed
|
|
1987
|
+
assert self._async_client is not None
|
|
1988
|
+
|
|
1989
|
+
await self._async_client.create_payload_index(
|
|
818
1990
|
collection_name=collection_name,
|
|
819
1991
|
field_name=payload_index["field_name"],
|
|
820
1992
|
field_schema=payload_index["field_schema"],
|
|
@@ -829,10 +2001,11 @@ class QdrantDocumentStore:
|
|
|
829
2001
|
use_sparse_embeddings: bool,
|
|
830
2002
|
sparse_idf: bool,
|
|
831
2003
|
on_disk: bool = False,
|
|
832
|
-
payload_fields_to_index:
|
|
833
|
-
):
|
|
2004
|
+
payload_fields_to_index: list[dict] | None = None,
|
|
2005
|
+
) -> None:
|
|
834
2006
|
"""
|
|
835
2007
|
Sets up the Qdrant collection with the specified parameters.
|
|
2008
|
+
|
|
836
2009
|
:param collection_name:
|
|
837
2010
|
The name of the collection to set up.
|
|
838
2011
|
:param embedding_dim:
|
|
@@ -856,9 +2029,13 @@ class QdrantDocumentStore:
|
|
|
856
2029
|
If the collection exists with a different similarity measure or embedding dimension.
|
|
857
2030
|
|
|
858
2031
|
"""
|
|
2032
|
+
|
|
2033
|
+
self._initialize_client()
|
|
2034
|
+
assert self._client is not None
|
|
2035
|
+
|
|
859
2036
|
distance = self.get_distance(similarity)
|
|
860
2037
|
|
|
861
|
-
if recreate_collection or not self.
|
|
2038
|
+
if recreate_collection or not self._client.collection_exists(collection_name):
|
|
862
2039
|
# There is no need to verify the current configuration of that
|
|
863
2040
|
# collection. It might be just recreated again or does not exist yet.
|
|
864
2041
|
self.recreate_collection(
|
|
@@ -868,66 +2045,76 @@ class QdrantDocumentStore:
|
|
|
868
2045
|
self._create_payload_index(collection_name, payload_fields_to_index)
|
|
869
2046
|
return
|
|
870
2047
|
|
|
871
|
-
collection_info = self.
|
|
2048
|
+
collection_info = self._client.get_collection(collection_name)
|
|
872
2049
|
|
|
873
|
-
|
|
874
|
-
isinstance(collection_info.config.params.vectors, dict)
|
|
875
|
-
and DENSE_VECTORS_NAME in collection_info.config.params.vectors
|
|
876
|
-
)
|
|
2050
|
+
self._validate_collection_compatibility(collection_name, collection_info, distance, embedding_dim)
|
|
877
2051
|
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
2052
|
+
async def _set_up_collection_async(
|
|
2053
|
+
self,
|
|
2054
|
+
collection_name: str,
|
|
2055
|
+
embedding_dim: int,
|
|
2056
|
+
recreate_collection: bool,
|
|
2057
|
+
similarity: str,
|
|
2058
|
+
use_sparse_embeddings: bool,
|
|
2059
|
+
sparse_idf: bool,
|
|
2060
|
+
on_disk: bool = False,
|
|
2061
|
+
payload_fields_to_index: list[dict] | None = None,
|
|
2062
|
+
) -> None:
|
|
2063
|
+
"""
|
|
2064
|
+
Asynchronously sets up the Qdrant collection with the specified parameters.
|
|
888
2065
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
2066
|
+
:param collection_name:
|
|
2067
|
+
The name of the collection to set up.
|
|
2068
|
+
:param embedding_dim:
|
|
2069
|
+
The dimension of the embeddings.
|
|
2070
|
+
:param recreate_collection:
|
|
2071
|
+
Whether to recreate the collection if it already exists.
|
|
2072
|
+
:param similarity:
|
|
2073
|
+
The similarity measure to use.
|
|
2074
|
+
:param use_sparse_embeddings:
|
|
2075
|
+
Whether to use sparse embeddings.
|
|
2076
|
+
:param sparse_idf:
|
|
2077
|
+
Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
|
|
2078
|
+
:param on_disk:
|
|
2079
|
+
Whether to store the collection on disk.
|
|
2080
|
+
:param payload_fields_to_index:
|
|
2081
|
+
List of payload fields to index.
|
|
896
2082
|
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
current_distance = collection_info.config.params.vectors.distance
|
|
902
|
-
current_vector_size = collection_info.config.params.vectors.size
|
|
2083
|
+
:raises QdrantStoreError:
|
|
2084
|
+
If the collection exists with incompatible settings.
|
|
2085
|
+
:raises ValueError:
|
|
2086
|
+
If the collection exists with a different similarity measure or embedding dimension.
|
|
903
2087
|
|
|
904
|
-
|
|
905
|
-
msg = (
|
|
906
|
-
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
907
|
-
f"but it is configured with a similarity '{current_distance.name}'. "
|
|
908
|
-
f"If you want to use that collection, but with a different "
|
|
909
|
-
f"similarity, please set `recreate_collection=True` argument."
|
|
910
|
-
)
|
|
911
|
-
raise ValueError(msg)
|
|
2088
|
+
"""
|
|
912
2089
|
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
2090
|
+
await self._initialize_async_client()
|
|
2091
|
+
assert self._async_client is not None
|
|
2092
|
+
|
|
2093
|
+
distance = self.get_distance(similarity)
|
|
2094
|
+
|
|
2095
|
+
if recreate_collection or not await self._async_client.collection_exists(collection_name):
|
|
2096
|
+
# There is no need to verify the current configuration of that
|
|
2097
|
+
# collection. It might be just recreated again or does not exist yet.
|
|
2098
|
+
await self.recreate_collection_async(
|
|
2099
|
+
collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings, sparse_idf
|
|
919
2100
|
)
|
|
920
|
-
|
|
2101
|
+
# Create Payload index if payload_fields_to_index is provided
|
|
2102
|
+
await self._create_payload_index_async(collection_name, payload_fields_to_index)
|
|
2103
|
+
return
|
|
2104
|
+
|
|
2105
|
+
collection_info = await self._async_client.get_collection(collection_name)
|
|
2106
|
+
|
|
2107
|
+
self._validate_collection_compatibility(collection_name, collection_info, distance, embedding_dim)
|
|
921
2108
|
|
|
922
2109
|
def recreate_collection(
|
|
923
2110
|
self,
|
|
924
2111
|
collection_name: str,
|
|
925
|
-
distance,
|
|
2112
|
+
distance: rest.Distance,
|
|
926
2113
|
embedding_dim: int,
|
|
927
|
-
on_disk:
|
|
928
|
-
use_sparse_embeddings:
|
|
2114
|
+
on_disk: bool | None = None,
|
|
2115
|
+
use_sparse_embeddings: bool | None = None,
|
|
929
2116
|
sparse_idf: bool = False,
|
|
930
|
-
):
|
|
2117
|
+
) -> None:
|
|
931
2118
|
"""
|
|
932
2119
|
Recreates the Qdrant collection with the specified parameters.
|
|
933
2120
|
|
|
@@ -944,96 +2131,356 @@ class QdrantDocumentStore:
|
|
|
944
2131
|
:param sparse_idf:
|
|
945
2132
|
Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
|
|
946
2133
|
"""
|
|
947
|
-
|
|
948
|
-
on_disk
|
|
2134
|
+
vectors_config, sparse_vectors_config = self._prepare_collection_config(
|
|
2135
|
+
embedding_dim, distance, on_disk, use_sparse_embeddings, sparse_idf
|
|
2136
|
+
)
|
|
2137
|
+
collection_params = self._prepare_collection_params()
|
|
949
2138
|
|
|
950
|
-
|
|
951
|
-
|
|
2139
|
+
self._initialize_client()
|
|
2140
|
+
assert self._client is not None
|
|
952
2141
|
|
|
953
|
-
|
|
954
|
-
|
|
2142
|
+
if self._client.collection_exists(collection_name):
|
|
2143
|
+
self._client.delete_collection(collection_name)
|
|
955
2144
|
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
vectors_config
|
|
2145
|
+
self._client.create_collection(
|
|
2146
|
+
collection_name=collection_name,
|
|
2147
|
+
vectors_config=vectors_config,
|
|
2148
|
+
sparse_vectors_config=sparse_vectors_config,
|
|
2149
|
+
**collection_params,
|
|
2150
|
+
)
|
|
959
2151
|
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
2152
|
+
async def recreate_collection_async(
|
|
2153
|
+
self,
|
|
2154
|
+
collection_name: str,
|
|
2155
|
+
distance: rest.Distance,
|
|
2156
|
+
embedding_dim: int,
|
|
2157
|
+
on_disk: bool | None = None,
|
|
2158
|
+
use_sparse_embeddings: bool | None = None,
|
|
2159
|
+
sparse_idf: bool = False,
|
|
2160
|
+
) -> None:
|
|
2161
|
+
"""
|
|
2162
|
+
Asynchronously recreates the Qdrant collection with the specified parameters.
|
|
2163
|
+
|
|
2164
|
+
:param collection_name:
|
|
2165
|
+
The name of the collection to recreate.
|
|
2166
|
+
:param distance:
|
|
2167
|
+
The distance metric to use for the collection.
|
|
2168
|
+
:param embedding_dim:
|
|
2169
|
+
The dimension of the embeddings.
|
|
2170
|
+
:param on_disk:
|
|
2171
|
+
Whether to store the collection on disk.
|
|
2172
|
+
:param use_sparse_embeddings:
|
|
2173
|
+
Whether to use sparse embeddings.
|
|
2174
|
+
:param sparse_idf:
|
|
2175
|
+
Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
|
|
2176
|
+
"""
|
|
2177
|
+
vectors_config, sparse_vectors_config = self._prepare_collection_config(
|
|
2178
|
+
embedding_dim, distance, on_disk, use_sparse_embeddings, sparse_idf
|
|
2179
|
+
)
|
|
2180
|
+
collection_params = self._prepare_collection_params()
|
|
2181
|
+
|
|
2182
|
+
await self._initialize_async_client()
|
|
2183
|
+
assert self._async_client is not None
|
|
968
2184
|
|
|
969
|
-
if self.
|
|
970
|
-
self.
|
|
2185
|
+
if await self._async_client.collection_exists(collection_name):
|
|
2186
|
+
await self._async_client.delete_collection(collection_name)
|
|
971
2187
|
|
|
972
|
-
self.
|
|
2188
|
+
await self._async_client.create_collection(
|
|
973
2189
|
collection_name=collection_name,
|
|
974
2190
|
vectors_config=vectors_config,
|
|
975
|
-
sparse_vectors_config=sparse_vectors_config
|
|
976
|
-
|
|
977
|
-
replication_factor=self.replication_factor,
|
|
978
|
-
write_consistency_factor=self.write_consistency_factor,
|
|
979
|
-
on_disk_payload=self.on_disk_payload,
|
|
980
|
-
hnsw_config=self.hnsw_config,
|
|
981
|
-
optimizers_config=self.optimizers_config,
|
|
982
|
-
wal_config=self.wal_config,
|
|
983
|
-
quantization_config=self.quantization_config,
|
|
984
|
-
init_from=self.init_from,
|
|
2191
|
+
sparse_vectors_config=sparse_vectors_config,
|
|
2192
|
+
**collection_params,
|
|
985
2193
|
)
|
|
986
2194
|
|
|
987
2195
|
def _handle_duplicate_documents(
|
|
988
2196
|
self,
|
|
989
|
-
documents:
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
):
|
|
2197
|
+
documents: list[Document],
|
|
2198
|
+
policy: DuplicatePolicy | None = None,
|
|
2199
|
+
) -> list[Document]:
|
|
993
2200
|
"""
|
|
994
2201
|
Checks whether any of the passed documents already exists in the chosen index and returns a list of
|
|
995
2202
|
documents that are not in the index yet.
|
|
996
2203
|
|
|
997
2204
|
:param documents: A list of Haystack Document objects.
|
|
998
|
-
:param index: name of the index
|
|
999
2205
|
:param policy: The duplicate policy to use when writing documents.
|
|
1000
2206
|
:returns: A list of Haystack Document objects.
|
|
1001
2207
|
"""
|
|
1002
2208
|
|
|
1003
|
-
index = index or self.index
|
|
1004
2209
|
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
1005
|
-
documents = self._drop_duplicate_documents(documents
|
|
1006
|
-
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents]
|
|
1007
|
-
ids_exist_in_db:
|
|
2210
|
+
documents = self._drop_duplicate_documents(documents)
|
|
2211
|
+
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
|
|
2212
|
+
ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
|
|
1008
2213
|
|
|
1009
2214
|
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
1010
|
-
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
|
|
2215
|
+
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
1011
2216
|
raise DuplicateDocumentError(msg)
|
|
1012
2217
|
|
|
1013
2218
|
documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
|
|
1014
2219
|
|
|
1015
2220
|
return documents
|
|
1016
2221
|
|
|
1017
|
-
def
|
|
2222
|
+
async def _handle_duplicate_documents_async(
|
|
2223
|
+
self,
|
|
2224
|
+
documents: list[Document],
|
|
2225
|
+
policy: DuplicatePolicy | None = None,
|
|
2226
|
+
) -> list[Document]:
|
|
1018
2227
|
"""
|
|
1019
|
-
|
|
2228
|
+
Asynchronously checks whether any of the passed documents already exists
|
|
2229
|
+
in the chosen index and returns a list of
|
|
2230
|
+
documents that are not in the index yet.
|
|
1020
2231
|
|
|
1021
2232
|
:param documents: A list of Haystack Document objects.
|
|
1022
|
-
:param
|
|
2233
|
+
:param policy: The duplicate policy to use when writing documents.
|
|
1023
2234
|
:returns: A list of Haystack Document objects.
|
|
1024
2235
|
"""
|
|
1025
|
-
|
|
1026
|
-
|
|
2236
|
+
|
|
2237
|
+
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
2238
|
+
documents = self._drop_duplicate_documents(documents)
|
|
2239
|
+
documents_found = await self.get_documents_by_id_async(ids=[doc.id for doc in documents])
|
|
2240
|
+
ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
|
|
2241
|
+
|
|
2242
|
+
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
2243
|
+
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
2244
|
+
raise DuplicateDocumentError(msg)
|
|
2245
|
+
|
|
2246
|
+
documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
|
|
2247
|
+
|
|
2248
|
+
return documents
|
|
2249
|
+
|
|
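Net effect of the two duplicate-handling steps: in-batch duplicates are always dropped first, then `SKIP` silently filters out documents already stored while `FAIL` raises `DuplicateDocumentError`. A minimal sketch:

from haystack.dataclasses import Document
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

store = QdrantDocumentStore(location=":memory:", embedding_dim=4)
doc = Document(content="hello", embedding=[0.1, 0.2, 0.3, 0.4])

store.write_documents([doc], policy=DuplicatePolicy.SKIP)
store.write_documents([doc], policy=DuplicatePolicy.SKIP)  # silently writes nothing
store.write_documents([doc], policy=DuplicatePolicy.FAIL)  # raises DuplicateDocumentError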
2250
|
+
def _drop_duplicate_documents(self, documents: list[Document]) -> list[Document]:
|
|
2251
|
+
"""
|
|
2252
|
+
Drop duplicate documents based on the same hash ID.
|
|
2253
|
+
|
|
2254
|
+
"""
|
|
2255
|
+
_hash_ids: set = set()
|
|
2256
|
+
_documents: list[Document] = []
|
|
1027
2257
|
|
|
1028
2258
|
for document in documents:
|
|
1029
2259
|
if document.id in _hash_ids:
|
|
1030
2260
|
logger.info(
|
|
1031
|
-
"Duplicate Documents: Document with id '
|
|
1032
|
-
document.id,
|
|
1033
|
-
index
|
|
2261
|
+
"Duplicate Documents: Document with id '{document_id}' already exists in index '{index}'",
|
|
2262
|
+
document_id=document.id,
|
|
2263
|
+
index=self.index,
|
|
1034
2264
|
)
|
|
1035
2265
|
continue
|
|
1036
2266
|
_documents.append(document)
|
|
1037
2267
|
_hash_ids.add(document.id)
|
|
1038
2268
|
|
|
1039
2269
|
return _documents
|
|
2270
|
+
|
|
2271
|
+
def _prepare_collection_params(self) -> dict[str, Any]:
|
|
2272
|
+
"""
|
|
2273
|
+
Prepares the common parameters for collection creation.
|
|
2274
|
+
"""
|
|
2275
|
+
return {
|
|
2276
|
+
"shard_number": self.shard_number,
|
|
2277
|
+
"replication_factor": self.replication_factor,
|
|
2278
|
+
"write_consistency_factor": self.write_consistency_factor,
|
|
2279
|
+
"on_disk_payload": self.on_disk_payload,
|
|
2280
|
+
"hnsw_config": self.hnsw_config,
|
|
2281
|
+
"optimizers_config": self.optimizers_config,
|
|
2282
|
+
"wal_config": self.wal_config,
|
|
2283
|
+
"quantization_config": self.quantization_config,
|
|
2284
|
+
}
|
|
2285
|
+
|
|
2286
|
+
def _prepare_client_params(self) -> dict[str, Any]:
|
|
2287
|
+
"""
|
|
2288
|
+
Prepares the common parameters for client initialization.
|
|
2289
|
+
|
|
2290
|
+
"""
|
|
2291
|
+
return {
|
|
2292
|
+
"location": self.location,
|
|
2293
|
+
"url": self.url,
|
|
2294
|
+
"port": self.port,
|
|
2295
|
+
"grpc_port": self.grpc_port,
|
|
2296
|
+
"prefer_grpc": self.prefer_grpc,
|
|
2297
|
+
"https": self.https,
|
|
2298
|
+
"api_key": self.api_key.resolve_value() if self.api_key else None,
|
|
2299
|
+
"prefix": self.prefix,
|
|
2300
|
+
"timeout": self.timeout,
|
|
2301
|
+
"host": self.host,
|
|
2302
|
+
"path": self.path,
|
|
2303
|
+
# NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
|
|
2304
|
+
# class attribute. For example, the resolved api key is added to metadata by the QdrantClient class
|
|
2305
|
+
# when using a hosted Qdrant service, which means running to_dict() exposes the api key.
|
|
2306
|
+
"metadata": {**self.metadata},
|
|
2307
|
+
"force_disable_check_same_thread": self.force_disable_check_same_thread,
|
|
2308
|
+
}
|
|
2309
|
+
|
|
2310
|
+
def _prepare_collection_config(
|
|
2311
|
+
self,
|
|
2312
|
+
embedding_dim: int,
|
|
2313
|
+
distance: rest.Distance,
|
|
2314
|
+
on_disk: bool | None = None,
|
|
2315
|
+
use_sparse_embeddings: bool | None = None,
|
|
2316
|
+
sparse_idf: bool = False,
|
|
2317
|
+
) -> tuple[dict[str, rest.VectorParams] | rest.VectorParams, dict[str, rest.SparseVectorParams] | None]:
|
|
2318
|
+
"""
|
|
2319
|
+
Prepares the configuration for creating or recreating a Qdrant collection.
|
|
2320
|
+
|
|
2321
|
+
"""
|
|
2322
|
+
if on_disk is None:
|
|
2323
|
+
on_disk = self.on_disk
|
|
2324
|
+
|
|
2325
|
+
if use_sparse_embeddings is None:
|
|
2326
|
+
use_sparse_embeddings = self.use_sparse_embeddings
|
|
2327
|
+
|
|
2328
|
+
# dense vectors configuration
|
|
2329
|
+
base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
|
|
2330
|
+
vectors_config: rest.VectorParams | dict[str, rest.VectorParams] = base_vectors_config
|
|
2331
|
+
|
|
2332
|
+
sparse_vectors_config: dict[str, rest.SparseVectorParams] | None = None
|
|
2333
|
+
|
|
2334
|
+
if use_sparse_embeddings:
|
|
2335
|
+
# in this case, we need to define named vectors
|
|
2336
|
+
vectors_config = {DENSE_VECTORS_NAME: base_vectors_config}
|
|
2337
|
+
|
|
2338
|
+
sparse_vectors_config = {
|
|
2339
|
+
SPARSE_VECTORS_NAME: rest.SparseVectorParams(
|
|
2340
|
+
index=rest.SparseIndexParams(
|
|
2341
|
+
on_disk=on_disk,
|
|
2342
|
+
),
|
|
2343
|
+
modifier=rest.Modifier.IDF if sparse_idf else None,
|
|
2344
|
+
),
|
|
2345
|
+
}
|
|
2346
|
+
|
|
2347
|
+
return vectors_config, sparse_vectors_config
|
|
2348
|
+
|
|
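The returned configuration takes two shapes: a bare `VectorParams` for dense-only collections, or named-vector dicts once sparse support is enabled. Roughly what the method yields with `use_sparse_embeddings=True` and `sparse_idf=True`, assuming 384-dim cosine vectors:

from qdrant_client.http import models as rest

vectors_config = {
    "text-dense": rest.VectorParams(size=384, on_disk=False, distance=rest.Distance.COSINE),
}
sparse_vectors_config = {
    "text-sparse": rest.SparseVectorParams(
        index=rest.SparseIndexParams(on_disk=False),
        modifier=rest.Modifier.IDF,  # only set when sparse_idf=True
    ),
}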
2349
|
+
@staticmethod
|
|
2350
|
+
def _validate_filters(filters: dict[str, Any] | rest.Filter | None = None) -> None:
|
|
2351
|
+
"""
|
|
2352
|
+
Validates the filters provided for querying.
|
|
2353
|
+
|
|
2354
|
+
:param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
|
|
2355
|
+
:raises ValueError: If the filters are not in the correct format or syntax.
|
|
2356
|
+
"""
|
|
2357
|
+
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
2358
|
+
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
2359
|
+
raise ValueError(msg)
|
|
2360
|
+
|
|
2361
|
+
if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
|
|
2362
|
+
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
|
|
2363
|
+
raise ValueError(msg)
|
|
2364
|
+
|
|
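Filters must therefore be either native `rest.Filter` objects (passed through untouched) or Haystack-style dicts with a top-level "operator"; anything else is rejected before the query is sent. Both accepted forms, with illustrative field names:

from qdrant_client.http import models as rest

# Haystack-style dict: a top-level "operator" key is required.
haystack_filter = {
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "news"},
    ],
}

# Native Qdrant filter.
native_filter = rest.Filter(
    must=[rest.FieldCondition(key="meta.category", match=rest.MatchValue(value="news"))]
)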
2365
|
+
def _process_query_point_results(
|
|
2366
|
+
self, results: list[rest.ScoredPoint], scale_score: bool = False
|
|
2367
|
+
) -> list[Document]:
|
|
2368
|
+
"""
|
|
2369
|
+
Processes query results from Qdrant.
|
|
2370
|
+
"""
|
|
2371
|
+
documents = [
|
|
2372
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
2373
|
+
for point in results
|
|
2374
|
+
]
|
|
2375
|
+
|
|
2376
|
+
if scale_score:
|
|
2377
|
+
for document in documents:
|
|
2378
|
+
score = document.score
|
|
2379
|
+
if score is None:
|
|
2380
|
+
continue
|
|
2381
|
+
if self.similarity == "cosine":
|
|
2382
|
+
score = (score + 1) / 2
|
|
2383
|
+
else:
|
|
2384
|
+
score = float(1 / (1 + exp(-score / 100)))
|
|
2385
|
+
document.score = score
|
|
2386
|
+
|
|
2387
|
+
return documents
|
|
2388
|
+
|
|
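The two scaling branches in `_process_query_point_results`: cosine scores in [-1, 1] are mapped linearly onto [0, 1], and every other metric is squashed with a logistic on score/100. A worked check of both formulas:

from numpy import exp

cosine_score = 0.8
print((cosine_score + 1) / 2)  # 0.9

dot_product_score = 120.0
print(float(1 / (1 + exp(-dot_product_score / 100))))  # ~0.769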
2389
|
+
def _process_group_results(self, groups: list[rest.PointGroup]) -> list[Document]:
|
|
2390
|
+
"""
|
|
2391
|
+
Processes grouped query results from Qdrant.
|
|
2392
|
+
|
|
2393
|
+
"""
|
|
2394
|
+
if not groups:
|
|
2395
|
+
return []
|
|
2396
|
+
|
|
2397
|
+
return [
|
|
2398
|
+
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
2399
|
+
for group in groups
|
|
2400
|
+
for point in group.hits
|
|
2401
|
+
]
|
|
2402
|
+
|
|
2403
|
+
def _validate_collection_compatibility(
|
|
2404
|
+
self,
|
|
2405
|
+
collection_name: str,
|
|
2406
|
+
collection_info: rest.CollectionInfo,
|
|
2407
|
+
distance: rest.Distance,
|
|
2408
|
+
embedding_dim: int,
|
|
2409
|
+
) -> None:
|
|
2410
|
+
"""
|
|
2411
|
+
Validates that an existing collection is compatible with the current configuration.
|
|
2412
|
+
"""
|
|
2413
|
+
vectors_config = collection_info.config.params.vectors
|
|
2414
|
+
|
|
2415
|
+
if vectors_config is None:
|
|
2416
|
+
msg = f"Collection '{collection_name}' has no vector configuration."
|
|
2417
|
+
raise QdrantStoreError(msg)
|
|
2418
|
+
|
|
2419
|
+
has_named_vectors = isinstance(vectors_config, dict)
|
|
2420
|
+
|
|
2421
|
+
if has_named_vectors and DENSE_VECTORS_NAME not in vectors_config:
|
|
2422
|
+
msg = (
|
|
2423
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
2424
|
+
f"but it has been originally created outside of Haystack and is not supported. "
|
|
2425
|
+
f"If possible, you should create a new Document Store with Haystack. "
|
|
2426
|
+
f"In case you want to migrate the existing collection, see an example script in "
|
|
2427
|
+
f"https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/src/"
|
|
2428
|
+
f"haystack_integrations/document_stores/qdrant/migrate_to_sparse.py."
|
|
2429
|
+
)
|
|
2430
|
+
raise QdrantStoreError(msg)
|
|
2431
|
+
|
|
2432
|
+
if self.use_sparse_embeddings and not has_named_vectors:
|
|
2433
|
+
msg = (
|
|
2434
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
2435
|
+
f"but it has been originally created without sparse embedding vectors. "
|
|
2436
|
+
f"If you want to use that collection, you can set `use_sparse_embeddings=False`. "
|
|
2437
|
+
f"To use sparse embeddings, you need to recreate the collection or migrate the existing one. "
|
|
2438
|
+
f"See `migrate_to_sparse_embeddings_support` function in "
|
|
2439
|
+
f"`haystack_integrations.document_stores.qdrant`."
|
|
2440
|
+
)
|
|
2441
|
+
raise QdrantStoreError(msg)
|
|
2442
|
+
|
|
2443
|
+
if not self.use_sparse_embeddings and has_named_vectors:
|
|
2444
|
+
msg = (
|
|
2445
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
2446
|
+
f"but it has been originally created with sparse embedding vectors."
|
|
2447
|
+
f"If you want to use that collection, please set `use_sparse_embeddings=True`."
|
|
2448
|
+
)
|
|
2449
|
+
raise QdrantStoreError(msg)
|
|
2450
|
+
|
|
2451
|
+
# Get current distance and vector size based on collection configuration
|
|
2452
|
+
if self.use_sparse_embeddings:
|
|
2453
|
+
if not isinstance(vectors_config, dict):
|
|
2454
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for sparse embeddings."
|
|
2455
|
+
raise QdrantStoreError(msg)
|
|
2456
|
+
|
|
2457
|
+
dense_vector_config = vectors_config[DENSE_VECTORS_NAME]
|
|
2458
|
+
current_distance = dense_vector_config.distance
|
|
2459
|
+
current_vector_size = dense_vector_config.size
|
|
2460
|
+
else:
|
|
2461
|
+
if isinstance(vectors_config, dict):
|
|
2462
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for dense embeddings only."
|
|
2463
|
+
raise QdrantStoreError(msg)
|
|
2464
|
+
|
|
2465
|
+
current_distance = vectors_config.distance
|
|
2466
|
+
current_vector_size = vectors_config.size
|
|
2467
|
+
|
|
2468
|
+
# Validate distance metric
|
|
2469
|
+
if current_distance != distance:
|
|
2470
|
+
msg = (
|
|
2471
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
2472
|
+
f"but it is configured with a similarity '{current_distance.name}'. "
|
|
2473
|
+
f"If you want to use that collection, but with a different "
|
|
2474
|
+
f"similarity, please set `recreate_collection=True` argument."
|
|
2475
|
+
)
|
|
2476
|
+
raise ValueError(msg)
|
|
2477
|
+
|
|
2478
|
+
# Validate embedding dimension
|
|
2479
|
+
if current_vector_size != embedding_dim:
|
|
2480
|
+
msg = (
|
|
2481
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
2482
|
+
f"but it is configured with a vector size '{current_vector_size}'. "
|
|
2483
|
+
f"If you want to use that collection, but with a different "
|
|
2484
|
+
f"vector size, please set `recreate_collection=True` argument."
|
|
2485
|
+
)
|
|
2486
|
+
raise ValueError(msg)
|