qdrant-haystack 9.1.1__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haystack_integrations/components/retrievers/py.typed +0 -0
- haystack_integrations/components/retrievers/qdrant/retriever.py +158 -87
- haystack_integrations/document_stores/py.typed +0 -0
- haystack_integrations/document_stores/qdrant/converters.py +13 -12
- haystack_integrations/document_stores/qdrant/document_store.py +945 -171
- haystack_integrations/document_stores/qdrant/filters.py +87 -168
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +11 -7
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/METADATA +9 -25
- qdrant_haystack-10.2.0.dist-info/RECORD +13 -0
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/WHEEL +1 -1
- qdrant_haystack-9.1.1.dist-info/RECORD +0 -11
- {qdrant_haystack-9.1.1.dist-info → qdrant_haystack-10.2.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import inspect
|
|
2
|
+
from collections.abc import AsyncGenerator, Generator
|
|
2
3
|
from itertools import islice
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, ClassVar, cast
|
|
4
5
|
|
|
5
|
-
import numpy as np
|
|
6
6
|
import qdrant_client
|
|
7
7
|
from haystack import default_from_dict, default_to_dict, logging
|
|
8
8
|
from haystack.dataclasses import Document
|
|
@@ -10,7 +10,7 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
|
10
10
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
11
11
|
from haystack.document_stores.types import DuplicatePolicy
|
|
12
12
|
from haystack.utils import Secret, deserialize_secrets_inplace
|
|
13
|
-
from
|
|
13
|
+
from numpy import exp
|
|
14
14
|
from qdrant_client.http import models as rest
|
|
15
15
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
16
16
|
from tqdm import tqdm
|
|
@@ -26,15 +26,21 @@ from .filters import convert_filters_to_qdrant
|
|
|
26
26
|
|
|
27
27
|
logger = logging.getLogger(__name__)
|
|
28
28
|
|
|
29
|
+
# Default group size to apply when using group_by
|
|
30
|
+
# - Our methods use None as the default for optional group_size parameter.
|
|
31
|
+
# - Qdrant expects an integer and internally defaults to 3 when performing grouped queries.
|
|
32
|
+
# - When group_by is specified but group_size is None, we use this value instead of passing None.
|
|
33
|
+
DEFAULT_GROUP_SIZE = 3
|
|
34
|
+
|
|
29
35
|
|
|
30
36
|
class QdrantStoreError(DocumentStoreError):
|
|
31
37
|
pass
|
|
32
38
|
|
|
33
39
|
|
|
34
|
-
FilterType =
|
|
40
|
+
FilterType = dict[str, dict[str, Any] | list[Any] | str | int | float | bool]
|
|
35
41
|
|
|
36
42
|
|
|
37
|
-
def get_batches_from_generator(iterable, n):
|
|
43
|
+
def get_batches_from_generator(iterable: list, n: int) -> Generator:
|
|
38
44
|
"""
|
|
39
45
|
Batch elements of an iterable into fixed-length chunks or blocks.
|
|
40
46
|
"""
|
|
@@ -47,9 +53,8 @@ def get_batches_from_generator(iterable, n):
|
|
|
47
53
|
|
|
48
54
|
class QdrantDocumentStore:
|
|
49
55
|
"""
|
|
50
|
-
A QdrantDocumentStore implementation that you
|
|
51
|
-
|
|
52
|
-
and Qdrant Cloud Cluster deployments.
|
|
56
|
+
A QdrantDocumentStore implementation that you can use with any Qdrant instance: in-memory, disk-persisted,
|
|
57
|
+
Docker-based, and Qdrant Cloud Cluster deployments.
|
|
53
58
|
|
|
54
59
|
Usage example by creating an in-memory instance:
|
|
55
60
|
|
|
@@ -59,7 +64,8 @@ class QdrantDocumentStore:
|
|
|
59
64
|
|
|
60
65
|
document_store = QdrantDocumentStore(
|
|
61
66
|
":memory:",
|
|
62
|
-
recreate_index=True
|
|
67
|
+
recreate_index=True,
|
|
68
|
+
embedding_dim=5
|
|
63
69
|
)
|
|
64
70
|
document_store.write_documents([
|
|
65
71
|
Document(content="This is first", embedding=[0.0]*5),
|
|
@@ -84,7 +90,7 @@ class QdrantDocumentStore:
|
|
|
84
90
|
```
|
|
85
91
|
"""
|
|
86
92
|
|
|
87
|
-
SIMILARITY: ClassVar[
|
|
93
|
+
SIMILARITY: ClassVar[dict[str, rest.Distance]] = {
|
|
88
94
|
"cosine": rest.Distance.COSINE,
|
|
89
95
|
"dot_product": rest.Distance.DOT,
|
|
90
96
|
"l2": rest.Distance.EUCLID,
|
|
@@ -92,17 +98,17 @@ class QdrantDocumentStore:
|
|
|
92
98
|
|
|
93
99
|
def __init__(
|
|
94
100
|
self,
|
|
95
|
-
location:
|
|
96
|
-
url:
|
|
101
|
+
location: str | None = None,
|
|
102
|
+
url: str | None = None,
|
|
97
103
|
port: int = 6333,
|
|
98
104
|
grpc_port: int = 6334,
|
|
99
105
|
prefer_grpc: bool = False,
|
|
100
|
-
https:
|
|
101
|
-
api_key:
|
|
102
|
-
prefix:
|
|
103
|
-
timeout:
|
|
104
|
-
host:
|
|
105
|
-
path:
|
|
106
|
+
https: bool | None = None,
|
|
107
|
+
api_key: Secret | None = None,
|
|
108
|
+
prefix: str | None = None,
|
|
109
|
+
timeout: int | None = None,
|
|
110
|
+
host: str | None = None,
|
|
111
|
+
path: str | None = None,
|
|
106
112
|
force_disable_check_same_thread: bool = False,
|
|
107
113
|
index: str = "Document",
|
|
108
114
|
embedding_dim: int = 768,
|
|
@@ -113,24 +119,25 @@ class QdrantDocumentStore:
|
|
|
113
119
|
return_embedding: bool = False,
|
|
114
120
|
progress_bar: bool = True,
|
|
115
121
|
recreate_index: bool = False,
|
|
116
|
-
shard_number:
|
|
117
|
-
replication_factor:
|
|
118
|
-
write_consistency_factor:
|
|
119
|
-
on_disk_payload:
|
|
120
|
-
hnsw_config:
|
|
121
|
-
optimizers_config:
|
|
122
|
-
wal_config:
|
|
123
|
-
quantization_config:
|
|
124
|
-
init_from: Optional[dict] = None,
|
|
122
|
+
shard_number: int | None = None,
|
|
123
|
+
replication_factor: int | None = None,
|
|
124
|
+
write_consistency_factor: int | None = None,
|
|
125
|
+
on_disk_payload: bool | None = None,
|
|
126
|
+
hnsw_config: dict | None = None,
|
|
127
|
+
optimizers_config: dict | None = None,
|
|
128
|
+
wal_config: dict | None = None,
|
|
129
|
+
quantization_config: dict | None = None,
|
|
125
130
|
wait_result_from_api: bool = True,
|
|
126
|
-
metadata:
|
|
131
|
+
metadata: dict | None = None,
|
|
127
132
|
write_batch_size: int = 100,
|
|
128
133
|
scroll_size: int = 10_000,
|
|
129
|
-
payload_fields_to_index:
|
|
130
|
-
):
|
|
134
|
+
payload_fields_to_index: list[dict] | None = None,
|
|
135
|
+
) -> None:
|
|
131
136
|
"""
|
|
137
|
+
Initializes a QdrantDocumentStore.
|
|
138
|
+
|
|
132
139
|
:param location:
|
|
133
|
-
If `memory` - use in-memory Qdrant instance.
|
|
140
|
+
If `":memory:"` - use in-memory Qdrant instance.
|
|
134
141
|
If `str` - use it as a URL parameter.
|
|
135
142
|
If `None` - use default values for host and port.
|
|
136
143
|
:param url:
|
|
@@ -164,7 +171,7 @@ class QdrantDocumentStore:
|
|
|
164
171
|
Dimension of the embeddings.
|
|
165
172
|
:param on_disk:
|
|
166
173
|
Whether to store the collection on disk.
|
|
167
|
-
:param
|
|
174
|
+
:param use_sparse_embeddings:
|
|
168
175
|
If set to `True`, enables support for sparse embeddings.
|
|
169
176
|
:param sparse_idf:
|
|
170
177
|
If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
|
|
@@ -201,8 +208,6 @@ class QdrantDocumentStore:
|
|
|
201
208
|
Params for Write-Ahead-Log.
|
|
202
209
|
:param quantization_config:
|
|
203
210
|
Params for quantization. If `None`, quantization will be disabled.
|
|
204
|
-
:param init_from:
|
|
205
|
-
Use data stored in another collection to initialize this collection.
|
|
206
211
|
:param wait_result_from_api:
|
|
207
212
|
Whether to wait for the result from the API after each request.
|
|
208
213
|
:param metadata:
|
|
@@ -215,8 +220,8 @@ class QdrantDocumentStore:
|
|
|
215
220
|
List of payload fields to index.
|
|
216
221
|
"""
|
|
217
222
|
|
|
218
|
-
self._client = None
|
|
219
|
-
self._async_client = None
|
|
223
|
+
self._client: qdrant_client.QdrantClient | None = None
|
|
224
|
+
self._async_client: qdrant_client.AsyncQdrantClient | None = None
|
|
220
225
|
|
|
221
226
|
# Store the Qdrant client specific attributes
|
|
222
227
|
self.location = location
|
|
@@ -232,7 +237,6 @@ class QdrantDocumentStore:
|
|
|
232
237
|
self.path = path
|
|
233
238
|
self.force_disable_check_same_thread = force_disable_check_same_thread
|
|
234
239
|
self.metadata = metadata or {}
|
|
235
|
-
self.api_key = api_key
|
|
236
240
|
|
|
237
241
|
# Store the Qdrant collection specific attributes
|
|
238
242
|
self.shard_number = shard_number
|
|
@@ -243,7 +247,6 @@ class QdrantDocumentStore:
|
|
|
243
247
|
self.optimizers_config = optimizers_config
|
|
244
248
|
self.wal_config = wal_config
|
|
245
249
|
self.quantization_config = quantization_config
|
|
246
|
-
self.init_from = init_from
|
|
247
250
|
self.wait_result_from_api = wait_result_from_api
|
|
248
251
|
self.recreate_index = recreate_index
|
|
249
252
|
self.payload_fields_to_index = payload_fields_to_index
|
|
@@ -258,9 +261,10 @@ class QdrantDocumentStore:
|
|
|
258
261
|
self.write_batch_size = write_batch_size
|
|
259
262
|
self.scroll_size = scroll_size
|
|
260
263
|
|
|
261
|
-
def _initialize_client(self):
|
|
264
|
+
def _initialize_client(self) -> None:
|
|
262
265
|
if self._client is None:
|
|
263
266
|
client_params = self._prepare_client_params()
|
|
267
|
+
# This step adds the api-key and User-Agent to metadata
|
|
264
268
|
self._client = qdrant_client.QdrantClient(**client_params)
|
|
265
269
|
# Make sure the collection is properly set up
|
|
266
270
|
self._set_up_collection(
|
|
@@ -274,7 +278,7 @@ class QdrantDocumentStore:
|
|
|
274
278
|
self.payload_fields_to_index,
|
|
275
279
|
)
|
|
276
280
|
|
|
277
|
-
async def _initialize_async_client(self):
|
|
281
|
+
async def _initialize_async_client(self) -> None:
|
|
278
282
|
"""
|
|
279
283
|
Returns the asynchronous Qdrant client, initializing it if necessary.
|
|
280
284
|
"""
|
|
@@ -330,8 +334,8 @@ class QdrantDocumentStore:
|
|
|
330
334
|
|
|
331
335
|
def filter_documents(
|
|
332
336
|
self,
|
|
333
|
-
filters:
|
|
334
|
-
) ->
|
|
337
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
338
|
+
) -> list[Document]:
|
|
335
339
|
"""
|
|
336
340
|
Returns the documents that match the provided filters.
|
|
337
341
|
|
|
@@ -344,7 +348,7 @@ class QdrantDocumentStore:
|
|
|
344
348
|
# No need to initialize client here as _get_documents_generator
|
|
345
349
|
# will handle client initialization internally
|
|
346
350
|
|
|
347
|
-
|
|
351
|
+
QdrantDocumentStore._validate_filters(filters)
|
|
348
352
|
return list(
|
|
349
353
|
self._get_documents_generator(
|
|
350
354
|
filters,
|
|
@@ -353,20 +357,20 @@ class QdrantDocumentStore:
|
|
|
353
357
|
|
|
354
358
|
async def filter_documents_async(
|
|
355
359
|
self,
|
|
356
|
-
filters:
|
|
357
|
-
) ->
|
|
360
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
361
|
+
) -> list[Document]:
|
|
358
362
|
"""
|
|
359
363
|
Asynchronously returns the documents that match the provided filters.
|
|
360
364
|
"""
|
|
361
365
|
# No need to initialize client here as _get_documents_generator_async
|
|
362
366
|
# will handle client initialization internally
|
|
363
367
|
|
|
364
|
-
|
|
368
|
+
QdrantDocumentStore._validate_filters(filters)
|
|
365
369
|
return [doc async for doc in self._get_documents_generator_async(filters)]
|
|
366
370
|
|
|
367
371
|
def write_documents(
|
|
368
372
|
self,
|
|
369
|
-
documents:
|
|
373
|
+
documents: list[Document],
|
|
370
374
|
policy: DuplicatePolicy = DuplicatePolicy.FAIL,
|
|
371
375
|
) -> int:
|
|
372
376
|
"""
|
|
@@ -419,7 +423,7 @@ class QdrantDocumentStore:
|
|
|
419
423
|
|
|
420
424
|
async def write_documents_async(
|
|
421
425
|
self,
|
|
422
|
-
documents:
|
|
426
|
+
documents: list[Document],
|
|
423
427
|
policy: DuplicatePolicy = DuplicatePolicy.FAIL,
|
|
424
428
|
) -> int:
|
|
425
429
|
"""
|
|
@@ -471,7 +475,7 @@ class QdrantDocumentStore:
|
|
|
471
475
|
progress_bar.update(self.write_batch_size)
|
|
472
476
|
return len(document_objects)
|
|
473
477
|
|
|
474
|
-
def delete_documents(self, document_ids:
|
|
478
|
+
def delete_documents(self, document_ids: list[str]) -> None:
|
|
475
479
|
"""
|
|
476
480
|
Deletes documents that match the provided `document_ids` from the document store.
|
|
477
481
|
|
|
@@ -481,11 +485,10 @@ class QdrantDocumentStore:
|
|
|
481
485
|
self._initialize_client()
|
|
482
486
|
assert self._client is not None
|
|
483
487
|
|
|
484
|
-
ids = [convert_id(_id) for _id in document_ids]
|
|
485
488
|
try:
|
|
486
489
|
self._client.delete(
|
|
487
490
|
collection_name=self.index,
|
|
488
|
-
points_selector=
|
|
491
|
+
points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
|
|
489
492
|
wait=self.wait_result_from_api,
|
|
490
493
|
)
|
|
491
494
|
except KeyError:
|
|
@@ -493,7 +496,7 @@ class QdrantDocumentStore:
|
|
|
493
496
|
"Called QdrantDocumentStore.delete_documents() on a non-existing ID",
|
|
494
497
|
)
|
|
495
498
|
|
|
496
|
-
async def delete_documents_async(self, document_ids:
|
|
499
|
+
async def delete_documents_async(self, document_ids: list[str]) -> None:
|
|
497
500
|
"""
|
|
498
501
|
Asynchronously deletes documents that match the provided `document_ids` from the document store.
|
|
499
502
|
|
|
@@ -503,11 +506,10 @@ class QdrantDocumentStore:
|
|
|
503
506
|
await self._initialize_async_client()
|
|
504
507
|
assert self._async_client is not None
|
|
505
508
|
|
|
506
|
-
ids = [convert_id(_id) for _id in document_ids]
|
|
507
509
|
try:
|
|
508
510
|
await self._async_client.delete(
|
|
509
511
|
collection_name=self.index,
|
|
510
|
-
points_selector=
|
|
512
|
+
points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
|
|
511
513
|
wait=self.wait_result_from_api,
|
|
512
514
|
)
|
|
513
515
|
except KeyError:
|
|
@@ -515,8 +517,748 @@ class QdrantDocumentStore:
|
|
|
515
517
|
"Called QdrantDocumentStore.delete_documents_async() on a non-existing ID",
|
|
516
518
|
)
|
|
517
519
|
|
|
520
|
+
def delete_by_filter(self, filters: dict[str, Any]) -> int:
|
|
521
|
+
"""
|
|
522
|
+
Deletes all documents that match the provided filters.
|
|
523
|
+
|
|
524
|
+
:param filters: The filters to apply to select documents for deletion.
|
|
525
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
526
|
+
|
|
527
|
+
:returns:
|
|
528
|
+
The number of documents deleted.
|
|
529
|
+
"""
|
|
530
|
+
self._initialize_client()
|
|
531
|
+
assert self._client is not None
|
|
532
|
+
|
|
533
|
+
try:
|
|
534
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
535
|
+
if qdrant_filter is None:
|
|
536
|
+
return 0
|
|
537
|
+
|
|
538
|
+
count_response = self._client.count(
|
|
539
|
+
collection_name=self.index,
|
|
540
|
+
count_filter=qdrant_filter,
|
|
541
|
+
)
|
|
542
|
+
deleted_count = count_response.count
|
|
543
|
+
|
|
544
|
+
self._client.delete(
|
|
545
|
+
collection_name=self.index,
|
|
546
|
+
points_selector=rest.FilterSelector(filter=qdrant_filter),
|
|
547
|
+
wait=self.wait_result_from_api,
|
|
548
|
+
)
|
|
549
|
+
return deleted_count
|
|
550
|
+
|
|
551
|
+
except Exception as e:
|
|
552
|
+
msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
|
|
553
|
+
raise QdrantStoreError(msg) from e
|
|
554
|
+
|
|
555
|
+
async def delete_by_filter_async(self, filters: dict[str, Any]) -> int:
|
|
556
|
+
"""
|
|
557
|
+
Asynchronously deletes all documents that match the provided filters.
|
|
558
|
+
|
|
559
|
+
:param filters: The filters to apply to select documents for deletion.
|
|
560
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
561
|
+
|
|
562
|
+
:returns:
|
|
563
|
+
The number of documents deleted.
|
|
564
|
+
"""
|
|
565
|
+
await self._initialize_async_client()
|
|
566
|
+
assert self._async_client is not None
|
|
567
|
+
|
|
568
|
+
try:
|
|
569
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
570
|
+
if qdrant_filter is None:
|
|
571
|
+
return 0
|
|
572
|
+
|
|
573
|
+
count_response = await self._async_client.count(
|
|
574
|
+
collection_name=self.index,
|
|
575
|
+
count_filter=qdrant_filter,
|
|
576
|
+
)
|
|
577
|
+
deleted_count = count_response.count
|
|
578
|
+
|
|
579
|
+
await self._async_client.delete(
|
|
580
|
+
collection_name=self.index,
|
|
581
|
+
points_selector=rest.FilterSelector(filter=qdrant_filter),
|
|
582
|
+
wait=self.wait_result_from_api,
|
|
583
|
+
)
|
|
584
|
+
return deleted_count
|
|
585
|
+
|
|
586
|
+
except Exception as e:
|
|
587
|
+
msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
|
|
588
|
+
raise QdrantStoreError(msg) from e
|
|
589
|
+
|
|
590
|
+
@staticmethod
|
|
591
|
+
def _check_stop_scrolling(next_offset: Any) -> bool:
|
|
592
|
+
"""
|
|
593
|
+
Checks if scrolling should stop based on the next_offset value.
|
|
594
|
+
|
|
595
|
+
:param next_offset: The offset returned from the scroll operation.
|
|
596
|
+
:returns: True if scrolling should stop, False otherwise.
|
|
597
|
+
"""
|
|
598
|
+
return next_offset is None or (
|
|
599
|
+
hasattr(next_offset, "num")
|
|
600
|
+
and hasattr(next_offset, "uuid")
|
|
601
|
+
and next_offset.num == 0
|
|
602
|
+
and next_offset.uuid == ""
|
|
603
|
+
)
|
|
604
|
+
|
|
605
|
+
@staticmethod
|
|
606
|
+
def _metadata_fields_info_from_schema(payload_schema: dict[str, Any]) -> dict[str, str]:
|
|
607
|
+
"""Build field name -> type dict from Qdrant payload_schema. Used by get_metadata_fields_info (sync/async)."""
|
|
608
|
+
fields_info: dict[str, str] = {}
|
|
609
|
+
for field_name, field_config in payload_schema.items():
|
|
610
|
+
if hasattr(field_config, "data_type"):
|
|
611
|
+
fields_info[field_name] = str(field_config.data_type)
|
|
612
|
+
else:
|
|
613
|
+
fields_info[field_name] = "unknown"
|
|
614
|
+
return fields_info
|
|
615
|
+
|
|
616
|
+
@staticmethod
|
|
617
|
+
def _process_records_min_max(
|
|
618
|
+
records: list[Any], metadata_field: str, min_value: Any, max_value: Any
|
|
619
|
+
) -> tuple[Any, Any]:
|
|
620
|
+
"""Update min/max from a batch of Qdrant records. Used by get_metadata_field_min_max (sync/async)."""
|
|
621
|
+
for record in records:
|
|
622
|
+
if record.payload and "meta" in record.payload:
|
|
623
|
+
meta = record.payload["meta"]
|
|
624
|
+
if metadata_field in meta:
|
|
625
|
+
value = meta[metadata_field]
|
|
626
|
+
if value is not None:
|
|
627
|
+
if min_value is None or value < min_value:
|
|
628
|
+
min_value = value
|
|
629
|
+
if max_value is None or value > max_value:
|
|
630
|
+
max_value = value
|
|
631
|
+
return min_value, max_value
|
|
632
|
+
|
|
633
|
+
@staticmethod
|
|
634
|
+
def _process_records_count_unique(
|
|
635
|
+
records: list[Any], metadata_fields: list[str], unique_values_by_field: dict[str, set[Any]]
|
|
636
|
+
) -> None:
|
|
637
|
+
"""
|
|
638
|
+
Update unique_values_by_field from a batch of Qdrant records.
|
|
639
|
+
|
|
640
|
+
Used by count_unique_metadata_by_filter (sync/async).
|
|
641
|
+
"""
|
|
642
|
+
for record in records:
|
|
643
|
+
if record.payload and "meta" in record.payload:
|
|
644
|
+
meta = record.payload["meta"]
|
|
645
|
+
for field in metadata_fields:
|
|
646
|
+
if field in meta:
|
|
647
|
+
value = meta[field]
|
|
648
|
+
if value is not None:
|
|
649
|
+
if isinstance(value, (list, dict)):
|
|
650
|
+
unique_values_by_field[field].add(str(value))
|
|
651
|
+
else:
|
|
652
|
+
unique_values_by_field[field].add(value)
|
|
653
|
+
|
|
654
|
+
@staticmethod
|
|
655
|
+
def _process_records_unique_values(
|
|
656
|
+
records: list[Any],
|
|
657
|
+
metadata_field: str,
|
|
658
|
+
unique_values: list[Any],
|
|
659
|
+
unique_values_set: set[Any],
|
|
660
|
+
offset: int,
|
|
661
|
+
limit: int,
|
|
662
|
+
) -> bool:
|
|
663
|
+
"""Collect unique values from a batch of records. Returns True when len(unique_values) >= offset + limit."""
|
|
664
|
+
for record in records:
|
|
665
|
+
if record.payload and "meta" in record.payload:
|
|
666
|
+
meta = record.payload["meta"]
|
|
667
|
+
if metadata_field in meta:
|
|
668
|
+
value = meta[metadata_field]
|
|
669
|
+
if value is not None:
|
|
670
|
+
hashable_value = str(value) if isinstance(value, (list, dict)) else value
|
|
671
|
+
if hashable_value not in unique_values_set:
|
|
672
|
+
unique_values_set.add(hashable_value)
|
|
673
|
+
unique_values.append(value)
|
|
674
|
+
if len(unique_values) >= offset + limit:
|
|
675
|
+
return True
|
|
676
|
+
return False
|
|
677
|
+
|
|
678
|
+
@staticmethod
|
|
679
|
+
def _create_updated_point_from_record(record: Any, meta: dict[str, Any]) -> rest.PointStruct:
|
|
680
|
+
"""
|
|
681
|
+
Creates an updated PointStruct from a Qdrant record with merged metadata.
|
|
682
|
+
|
|
683
|
+
:param record: The Qdrant record to update.
|
|
684
|
+
:param meta: The metadata fields to merge with existing metadata.
|
|
685
|
+
:returns: A PointStruct with updated metadata and preserved vectors.
|
|
686
|
+
"""
|
|
687
|
+
# merge existing payload with new metadata
|
|
688
|
+
# Metadata is stored under the "meta" key in the payload
|
|
689
|
+
updated_payload = dict(record.payload or {})
|
|
690
|
+
if "meta" not in updated_payload:
|
|
691
|
+
updated_payload["meta"] = {}
|
|
692
|
+
updated_payload["meta"].update(meta)
|
|
693
|
+
|
|
694
|
+
# create updated point preserving vectors
|
|
695
|
+
# Type cast needed because record.vector type doesn't include all PointStruct vector types
|
|
696
|
+
vector_value = record.vector if record.vector is not None else {}
|
|
697
|
+
return rest.PointStruct(
|
|
698
|
+
id=record.id,
|
|
699
|
+
vector=cast(Any, vector_value),
|
|
700
|
+
payload=updated_payload,
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
|
|
704
|
+
"""
|
|
705
|
+
Updates the metadata of all documents that match the provided filters.
|
|
706
|
+
|
|
707
|
+
**Note**: This operation is not atomic. Documents matching the filter are fetched first,
|
|
708
|
+
then updated. If documents are modified between the fetch and update operations,
|
|
709
|
+
those changes may be lost.
|
|
710
|
+
|
|
711
|
+
:param filters: The filters to apply to select documents for updating.
|
|
712
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
713
|
+
:param meta: The metadata fields to update. This will be merged with existing metadata.
|
|
714
|
+
|
|
715
|
+
:returns:
|
|
716
|
+
The number of documents updated.
|
|
717
|
+
"""
|
|
718
|
+
self._initialize_client()
|
|
719
|
+
assert self._client is not None
|
|
720
|
+
|
|
721
|
+
try:
|
|
722
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
723
|
+
if qdrant_filter is None:
|
|
724
|
+
return 0
|
|
725
|
+
|
|
726
|
+
# get all matching documents using scroll
|
|
727
|
+
updated_points = []
|
|
728
|
+
next_offset = None
|
|
729
|
+
|
|
730
|
+
while True:
|
|
731
|
+
records, next_offset = self._client.scroll(
|
|
732
|
+
collection_name=self.index,
|
|
733
|
+
scroll_filter=qdrant_filter,
|
|
734
|
+
limit=self.scroll_size,
|
|
735
|
+
offset=next_offset,
|
|
736
|
+
with_payload=True,
|
|
737
|
+
with_vectors=True,
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
# update payload for each record
|
|
741
|
+
for record in records:
|
|
742
|
+
updated_points.append(self._create_updated_point_from_record(record, meta))
|
|
743
|
+
|
|
744
|
+
if self._check_stop_scrolling(next_offset):
|
|
745
|
+
break
|
|
746
|
+
|
|
747
|
+
if not updated_points:
|
|
748
|
+
return 0
|
|
749
|
+
|
|
750
|
+
# upsert updated points back in batches
|
|
751
|
+
for batch in get_batches_from_generator(updated_points, self.write_batch_size):
|
|
752
|
+
self._client.upsert(
|
|
753
|
+
collection_name=self.index,
|
|
754
|
+
points=list(batch),
|
|
755
|
+
wait=self.wait_result_from_api,
|
|
756
|
+
)
|
|
757
|
+
|
|
758
|
+
logger.info(
|
|
759
|
+
"Updated {n_docs} documents in collection '{name}' using filters.",
|
|
760
|
+
n_docs=len(updated_points),
|
|
761
|
+
name=self.index,
|
|
762
|
+
)
|
|
763
|
+
return len(updated_points)
|
|
764
|
+
except Exception as e:
|
|
765
|
+
msg = f"Failed to update documents by filter in Qdrant: {e!s}"
|
|
766
|
+
raise QdrantStoreError(msg) from e
|
|
767
|
+
|
|
768
|
+
async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
|
|
769
|
+
"""
|
|
770
|
+
Asynchronously updates the metadata of all documents that match the provided filters.
|
|
771
|
+
|
|
772
|
+
**Note**: This operation is not atomic. Documents matching the filter are fetched first,
|
|
773
|
+
then updated. If documents are modified between the fetch and update operations,
|
|
774
|
+
those changes may be lost.
|
|
775
|
+
|
|
776
|
+
:param filters: The filters to apply to select documents for updating.
|
|
777
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
778
|
+
:param meta: The metadata fields to update. This will be merged with existing metadata.
|
|
779
|
+
|
|
780
|
+
:returns:
|
|
781
|
+
The number of documents updated.
|
|
782
|
+
"""
|
|
783
|
+
await self._initialize_async_client()
|
|
784
|
+
assert self._async_client is not None
|
|
785
|
+
|
|
786
|
+
try:
|
|
787
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
788
|
+
if qdrant_filter is None:
|
|
789
|
+
return 0
|
|
790
|
+
|
|
791
|
+
updated_points = []
|
|
792
|
+
next_offset = None
|
|
793
|
+
|
|
794
|
+
while True:
|
|
795
|
+
records, next_offset = await self._async_client.scroll(
|
|
796
|
+
collection_name=self.index,
|
|
797
|
+
scroll_filter=qdrant_filter,
|
|
798
|
+
limit=self.scroll_size,
|
|
799
|
+
offset=next_offset,
|
|
800
|
+
with_payload=True,
|
|
801
|
+
with_vectors=True,
|
|
802
|
+
)
|
|
803
|
+
|
|
804
|
+
# update payload for each record
|
|
805
|
+
for record in records:
|
|
806
|
+
updated_points.append(self._create_updated_point_from_record(record, meta))
|
|
807
|
+
|
|
808
|
+
if self._check_stop_scrolling(next_offset):
|
|
809
|
+
break
|
|
810
|
+
|
|
811
|
+
if not updated_points:
|
|
812
|
+
return 0
|
|
813
|
+
|
|
814
|
+
# upsert updated points back in batches
|
|
815
|
+
for batch in get_batches_from_generator(updated_points, self.write_batch_size):
|
|
816
|
+
await self._async_client.upsert(
|
|
817
|
+
collection_name=self.index,
|
|
818
|
+
points=list(batch),
|
|
819
|
+
wait=self.wait_result_from_api,
|
|
820
|
+
)
|
|
821
|
+
|
|
822
|
+
logger.info(
|
|
823
|
+
"Updated {n_docs} documents in collection '{name}' using filters.",
|
|
824
|
+
n_docs=len(updated_points),
|
|
825
|
+
name=self.index,
|
|
826
|
+
)
|
|
827
|
+
return len(updated_points)
|
|
828
|
+
except Exception as e:
|
|
829
|
+
msg = f"Failed to update documents by filter in Qdrant: {e!s}"
|
|
830
|
+
raise QdrantStoreError(msg) from e
|
|
831
|
+
|
|
832
|
+
def delete_all_documents(self, recreate_index: bool = False) -> None:
|
|
833
|
+
"""
|
|
834
|
+
Deletes all documents from the document store.
|
|
835
|
+
|
|
836
|
+
:param recreate_index: Whether to recreate the index after deleting all documents.
|
|
837
|
+
"""
|
|
838
|
+
|
|
839
|
+
self._initialize_client()
|
|
840
|
+
assert self._client is not None
|
|
841
|
+
|
|
842
|
+
if recreate_index:
|
|
843
|
+
# get current collection config as json
|
|
844
|
+
collection_info = self._client.get_collection(collection_name=self.index)
|
|
845
|
+
info_json = collection_info.model_dump()
|
|
846
|
+
|
|
847
|
+
# deal with the Optional use_sparse_embeddings
|
|
848
|
+
sparse_vectors = info_json["config"]["params"]["sparse_vectors"]
|
|
849
|
+
use_sparse_embeddings = True if sparse_vectors else False
|
|
850
|
+
|
|
851
|
+
# deal with the Optional sparse_idf
|
|
852
|
+
hnsw_config = info_json["config"]["params"]["vectors"].get("config", {}).get("hnsw_config", None)
|
|
853
|
+
sparse_idf = True if use_sparse_embeddings and hnsw_config else False
|
|
854
|
+
|
|
855
|
+
# recreate collection
|
|
856
|
+
self._set_up_collection(
|
|
857
|
+
collection_name=self.index,
|
|
858
|
+
embedding_dim=info_json["config"]["params"]["vectors"]["size"],
|
|
859
|
+
recreate_collection=True,
|
|
860
|
+
similarity=info_json["config"]["params"]["vectors"]["distance"].lower(),
|
|
861
|
+
use_sparse_embeddings=use_sparse_embeddings,
|
|
862
|
+
sparse_idf=sparse_idf,
|
|
863
|
+
on_disk=info_json["config"]["hnsw_config"]["on_disk"],
|
|
864
|
+
payload_fields_to_index=info_json["payload_schema"],
|
|
865
|
+
)
|
|
866
|
+
|
|
867
|
+
else:
|
|
868
|
+
try:
|
|
869
|
+
self._client.delete(
|
|
870
|
+
collection_name=self.index,
|
|
871
|
+
points_selector=rest.FilterSelector(
|
|
872
|
+
filter=rest.Filter(
|
|
873
|
+
must=[],
|
|
874
|
+
)
|
|
875
|
+
),
|
|
876
|
+
wait=self.wait_result_from_api,
|
|
877
|
+
)
|
|
878
|
+
except Exception as e:
|
|
879
|
+
logger.warning(
|
|
880
|
+
f"Error {e} when calling QdrantDocumentStore.delete_all_documents()",
|
|
881
|
+
)
|
|
882
|
+
|
|
883
|
+
async def delete_all_documents_async(self, recreate_index: bool = False) -> None:
|
|
884
|
+
"""
|
|
885
|
+
Asynchronously deletes all documents from the document store.
|
|
886
|
+
|
|
887
|
+
:param recreate_index: Whether to recreate the index after deleting all documents.
|
|
888
|
+
"""
|
|
889
|
+
|
|
890
|
+
await self._initialize_async_client()
|
|
891
|
+
assert self._async_client is not None
|
|
892
|
+
|
|
893
|
+
if recreate_index:
|
|
894
|
+
# get current collection config as json
|
|
895
|
+
collection_info = await self._async_client.get_collection(collection_name=self.index)
|
|
896
|
+
info_json = collection_info.model_dump()
|
|
897
|
+
|
|
898
|
+
# deal with the Optional use_sparse_embeddings
|
|
899
|
+
sparse_vectors = info_json["config"]["params"]["sparse_vectors"]
|
|
900
|
+
use_sparse_embeddings = True if sparse_vectors else False
|
|
901
|
+
|
|
902
|
+
# deal with the Optional sparse_idf
|
|
903
|
+
hnsw_config = info_json["config"]["params"]["vectors"].get("config", {}).get("hnsw_config", None)
|
|
904
|
+
sparse_idf = True if use_sparse_embeddings and hnsw_config else False
|
|
905
|
+
|
|
906
|
+
# recreate collection
|
|
907
|
+
await self._set_up_collection_async(
|
|
908
|
+
collection_name=self.index,
|
|
909
|
+
embedding_dim=info_json["config"]["params"]["vectors"]["size"],
|
|
910
|
+
recreate_collection=True,
|
|
911
|
+
similarity=info_json["config"]["params"]["vectors"]["distance"].lower(),
|
|
912
|
+
use_sparse_embeddings=use_sparse_embeddings,
|
|
913
|
+
sparse_idf=sparse_idf,
|
|
914
|
+
on_disk=info_json["config"]["hnsw_config"]["on_disk"],
|
|
915
|
+
payload_fields_to_index=info_json["payload_schema"],
|
|
916
|
+
)
|
|
917
|
+
|
|
918
|
+
else:
|
|
919
|
+
try:
|
|
920
|
+
await self._async_client.delete(
|
|
921
|
+
collection_name=self.index,
|
|
922
|
+
points_selector=rest.FilterSelector(
|
|
923
|
+
filter=rest.Filter(
|
|
924
|
+
must=[],
|
|
925
|
+
)
|
|
926
|
+
),
|
|
927
|
+
wait=self.wait_result_from_api,
|
|
928
|
+
)
|
|
929
|
+
except Exception as e:
|
|
930
|
+
logger.warning(
|
|
931
|
+
f"Error {e} when calling QdrantDocumentStore.delete_all_documents_async()",
|
|
932
|
+
)
|
|
933
|
+
|
|
934
|
+
def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
|
|
935
|
+
"""
|
|
936
|
+
Returns the number of documents that match the provided filters.
|
|
937
|
+
|
|
938
|
+
:param filters: The filters to apply to count documents.
|
|
939
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
940
|
+
|
|
941
|
+
:returns: The number of documents that match the filters.
|
|
942
|
+
"""
|
|
943
|
+
self._initialize_client()
|
|
944
|
+
assert self._client is not None
|
|
945
|
+
|
|
946
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
947
|
+
try:
|
|
948
|
+
response = self._client.count(
|
|
949
|
+
collection_name=self.index,
|
|
950
|
+
count_filter=qdrant_filter,
|
|
951
|
+
)
|
|
952
|
+
return response.count
|
|
953
|
+
except (UnexpectedResponse, ValueError) as e:
|
|
954
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter()")
|
|
955
|
+
return 0
|
|
956
|
+
|
|
957
|
+
async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
    """
    Asynchronously counts the documents in the collection that satisfy the given filters.

    :param filters: The filters to apply to select documents for counting.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns: The number of matching documents, or 0 if the count request fails.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    count_filter = convert_filters_to_qdrant(filters)
    try:
        result = await self._async_client.count(
            collection_name=self.index,
            count_filter=count_filter,
        )
    except (UnexpectedResponse, ValueError) as e:
        # Best-effort: log the failure and report zero instead of raising.
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter_async()")
        return 0
    else:
        return result.count
|
|
980
|
+
|
|
981
|
+
def get_metadata_fields_info(self) -> dict[str, str]:
    """
    Reports the indexed payload fields of the collection and their types.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}),
        or an empty dict if the collection info cannot be retrieved.
    """
    self._initialize_client()
    assert self._client is not None

    try:
        info = self._client.get_collection(self.index)
        # payload_schema may be None for collections without indexed payload fields.
        return self._metadata_fields_info_from_schema(info.payload_schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info()")
        return {}
|
|
998
|
+
|
|
999
|
+
async def get_metadata_fields_info_async(self) -> dict[str, str]:
    """
    Asynchronously reports the indexed payload fields of the collection and their types.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}),
        or an empty dict if the collection info cannot be retrieved.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        info = await self._async_client.get_collection(self.index)
        # payload_schema may be None for collections without indexed payload fields.
        return self._metadata_fields_info_from_schema(info.payload_schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info_async()")
        return {}
|
|
1016
|
+
|
|
1017
|
+
def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
    """
    Scans the whole collection and returns the minimum and maximum values of a metadata field.

    :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.

    :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
        metadata field across all documents. Returns an empty dict if no documents have the field.
    """
    self._initialize_client()
    assert self._client is not None

    try:
        lowest: Any = None
        highest: Any = None
        cursor = None

        # Page through every point; payloads only, vectors are not needed.
        while True:
            points, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(points, metadata_field, lowest, highest)
            if self._check_stop_scrolling(cursor):
                break

        # Both stay None when the field never appeared in any payload.
        if lowest is None or highest is None:
            return {}
        return {"min": lowest, "max": highest}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max()")
        return {}
|
|
1053
|
+
|
|
1054
|
+
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
    """
    Asynchronously scans the whole collection and returns the minimum and maximum values of a metadata field.

    :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.

    :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
        metadata field across all documents. Returns an empty dict if no documents have the field.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        lowest: Any = None
        highest: Any = None
        cursor = None

        # Page through every point; payloads only, vectors are not needed.
        while True:
            points, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(points, metadata_field, lowest, highest)
            if self._check_stop_scrolling(cursor):
                break

        # Both stay None when the field never appeared in any payload.
        if lowest is None or highest is None:
            return {}
        return {"min": lowest, "max": highest}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max_async()")
        return {}
|
|
1090
|
+
|
|
1091
|
+
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
    """
    Counts distinct values of the given metadata fields among the documents matching the filters.

    :param filters: The filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
        documents. All counts are 0 if the scan fails.
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    seen_values: dict[str, set[Any]] = {name: set() for name in metadata_fields}

    try:
        cursor = None
        # Page through the filtered points, accumulating distinct values per field.
        while True:
            points, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(points, metadata_fields, seen_values)
            if self._check_stop_scrolling(cursor):
                break

        return {name: len(seen_values[name]) for name in metadata_fields}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter()")
        return dict.fromkeys(metadata_fields, 0)
|
|
1127
|
+
|
|
1128
|
+
async def count_unique_metadata_by_filter_async(
    self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
    """
    Asynchronously counts distinct values of the given metadata fields among the documents matching the filters.

    :param filters: The filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
        documents. All counts are 0 if the scan fails.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    seen_values: dict[str, set[Any]] = {name: set() for name in metadata_fields}

    try:
        cursor = None
        # Page through the filtered points, accumulating distinct values per field.
        while True:
            points, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(points, metadata_fields, seen_values)
            if self._check_stop_scrolling(cursor):
                break

        return {name: len(seen_values[name]) for name in metadata_fields}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter_async()")
        return dict.fromkeys(metadata_fields, 0)
|
|
1167
|
+
|
|
1168
|
+
def get_metadata_field_unique_values(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Collects unique values of a metadata field, with optional filters and offset/limit pagination.

    Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    collected: list[Any] = []
    seen: set[Any] = set()
    target = offset + limit

    try:
        cursor = None
        # Keep scrolling until enough unique values were collected to serve the requested page.
        while len(collected) < target:
            points, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            page_full = self._process_records_unique_values(
                points, metadata_field, collected, seen, offset, limit
            )
            # Stop when the helper signals the page is complete or the scroll is exhausted.
            if page_full or self._check_stop_scrolling(cursor):
                break

        return collected[offset:target]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values()")
        return []
|
|
1213
|
+
|
|
1214
|
+
async def get_metadata_field_unique_values_async(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Asynchronously collects unique values of a metadata field, with optional filters and
    offset/limit pagination.

    Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    collected: list[Any] = []
    seen: set[Any] = set()
    target = offset + limit

    try:
        cursor = None
        # Keep scrolling until enough unique values were collected to serve the requested page.
        while len(collected) < target:
            points, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            page_full = self._process_records_unique_values(
                points, metadata_field, collected, seen, offset, limit
            )
            # Stop when the helper signals the page is complete or the scroll is exhausted.
            if page_full or self._check_stop_scrolling(cursor):
                break

        return collected[offset:target]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values_async()")
        return []
|
|
1259
|
+
|
|
518
1260
|
@classmethod
|
|
519
|
-
def from_dict(cls, data:
|
|
1261
|
+
def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore":
|
|
520
1262
|
"""
|
|
521
1263
|
Deserializes the component from a dictionary.
|
|
522
1264
|
|
|
@@ -528,7 +1270,7 @@ class QdrantDocumentStore:
|
|
|
528
1270
|
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
|
|
529
1271
|
return default_from_dict(cls, data)
|
|
530
1272
|
|
|
531
|
-
def to_dict(self) ->
|
|
1273
|
+
def to_dict(self) -> dict[str, Any]:
|
|
532
1274
|
"""
|
|
533
1275
|
Serializes the component to a dictionary.
|
|
534
1276
|
|
|
@@ -547,7 +1289,7 @@ class QdrantDocumentStore:
|
|
|
547
1289
|
|
|
548
1290
|
def _get_documents_generator(
|
|
549
1291
|
self,
|
|
550
|
-
filters:
|
|
1292
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
551
1293
|
) -> Generator[Document, None, None]:
|
|
552
1294
|
"""
|
|
553
1295
|
Returns a generator that yields documents from Qdrant based on the provided filters.
|
|
@@ -574,8 +1316,11 @@ class QdrantDocumentStore:
|
|
|
574
1316
|
with_vectors=True,
|
|
575
1317
|
)
|
|
576
1318
|
stop_scrolling = next_offset is None or (
|
|
577
|
-
|
|
578
|
-
|
|
1319
|
+
hasattr(next_offset, "num")
|
|
1320
|
+
and hasattr(next_offset, "uuid")
|
|
1321
|
+
and next_offset.num == 0
|
|
1322
|
+
and next_offset.uuid == ""
|
|
1323
|
+
) # PointId always has num and uuid
|
|
579
1324
|
|
|
580
1325
|
for record in records:
|
|
581
1326
|
yield convert_qdrant_point_to_haystack_document(
|
|
@@ -584,7 +1329,7 @@ class QdrantDocumentStore:
|
|
|
584
1329
|
|
|
585
1330
|
async def _get_documents_generator_async(
|
|
586
1331
|
self,
|
|
587
|
-
filters:
|
|
1332
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
588
1333
|
) -> AsyncGenerator[Document, None]:
|
|
589
1334
|
"""
|
|
590
1335
|
Returns an asynchronous generator that yields documents from Qdrant based on the provided filters.
|
|
@@ -611,8 +1356,11 @@ class QdrantDocumentStore:
|
|
|
611
1356
|
with_vectors=True,
|
|
612
1357
|
)
|
|
613
1358
|
stop_scrolling = next_offset is None or (
|
|
614
|
-
|
|
615
|
-
|
|
1359
|
+
hasattr(next_offset, "num")
|
|
1360
|
+
and hasattr(next_offset, "uuid")
|
|
1361
|
+
and next_offset.num == 0
|
|
1362
|
+
and next_offset.uuid == ""
|
|
1363
|
+
) # PointId always has num and uuid
|
|
616
1364
|
|
|
617
1365
|
for record in records:
|
|
618
1366
|
yield convert_qdrant_point_to_haystack_document(
|
|
@@ -621,19 +1369,17 @@ class QdrantDocumentStore:
|
|
|
621
1369
|
|
|
622
1370
|
def get_documents_by_id(
|
|
623
1371
|
self,
|
|
624
|
-
ids:
|
|
625
|
-
) ->
|
|
1372
|
+
ids: list[str],
|
|
1373
|
+
) -> list[Document]:
|
|
626
1374
|
"""
|
|
627
1375
|
Retrieves documents from Qdrant by their IDs.
|
|
628
1376
|
|
|
629
1377
|
:param ids:
|
|
630
1378
|
A list of document IDs to retrieve.
|
|
631
|
-
:param index:
|
|
632
|
-
The name of the index to retrieve documents from.
|
|
633
1379
|
:returns:
|
|
634
1380
|
A list of documents.
|
|
635
1381
|
"""
|
|
636
|
-
documents:
|
|
1382
|
+
documents: list[Document] = []
|
|
637
1383
|
|
|
638
1384
|
self._initialize_client()
|
|
639
1385
|
assert self._client is not None
|
|
@@ -654,19 +1400,17 @@ class QdrantDocumentStore:
|
|
|
654
1400
|
|
|
655
1401
|
async def get_documents_by_id_async(
|
|
656
1402
|
self,
|
|
657
|
-
ids:
|
|
658
|
-
) ->
|
|
1403
|
+
ids: list[str],
|
|
1404
|
+
) -> list[Document]:
|
|
659
1405
|
"""
|
|
660
1406
|
Retrieves documents from Qdrant by their IDs.
|
|
661
1407
|
|
|
662
1408
|
:param ids:
|
|
663
1409
|
A list of document IDs to retrieve.
|
|
664
|
-
:param index:
|
|
665
|
-
The name of the index to retrieve documents from.
|
|
666
1410
|
:returns:
|
|
667
1411
|
A list of documents.
|
|
668
1412
|
"""
|
|
669
|
-
documents:
|
|
1413
|
+
documents: list[Document] = []
|
|
670
1414
|
|
|
671
1415
|
await self._initialize_async_client()
|
|
672
1416
|
assert self._async_client is not None
|
|
@@ -688,14 +1432,14 @@ class QdrantDocumentStore:
|
|
|
688
1432
|
def _query_by_sparse(
|
|
689
1433
|
self,
|
|
690
1434
|
query_sparse_embedding: SparseEmbedding,
|
|
691
|
-
filters:
|
|
1435
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
692
1436
|
top_k: int = 10,
|
|
693
1437
|
scale_score: bool = False,
|
|
694
1438
|
return_embedding: bool = False,
|
|
695
|
-
score_threshold:
|
|
696
|
-
group_by:
|
|
697
|
-
group_size:
|
|
698
|
-
) ->
|
|
1439
|
+
score_threshold: float | None = None,
|
|
1440
|
+
group_by: str | None = None,
|
|
1441
|
+
group_size: int | None = None,
|
|
1442
|
+
) -> list[Document]:
|
|
699
1443
|
"""
|
|
700
1444
|
Queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
701
1445
|
|
|
@@ -742,7 +1486,7 @@ class QdrantDocumentStore:
|
|
|
742
1486
|
query_filter=qdrant_filters,
|
|
743
1487
|
limit=top_k,
|
|
744
1488
|
group_by=group_by,
|
|
745
|
-
group_size=group_size,
|
|
1489
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
746
1490
|
with_vectors=return_embedding,
|
|
747
1491
|
score_threshold=score_threshold,
|
|
748
1492
|
).groups
|
|
@@ -764,15 +1508,15 @@ class QdrantDocumentStore:
|
|
|
764
1508
|
|
|
765
1509
|
def _query_by_embedding(
|
|
766
1510
|
self,
|
|
767
|
-
query_embedding:
|
|
768
|
-
filters:
|
|
1511
|
+
query_embedding: list[float],
|
|
1512
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
769
1513
|
top_k: int = 10,
|
|
770
1514
|
scale_score: bool = False,
|
|
771
1515
|
return_embedding: bool = False,
|
|
772
|
-
score_threshold:
|
|
773
|
-
group_by:
|
|
774
|
-
group_size:
|
|
775
|
-
) ->
|
|
1516
|
+
score_threshold: float | None = None,
|
|
1517
|
+
group_by: str | None = None,
|
|
1518
|
+
group_size: int | None = None,
|
|
1519
|
+
) -> list[Document]:
|
|
776
1520
|
"""
|
|
777
1521
|
Queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
778
1522
|
|
|
@@ -804,7 +1548,7 @@ class QdrantDocumentStore:
|
|
|
804
1548
|
query_filter=qdrant_filters,
|
|
805
1549
|
limit=top_k,
|
|
806
1550
|
group_by=group_by,
|
|
807
|
-
group_size=group_size,
|
|
1551
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
808
1552
|
with_vectors=return_embedding,
|
|
809
1553
|
score_threshold=score_threshold,
|
|
810
1554
|
).groups
|
|
@@ -824,15 +1568,15 @@ class QdrantDocumentStore:
|
|
|
824
1568
|
|
|
825
1569
|
def _query_hybrid(
|
|
826
1570
|
self,
|
|
827
|
-
query_embedding:
|
|
1571
|
+
query_embedding: list[float],
|
|
828
1572
|
query_sparse_embedding: SparseEmbedding,
|
|
829
|
-
filters:
|
|
1573
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
830
1574
|
top_k: int = 10,
|
|
831
1575
|
return_embedding: bool = False,
|
|
832
|
-
score_threshold:
|
|
833
|
-
group_by:
|
|
834
|
-
group_size:
|
|
835
|
-
) ->
|
|
1576
|
+
score_threshold: float | None = None,
|
|
1577
|
+
group_by: str | None = None,
|
|
1578
|
+
group_size: int | None = None,
|
|
1579
|
+
) -> list[Document]:
|
|
836
1580
|
"""
|
|
837
1581
|
Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
|
|
838
1582
|
|
|
@@ -896,7 +1640,7 @@ class QdrantDocumentStore:
|
|
|
896
1640
|
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
897
1641
|
limit=top_k,
|
|
898
1642
|
group_by=group_by,
|
|
899
|
-
group_size=group_size,
|
|
1643
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
900
1644
|
score_threshold=score_threshold,
|
|
901
1645
|
with_payload=True,
|
|
902
1646
|
with_vectors=return_embedding,
|
|
@@ -938,14 +1682,14 @@ class QdrantDocumentStore:
|
|
|
938
1682
|
async def _query_by_sparse_async(
|
|
939
1683
|
self,
|
|
940
1684
|
query_sparse_embedding: SparseEmbedding,
|
|
941
|
-
filters:
|
|
1685
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
942
1686
|
top_k: int = 10,
|
|
943
1687
|
scale_score: bool = False,
|
|
944
1688
|
return_embedding: bool = False,
|
|
945
|
-
score_threshold:
|
|
946
|
-
group_by:
|
|
947
|
-
group_size:
|
|
948
|
-
) ->
|
|
1689
|
+
score_threshold: float | None = None,
|
|
1690
|
+
group_by: str | None = None,
|
|
1691
|
+
group_size: int | None = None,
|
|
1692
|
+
) -> list[Document]:
|
|
949
1693
|
"""
|
|
950
1694
|
Asynchronously queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
951
1695
|
|
|
@@ -993,14 +1737,14 @@ class QdrantDocumentStore:
|
|
|
993
1737
|
query_filter=qdrant_filters,
|
|
994
1738
|
limit=top_k,
|
|
995
1739
|
group_by=group_by,
|
|
996
|
-
group_size=group_size,
|
|
1740
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
997
1741
|
with_vectors=return_embedding,
|
|
998
1742
|
score_threshold=score_threshold,
|
|
999
1743
|
)
|
|
1000
1744
|
groups = response.groups
|
|
1001
1745
|
return self._process_group_results(groups)
|
|
1002
1746
|
else:
|
|
1003
|
-
|
|
1747
|
+
query_response = await self._async_client.query_points(
|
|
1004
1748
|
collection_name=self.index,
|
|
1005
1749
|
query=rest.SparseVector(
|
|
1006
1750
|
indices=query_indices,
|
|
@@ -1012,20 +1756,20 @@ class QdrantDocumentStore:
|
|
|
1012
1756
|
with_vectors=return_embedding,
|
|
1013
1757
|
score_threshold=score_threshold,
|
|
1014
1758
|
)
|
|
1015
|
-
points =
|
|
1759
|
+
points = query_response.points
|
|
1016
1760
|
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1017
1761
|
|
|
1018
1762
|
async def _query_by_embedding_async(
|
|
1019
1763
|
self,
|
|
1020
|
-
query_embedding:
|
|
1021
|
-
filters:
|
|
1764
|
+
query_embedding: list[float],
|
|
1765
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1022
1766
|
top_k: int = 10,
|
|
1023
1767
|
scale_score: bool = False,
|
|
1024
1768
|
return_embedding: bool = False,
|
|
1025
|
-
score_threshold:
|
|
1026
|
-
group_by:
|
|
1027
|
-
group_size:
|
|
1028
|
-
) ->
|
|
1769
|
+
score_threshold: float | None = None,
|
|
1770
|
+
group_by: str | None = None,
|
|
1771
|
+
group_size: int | None = None,
|
|
1772
|
+
) -> list[Document]:
|
|
1029
1773
|
"""
|
|
1030
1774
|
Asynchronously queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
1031
1775
|
|
|
@@ -1057,14 +1801,14 @@ class QdrantDocumentStore:
|
|
|
1057
1801
|
query_filter=qdrant_filters,
|
|
1058
1802
|
limit=top_k,
|
|
1059
1803
|
group_by=group_by,
|
|
1060
|
-
group_size=group_size,
|
|
1804
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1061
1805
|
with_vectors=return_embedding,
|
|
1062
1806
|
score_threshold=score_threshold,
|
|
1063
1807
|
)
|
|
1064
1808
|
groups = response.groups
|
|
1065
1809
|
return self._process_group_results(groups)
|
|
1066
1810
|
else:
|
|
1067
|
-
|
|
1811
|
+
query_response = await self._async_client.query_points(
|
|
1068
1812
|
collection_name=self.index,
|
|
1069
1813
|
query=query_embedding,
|
|
1070
1814
|
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
@@ -1073,20 +1817,20 @@ class QdrantDocumentStore:
|
|
|
1073
1817
|
with_vectors=return_embedding,
|
|
1074
1818
|
score_threshold=score_threshold,
|
|
1075
1819
|
)
|
|
1076
|
-
points =
|
|
1820
|
+
points = query_response.points
|
|
1077
1821
|
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1078
1822
|
|
|
1079
1823
|
async def _query_hybrid_async(
|
|
1080
1824
|
self,
|
|
1081
|
-
query_embedding:
|
|
1825
|
+
query_embedding: list[float],
|
|
1082
1826
|
query_sparse_embedding: SparseEmbedding,
|
|
1083
|
-
filters:
|
|
1827
|
+
filters: dict[str, Any] | rest.Filter | None = None,
|
|
1084
1828
|
top_k: int = 10,
|
|
1085
1829
|
return_embedding: bool = False,
|
|
1086
|
-
score_threshold:
|
|
1087
|
-
group_by:
|
|
1088
|
-
group_size:
|
|
1089
|
-
) ->
|
|
1830
|
+
score_threshold: float | None = None,
|
|
1831
|
+
group_by: str | None = None,
|
|
1832
|
+
group_size: int | None = None,
|
|
1833
|
+
) -> list[Document]:
|
|
1090
1834
|
"""
|
|
1091
1835
|
Asynchronously retrieves documents based on dense and sparse embeddings and fuses
|
|
1092
1836
|
the results using Reciprocal Rank Fusion.
|
|
@@ -1148,14 +1892,14 @@ class QdrantDocumentStore:
|
|
|
1148
1892
|
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
1149
1893
|
limit=top_k,
|
|
1150
1894
|
group_by=group_by,
|
|
1151
|
-
group_size=group_size,
|
|
1895
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1152
1896
|
score_threshold=score_threshold,
|
|
1153
1897
|
with_payload=True,
|
|
1154
1898
|
with_vectors=return_embedding,
|
|
1155
1899
|
)
|
|
1156
1900
|
groups = response.groups
|
|
1157
1901
|
else:
|
|
1158
|
-
|
|
1902
|
+
query_response = await self._async_client.query_points(
|
|
1159
1903
|
collection_name=self.index,
|
|
1160
1904
|
prefetch=[
|
|
1161
1905
|
rest.Prefetch(
|
|
@@ -1178,7 +1922,7 @@ class QdrantDocumentStore:
|
|
|
1178
1922
|
with_payload=True,
|
|
1179
1923
|
with_vectors=return_embedding,
|
|
1180
1924
|
)
|
|
1181
|
-
points =
|
|
1925
|
+
points = query_response.points
|
|
1182
1926
|
|
|
1183
1927
|
except Exception as e:
|
|
1184
1928
|
msg = "Error during hybrid search"
|
|
@@ -1210,9 +1954,10 @@ class QdrantDocumentStore:
|
|
|
1210
1954
|
)
|
|
1211
1955
|
raise QdrantStoreError(msg) from ke
|
|
1212
1956
|
|
|
1213
|
-
def _create_payload_index(self, collection_name: str, payload_fields_to_index:
|
|
1957
|
+
def _create_payload_index(self, collection_name: str, payload_fields_to_index: list[dict] | None = None) -> None:
|
|
1214
1958
|
"""
|
|
1215
|
-
Create payload index for the collection if payload_fields_to_index is provided
|
|
1959
|
+
Create payload index for the collection if payload_fields_to_index is provided.
|
|
1960
|
+
|
|
1216
1961
|
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
1217
1962
|
"""
|
|
1218
1963
|
if payload_fields_to_index is not None:
|
|
@@ -1228,15 +1973,15 @@ class QdrantDocumentStore:
|
|
|
1228
1973
|
)
|
|
1229
1974
|
|
|
1230
1975
|
async def _create_payload_index_async(
|
|
1231
|
-
self, collection_name: str, payload_fields_to_index:
|
|
1232
|
-
):
|
|
1976
|
+
self, collection_name: str, payload_fields_to_index: list[dict] | None = None
|
|
1977
|
+
) -> None:
|
|
1233
1978
|
"""
|
|
1234
|
-
Asynchronously create payload index for the collection if payload_fields_to_index is provided
|
|
1979
|
+
Asynchronously create payload index for the collection if payload_fields_to_index is provided.
|
|
1980
|
+
|
|
1235
1981
|
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
1236
1982
|
"""
|
|
1237
1983
|
if payload_fields_to_index is not None:
|
|
1238
1984
|
for payload_index in payload_fields_to_index:
|
|
1239
|
-
|
|
1240
1985
|
# self._async_client is initialized at this point
|
|
1241
1986
|
# since _initialize_async_client() is called before this method is executed
|
|
1242
1987
|
assert self._async_client is not None
|
|
@@ -1256,10 +2001,11 @@ class QdrantDocumentStore:
|
|
|
1256
2001
|
use_sparse_embeddings: bool,
|
|
1257
2002
|
sparse_idf: bool,
|
|
1258
2003
|
on_disk: bool = False,
|
|
1259
|
-
payload_fields_to_index:
|
|
1260
|
-
):
|
|
2004
|
+
payload_fields_to_index: list[dict] | None = None,
|
|
2005
|
+
) -> None:
|
|
1261
2006
|
"""
|
|
1262
2007
|
Sets up the Qdrant collection with the specified parameters.
|
|
2008
|
+
|
|
1263
2009
|
:param collection_name:
|
|
1264
2010
|
The name of the collection to set up.
|
|
1265
2011
|
:param embedding_dim:
|
|
@@ -1312,10 +2058,11 @@ class QdrantDocumentStore:
|
|
|
1312
2058
|
use_sparse_embeddings: bool,
|
|
1313
2059
|
sparse_idf: bool,
|
|
1314
2060
|
on_disk: bool = False,
|
|
1315
|
-
payload_fields_to_index:
|
|
1316
|
-
):
|
|
2061
|
+
payload_fields_to_index: list[dict] | None = None,
|
|
2062
|
+
) -> None:
|
|
1317
2063
|
"""
|
|
1318
2064
|
Asynchronously sets up the Qdrant collection with the specified parameters.
|
|
2065
|
+
|
|
1319
2066
|
:param collection_name:
|
|
1320
2067
|
The name of the collection to set up.
|
|
1321
2068
|
:param embedding_dim:
|
|
@@ -1362,12 +2109,12 @@ class QdrantDocumentStore:
|
|
|
1362
2109
|
def recreate_collection(
|
|
1363
2110
|
self,
|
|
1364
2111
|
collection_name: str,
|
|
1365
|
-
distance,
|
|
2112
|
+
distance: rest.Distance,
|
|
1366
2113
|
embedding_dim: int,
|
|
1367
|
-
on_disk:
|
|
1368
|
-
use_sparse_embeddings:
|
|
2114
|
+
on_disk: bool | None = None,
|
|
2115
|
+
use_sparse_embeddings: bool | None = None,
|
|
1369
2116
|
sparse_idf: bool = False,
|
|
1370
|
-
):
|
|
2117
|
+
) -> None:
|
|
1371
2118
|
"""
|
|
1372
2119
|
Recreates the Qdrant collection with the specified parameters.
|
|
1373
2120
|
|
|
@@ -1405,12 +2152,12 @@ class QdrantDocumentStore:
|
|
|
1405
2152
|
async def recreate_collection_async(
|
|
1406
2153
|
self,
|
|
1407
2154
|
collection_name: str,
|
|
1408
|
-
distance,
|
|
2155
|
+
distance: rest.Distance,
|
|
1409
2156
|
embedding_dim: int,
|
|
1410
|
-
on_disk:
|
|
1411
|
-
use_sparse_embeddings:
|
|
2157
|
+
on_disk: bool | None = None,
|
|
2158
|
+
use_sparse_embeddings: bool | None = None,
|
|
1412
2159
|
sparse_idf: bool = False,
|
|
1413
|
-
):
|
|
2160
|
+
) -> None:
|
|
1414
2161
|
"""
|
|
1415
2162
|
Asynchronously recreates the Qdrant collection with the specified parameters.
|
|
1416
2163
|
|
|
@@ -1447,9 +2194,9 @@ class QdrantDocumentStore:
|
|
|
1447
2194
|
|
|
1448
2195
|
def _handle_duplicate_documents(
|
|
1449
2196
|
self,
|
|
1450
|
-
documents:
|
|
1451
|
-
policy: DuplicatePolicy = None,
|
|
1452
|
-
):
|
|
2197
|
+
documents: list[Document],
|
|
2198
|
+
policy: DuplicatePolicy | None = None,
|
|
2199
|
+
) -> list[Document]:
|
|
1453
2200
|
"""
|
|
1454
2201
|
Checks whether any of the passed documents is already existing in the chosen index and returns a list of
|
|
1455
2202
|
documents that are not in the index yet.
|
|
@@ -1462,7 +2209,7 @@ class QdrantDocumentStore:
|
|
|
1462
2209
|
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
1463
2210
|
documents = self._drop_duplicate_documents(documents)
|
|
1464
2211
|
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
|
|
1465
|
-
ids_exist_in_db:
|
|
2212
|
+
ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
|
|
1466
2213
|
|
|
1467
2214
|
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
1468
2215
|
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
@@ -1474,9 +2221,9 @@ class QdrantDocumentStore:
|
|
|
1474
2221
|
|
|
1475
2222
|
async def _handle_duplicate_documents_async(
|
|
1476
2223
|
self,
|
|
1477
|
-
documents:
|
|
1478
|
-
policy: DuplicatePolicy = None,
|
|
1479
|
-
):
|
|
2224
|
+
documents: list[Document],
|
|
2225
|
+
policy: DuplicatePolicy | None = None,
|
|
2226
|
+
) -> list[Document]:
|
|
1480
2227
|
"""
|
|
1481
2228
|
Asynchronously checks whether any of the passed documents is already existing
|
|
1482
2229
|
in the chosen index and returns a list of
|
|
@@ -1490,7 +2237,7 @@ class QdrantDocumentStore:
|
|
|
1490
2237
|
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
1491
2238
|
documents = self._drop_duplicate_documents(documents)
|
|
1492
2239
|
documents_found = await self.get_documents_by_id_async(ids=[doc.id for doc in documents])
|
|
1493
|
-
ids_exist_in_db:
|
|
2240
|
+
ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
|
|
1494
2241
|
|
|
1495
2242
|
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
1496
2243
|
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
@@ -1500,13 +2247,13 @@ class QdrantDocumentStore:
|
|
|
1500
2247
|
|
|
1501
2248
|
return documents
|
|
1502
2249
|
|
|
1503
|
-
def _drop_duplicate_documents(self, documents:
|
|
2250
|
+
def _drop_duplicate_documents(self, documents: list[Document]) -> list[Document]:
|
|
1504
2251
|
"""
|
|
1505
2252
|
Drop duplicate documents based on same hash ID.
|
|
1506
2253
|
|
|
1507
2254
|
"""
|
|
1508
|
-
_hash_ids:
|
|
1509
|
-
_documents:
|
|
2255
|
+
_hash_ids: set = set()
|
|
2256
|
+
_documents: list[Document] = []
|
|
1510
2257
|
|
|
1511
2258
|
for document in documents:
|
|
1512
2259
|
if document.id in _hash_ids:
|
|
@@ -1521,7 +2268,7 @@ class QdrantDocumentStore:
|
|
|
1521
2268
|
|
|
1522
2269
|
return _documents
|
|
1523
2270
|
|
|
1524
|
-
def _prepare_collection_params(self):
|
|
2271
|
+
def _prepare_collection_params(self) -> dict[str, Any]:
|
|
1525
2272
|
"""
|
|
1526
2273
|
Prepares the common parameters for collection creation.
|
|
1527
2274
|
"""
|
|
@@ -1534,10 +2281,9 @@ class QdrantDocumentStore:
|
|
|
1534
2281
|
"optimizers_config": self.optimizers_config,
|
|
1535
2282
|
"wal_config": self.wal_config,
|
|
1536
2283
|
"quantization_config": self.quantization_config,
|
|
1537
|
-
"init_from": self.init_from,
|
|
1538
2284
|
}
|
|
1539
2285
|
|
|
1540
|
-
def _prepare_client_params(self):
|
|
2286
|
+
def _prepare_client_params(self) -> dict[str, Any]:
|
|
1541
2287
|
"""
|
|
1542
2288
|
Prepares the common parameters for client initialization.
|
|
1543
2289
|
|
|
@@ -1554,18 +2300,21 @@ class QdrantDocumentStore:
|
|
|
1554
2300
|
"timeout": self.timeout,
|
|
1555
2301
|
"host": self.host,
|
|
1556
2302
|
"path": self.path,
|
|
1557
|
-
|
|
2303
|
+
# NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
|
|
2304
|
+
# class attribute. For example, the resolved api key is added to metadata by the QdrantClient class
|
|
2305
|
+
# when using a hosted Qdrant service, which means running to_dict() exposes the api key.
|
|
2306
|
+
"metadata": {**self.metadata},
|
|
1558
2307
|
"force_disable_check_same_thread": self.force_disable_check_same_thread,
|
|
1559
2308
|
}
|
|
1560
2309
|
|
|
1561
2310
|
def _prepare_collection_config(
|
|
1562
2311
|
self,
|
|
1563
2312
|
embedding_dim: int,
|
|
1564
|
-
distance,
|
|
1565
|
-
on_disk:
|
|
1566
|
-
use_sparse_embeddings:
|
|
2313
|
+
distance: rest.Distance,
|
|
2314
|
+
on_disk: bool | None = None,
|
|
2315
|
+
use_sparse_embeddings: bool | None = None,
|
|
1567
2316
|
sparse_idf: bool = False,
|
|
1568
|
-
):
|
|
2317
|
+
) -> tuple[dict[str, rest.VectorParams] | rest.VectorParams, dict[str, rest.SparseVectorParams] | None]:
|
|
1569
2318
|
"""
|
|
1570
2319
|
Prepares the configuration for creating or recreating a Qdrant collection.
|
|
1571
2320
|
|
|
@@ -1577,12 +2326,14 @@ class QdrantDocumentStore:
|
|
|
1577
2326
|
use_sparse_embeddings = self.use_sparse_embeddings
|
|
1578
2327
|
|
|
1579
2328
|
# dense vectors configuration
|
|
1580
|
-
|
|
1581
|
-
|
|
2329
|
+
base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
|
|
2330
|
+
vectors_config: rest.VectorParams | dict[str, rest.VectorParams] = base_vectors_config
|
|
2331
|
+
|
|
2332
|
+
sparse_vectors_config: dict[str, rest.SparseVectorParams] | None = None
|
|
1582
2333
|
|
|
1583
2334
|
if use_sparse_embeddings:
|
|
1584
2335
|
# in this case, we need to define named vectors
|
|
1585
|
-
vectors_config = {DENSE_VECTORS_NAME:
|
|
2336
|
+
vectors_config = {DENSE_VECTORS_NAME: base_vectors_config}
|
|
1586
2337
|
|
|
1587
2338
|
sparse_vectors_config = {
|
|
1588
2339
|
SPARSE_VECTORS_NAME: rest.SparseVectorParams(
|
|
@@ -1595,9 +2346,13 @@ class QdrantDocumentStore:
|
|
|
1595
2346
|
|
|
1596
2347
|
return vectors_config, sparse_vectors_config
|
|
1597
2348
|
|
|
1598
|
-
|
|
2349
|
+
@staticmethod
|
|
2350
|
+
def _validate_filters(filters: dict[str, Any] | rest.Filter | None = None) -> None:
|
|
1599
2351
|
"""
|
|
1600
2352
|
Validates the filters provided for querying.
|
|
2353
|
+
|
|
2354
|
+
:param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
|
|
2355
|
+
:raises ValueError: If the filters are not in the correct format or syntax.
|
|
1601
2356
|
"""
|
|
1602
2357
|
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
1603
2358
|
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
@@ -1607,7 +2362,9 @@ class QdrantDocumentStore:
|
|
|
1607
2362
|
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
|
|
1608
2363
|
raise ValueError(msg)
|
|
1609
2364
|
|
|
1610
|
-
def _process_query_point_results(
|
|
2365
|
+
def _process_query_point_results(
|
|
2366
|
+
self, results: list[rest.ScoredPoint], scale_score: bool = False
|
|
2367
|
+
) -> list[Document]:
|
|
1611
2368
|
"""
|
|
1612
2369
|
Processes query results from Qdrant.
|
|
1613
2370
|
"""
|
|
@@ -1619,15 +2376,17 @@ class QdrantDocumentStore:
|
|
|
1619
2376
|
if scale_score:
|
|
1620
2377
|
for document in documents:
|
|
1621
2378
|
score = document.score
|
|
2379
|
+
if score is None:
|
|
2380
|
+
continue
|
|
1622
2381
|
if self.similarity == "cosine":
|
|
1623
2382
|
score = (score + 1) / 2
|
|
1624
2383
|
else:
|
|
1625
|
-
score = float(1 / (1 +
|
|
2384
|
+
score = float(1 / (1 + exp(-score / 100)))
|
|
1626
2385
|
document.score = score
|
|
1627
2386
|
|
|
1628
2387
|
return documents
|
|
1629
2388
|
|
|
1630
|
-
def _process_group_results(self, groups):
|
|
2389
|
+
def _process_group_results(self, groups: list[rest.PointGroup]) -> list[Document]:
|
|
1631
2390
|
"""
|
|
1632
2391
|
Processes grouped query results from Qdrant.
|
|
1633
2392
|
|
|
@@ -1644,16 +2403,22 @@ class QdrantDocumentStore:
|
|
|
1644
2403
|
def _validate_collection_compatibility(
|
|
1645
2404
|
self,
|
|
1646
2405
|
collection_name: str,
|
|
1647
|
-
collection_info,
|
|
1648
|
-
distance,
|
|
2406
|
+
collection_info: rest.CollectionInfo,
|
|
2407
|
+
distance: rest.Distance,
|
|
1649
2408
|
embedding_dim: int,
|
|
1650
|
-
):
|
|
2409
|
+
) -> None:
|
|
1651
2410
|
"""
|
|
1652
2411
|
Validates that an existing collection is compatible with the current configuration.
|
|
1653
2412
|
"""
|
|
1654
|
-
|
|
2413
|
+
vectors_config = collection_info.config.params.vectors
|
|
1655
2414
|
|
|
1656
|
-
if
|
|
2415
|
+
if vectors_config is None:
|
|
2416
|
+
msg = f"Collection '{collection_name}' has no vector configuration."
|
|
2417
|
+
raise QdrantStoreError(msg)
|
|
2418
|
+
|
|
2419
|
+
has_named_vectors = isinstance(vectors_config, dict)
|
|
2420
|
+
|
|
2421
|
+
if has_named_vectors and DENSE_VECTORS_NAME not in vectors_config:
|
|
1657
2422
|
msg = (
|
|
1658
2423
|
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
1659
2424
|
f"but it has been originally created outside of Haystack and is not supported. "
|
|
@@ -1685,11 +2450,20 @@ class QdrantDocumentStore:
|
|
|
1685
2450
|
|
|
1686
2451
|
# Get current distance and vector size based on collection configuration
|
|
1687
2452
|
if self.use_sparse_embeddings:
|
|
1688
|
-
|
|
1689
|
-
|
|
2453
|
+
if not isinstance(vectors_config, dict):
|
|
2454
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for sparse embeddings."
|
|
2455
|
+
raise QdrantStoreError(msg)
|
|
2456
|
+
|
|
2457
|
+
dense_vector_config = vectors_config[DENSE_VECTORS_NAME]
|
|
2458
|
+
current_distance = dense_vector_config.distance
|
|
2459
|
+
current_vector_size = dense_vector_config.size
|
|
1690
2460
|
else:
|
|
1691
|
-
|
|
1692
|
-
|
|
2461
|
+
if isinstance(vectors_config, dict):
|
|
2462
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for dense embeddings only."
|
|
2463
|
+
raise QdrantStoreError(msg)
|
|
2464
|
+
|
|
2465
|
+
current_distance = vectors_config.distance
|
|
2466
|
+
current_vector_size = vectors_config.size
|
|
1693
2467
|
|
|
1694
2468
|
# Validate distance metric
|
|
1695
2469
|
if current_distance != distance:
|