qdrant-haystack 9.1.1__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  import inspect
2
+ from collections.abc import AsyncGenerator, Generator
2
3
  from itertools import islice
3
- from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Union
4
+ from typing import Any, ClassVar, cast
4
5
 
5
- import numpy as np
6
6
  import qdrant_client
7
7
  from haystack import default_from_dict, default_to_dict, logging
8
8
  from haystack.dataclasses import Document
@@ -10,7 +10,7 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
10
10
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
11
11
  from haystack.document_stores.types import DuplicatePolicy
12
12
  from haystack.utils import Secret, deserialize_secrets_inplace
13
- from qdrant_client import grpc
13
+ from numpy import exp
14
14
  from qdrant_client.http import models as rest
15
15
  from qdrant_client.http.exceptions import UnexpectedResponse
16
16
  from tqdm import tqdm
@@ -26,15 +26,21 @@ from .filters import convert_filters_to_qdrant
26
26
 
27
27
  logger = logging.getLogger(__name__)
28
28
 
29
+ # Default group size to apply when using group_by
30
+ # - Our methods use None as the default for optional group_size parameter.
31
+ # - Qdrant expects an integer and internally defaults to 3 when performing grouped queries.
32
+ # - When group_by is specified but group_size is None, we use this value instead of passing None.
33
+ DEFAULT_GROUP_SIZE = 3
34
+
29
35
 
30
36
class QdrantStoreError(DocumentStoreError):
    """Error raised when an operation against the Qdrant document store fails."""
32
38
 
33
39
 
34
- FilterType = Dict[str, Union[Dict[str, Any], List[Any], str, int, float, bool]]
40
+ FilterType = dict[str, dict[str, Any] | list[Any] | str | int | float | bool]
35
41
 
36
42
 
37
- def get_batches_from_generator(iterable, n):
43
+ def get_batches_from_generator(iterable: list, n: int) -> Generator:
38
44
  """
39
45
  Batch elements of an iterable into fixed-length chunks or blocks.
40
46
  """
@@ -47,9 +53,8 @@ def get_batches_from_generator(iterable, n):
47
53
 
48
54
  class QdrantDocumentStore:
49
55
  """
50
- A QdrantDocumentStore implementation that you
51
- can use with any Qdrant instance: in-memory, disk-persisted, Docker-based,
52
- and Qdrant Cloud Cluster deployments.
56
+ A QdrantDocumentStore implementation that you can use with any Qdrant instance: in-memory, disk-persisted,
57
+ Docker-based, and Qdrant Cloud Cluster deployments.
53
58
 
54
59
  Usage example by creating an in-memory instance:
55
60
 
@@ -59,7 +64,8 @@ class QdrantDocumentStore:
59
64
 
60
65
  document_store = QdrantDocumentStore(
61
66
  ":memory:",
62
- recreate_index=True
67
+ recreate_index=True,
68
+ embedding_dim=5
63
69
  )
64
70
  document_store.write_documents([
65
71
  Document(content="This is first", embedding=[0.0]*5),
@@ -84,7 +90,7 @@ class QdrantDocumentStore:
84
90
  ```
85
91
  """
86
92
 
87
- SIMILARITY: ClassVar[Dict[str, str]] = {
93
+ SIMILARITY: ClassVar[dict[str, rest.Distance]] = {
88
94
  "cosine": rest.Distance.COSINE,
89
95
  "dot_product": rest.Distance.DOT,
90
96
  "l2": rest.Distance.EUCLID,
@@ -92,17 +98,17 @@ class QdrantDocumentStore:
92
98
 
93
99
  def __init__(
94
100
  self,
95
- location: Optional[str] = None,
96
- url: Optional[str] = None,
101
+ location: str | None = None,
102
+ url: str | None = None,
97
103
  port: int = 6333,
98
104
  grpc_port: int = 6334,
99
105
  prefer_grpc: bool = False,
100
- https: Optional[bool] = None,
101
- api_key: Optional[Secret] = None,
102
- prefix: Optional[str] = None,
103
- timeout: Optional[int] = None,
104
- host: Optional[str] = None,
105
- path: Optional[str] = None,
106
+ https: bool | None = None,
107
+ api_key: Secret | None = None,
108
+ prefix: str | None = None,
109
+ timeout: int | None = None,
110
+ host: str | None = None,
111
+ path: str | None = None,
106
112
  force_disable_check_same_thread: bool = False,
107
113
  index: str = "Document",
108
114
  embedding_dim: int = 768,
@@ -113,24 +119,25 @@ class QdrantDocumentStore:
113
119
  return_embedding: bool = False,
114
120
  progress_bar: bool = True,
115
121
  recreate_index: bool = False,
116
- shard_number: Optional[int] = None,
117
- replication_factor: Optional[int] = None,
118
- write_consistency_factor: Optional[int] = None,
119
- on_disk_payload: Optional[bool] = None,
120
- hnsw_config: Optional[dict] = None,
121
- optimizers_config: Optional[dict] = None,
122
- wal_config: Optional[dict] = None,
123
- quantization_config: Optional[dict] = None,
124
- init_from: Optional[dict] = None,
122
+ shard_number: int | None = None,
123
+ replication_factor: int | None = None,
124
+ write_consistency_factor: int | None = None,
125
+ on_disk_payload: bool | None = None,
126
+ hnsw_config: dict | None = None,
127
+ optimizers_config: dict | None = None,
128
+ wal_config: dict | None = None,
129
+ quantization_config: dict | None = None,
125
130
  wait_result_from_api: bool = True,
126
- metadata: Optional[dict] = None,
131
+ metadata: dict | None = None,
127
132
  write_batch_size: int = 100,
128
133
  scroll_size: int = 10_000,
129
- payload_fields_to_index: Optional[List[dict]] = None,
130
- ):
134
+ payload_fields_to_index: list[dict] | None = None,
135
+ ) -> None:
131
136
  """
137
+ Initializes a QdrantDocumentStore.
138
+
132
139
  :param location:
133
- If `memory` - use in-memory Qdrant instance.
140
+ If `":memory:"` - use in-memory Qdrant instance.
134
141
  If `str` - use it as a URL parameter.
135
142
  If `None` - use default values for host and port.
136
143
  :param url:
@@ -164,7 +171,7 @@ class QdrantDocumentStore:
164
171
  Dimension of the embeddings.
165
172
  :param on_disk:
166
173
  Whether to store the collection on disk.
167
- :param use_sparse_embedding:
174
+ :param use_sparse_embeddings:
168
175
  If set to `True`, enables support for sparse embeddings.
169
176
  :param sparse_idf:
170
177
  If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
@@ -201,8 +208,6 @@ class QdrantDocumentStore:
201
208
  Params for Write-Ahead-Log.
202
209
  :param quantization_config:
203
210
  Params for quantization. If `None`, quantization will be disabled.
204
- :param init_from:
205
- Use data stored in another collection to initialize this collection.
206
211
  :param wait_result_from_api:
207
212
  Whether to wait for the result from the API after each request.
208
213
  :param metadata:
@@ -215,8 +220,8 @@ class QdrantDocumentStore:
215
220
  List of payload fields to index.
216
221
  """
217
222
 
218
- self._client = None
219
- self._async_client = None
223
+ self._client: qdrant_client.QdrantClient | None = None
224
+ self._async_client: qdrant_client.AsyncQdrantClient | None = None
220
225
 
221
226
  # Store the Qdrant client specific attributes
222
227
  self.location = location
@@ -232,7 +237,6 @@ class QdrantDocumentStore:
232
237
  self.path = path
233
238
  self.force_disable_check_same_thread = force_disable_check_same_thread
234
239
  self.metadata = metadata or {}
235
- self.api_key = api_key
236
240
 
237
241
  # Store the Qdrant collection specific attributes
238
242
  self.shard_number = shard_number
@@ -243,7 +247,6 @@ class QdrantDocumentStore:
243
247
  self.optimizers_config = optimizers_config
244
248
  self.wal_config = wal_config
245
249
  self.quantization_config = quantization_config
246
- self.init_from = init_from
247
250
  self.wait_result_from_api = wait_result_from_api
248
251
  self.recreate_index = recreate_index
249
252
  self.payload_fields_to_index = payload_fields_to_index
@@ -258,9 +261,10 @@ class QdrantDocumentStore:
258
261
  self.write_batch_size = write_batch_size
259
262
  self.scroll_size = scroll_size
260
263
 
261
- def _initialize_client(self):
264
+ def _initialize_client(self) -> None:
262
265
  if self._client is None:
263
266
  client_params = self._prepare_client_params()
267
+ # This step adds the api-key and User-Agent to metadata
264
268
  self._client = qdrant_client.QdrantClient(**client_params)
265
269
  # Make sure the collection is properly set up
266
270
  self._set_up_collection(
@@ -274,7 +278,7 @@ class QdrantDocumentStore:
274
278
  self.payload_fields_to_index,
275
279
  )
276
280
 
277
- async def _initialize_async_client(self):
281
+ async def _initialize_async_client(self) -> None:
278
282
  """
279
283
  Returns the asynchronous Qdrant client, initializing it if necessary.
280
284
  """
@@ -330,8 +334,8 @@ class QdrantDocumentStore:
330
334
 
331
335
  def filter_documents(
332
336
  self,
333
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
334
- ) -> List[Document]:
337
+ filters: dict[str, Any] | rest.Filter | None = None,
338
+ ) -> list[Document]:
335
339
  """
336
340
  Returns the documents that match the provided filters.
337
341
 
@@ -344,7 +348,7 @@ class QdrantDocumentStore:
344
348
  # No need to initialize client here as _get_documents_generator
345
349
  # will handle client initialization internally
346
350
 
347
- self._validate_filters(filters)
351
+ QdrantDocumentStore._validate_filters(filters)
348
352
  return list(
349
353
  self._get_documents_generator(
350
354
  filters,
@@ -353,20 +357,20 @@ class QdrantDocumentStore:
353
357
 
354
358
  async def filter_documents_async(
355
359
  self,
356
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
357
- ) -> List[Document]:
360
+ filters: dict[str, Any] | rest.Filter | None = None,
361
+ ) -> list[Document]:
358
362
  """
359
363
  Asynchronously returns the documents that match the provided filters.
360
364
  """
361
365
  # No need to initialize client here as _get_documents_generator_async
362
366
  # will handle client initialization internally
363
367
 
364
- self._validate_filters(filters)
368
+ QdrantDocumentStore._validate_filters(filters)
365
369
  return [doc async for doc in self._get_documents_generator_async(filters)]
366
370
 
367
371
  def write_documents(
368
372
  self,
369
- documents: List[Document],
373
+ documents: list[Document],
370
374
  policy: DuplicatePolicy = DuplicatePolicy.FAIL,
371
375
  ) -> int:
372
376
  """
@@ -419,7 +423,7 @@ class QdrantDocumentStore:
419
423
 
420
424
  async def write_documents_async(
421
425
  self,
422
- documents: List[Document],
426
+ documents: list[Document],
423
427
  policy: DuplicatePolicy = DuplicatePolicy.FAIL,
424
428
  ) -> int:
425
429
  """
@@ -471,7 +475,7 @@ class QdrantDocumentStore:
471
475
  progress_bar.update(self.write_batch_size)
472
476
  return len(document_objects)
473
477
 
474
- def delete_documents(self, document_ids: List[str]) -> None:
478
+ def delete_documents(self, document_ids: list[str]) -> None:
475
479
  """
476
480
  Deletes documents that match the provided `document_ids` from the document store.
477
481
 
@@ -481,11 +485,10 @@ class QdrantDocumentStore:
481
485
  self._initialize_client()
482
486
  assert self._client is not None
483
487
 
484
- ids = [convert_id(_id) for _id in document_ids]
485
488
  try:
486
489
  self._client.delete(
487
490
  collection_name=self.index,
488
- points_selector=ids,
491
+ points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
489
492
  wait=self.wait_result_from_api,
490
493
  )
491
494
  except KeyError:
@@ -493,7 +496,7 @@ class QdrantDocumentStore:
493
496
  "Called QdrantDocumentStore.delete_documents() on a non-existing ID",
494
497
  )
495
498
 
496
- async def delete_documents_async(self, document_ids: List[str]) -> None:
499
+ async def delete_documents_async(self, document_ids: list[str]) -> None:
497
500
  """
498
501
  Asynchronously deletes documents that match the provided `document_ids` from the document store.
499
502
 
@@ -503,11 +506,10 @@ class QdrantDocumentStore:
503
506
  await self._initialize_async_client()
504
507
  assert self._async_client is not None
505
508
 
506
- ids = [convert_id(_id) for _id in document_ids]
507
509
  try:
508
510
  await self._async_client.delete(
509
511
  collection_name=self.index,
510
- points_selector=ids,
512
+ points_selector=rest.PointIdsList(points=[convert_id(_id) for _id in document_ids]),
511
513
  wait=self.wait_result_from_api,
512
514
  )
513
515
  except KeyError:
@@ -515,8 +517,748 @@ class QdrantDocumentStore:
515
517
  "Called QdrantDocumentStore.delete_documents_async() on a non-existing ID",
516
518
  )
517
519
 
520
def delete_by_filter(self, filters: dict[str, Any]) -> int:
    """
    Deletes every document selected by the given filters and reports how many were matched.

    The matching documents are counted first and then removed with one filter-based delete call,
    so the returned number is the count observed right before the deletion was issued.

    :param filters: The filters that select the documents to delete.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns:
        The number of documents that matched the filters, or `0` when the filters convert to no Qdrant filter.
    :raises QdrantStoreError:
        If converting the filters, counting, or deleting fails.
    """
    self._initialize_client()
    assert self._client is not None
    client = self._client

    try:
        selection = convert_filters_to_qdrant(filters)
        if selection is None:
            # No Qdrant filter was produced, so nothing is selected for deletion.
            return 0

        # Count before deleting: the delete call itself is not used to obtain the number of removed points.
        matched = client.count(collection_name=self.index, count_filter=selection).count
        client.delete(
            collection_name=self.index,
            points_selector=rest.FilterSelector(filter=selection),
            wait=self.wait_result_from_api,
        )
    except Exception as e:
        msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
        raise QdrantStoreError(msg) from e
    return matched
554
+
555
async def delete_by_filter_async(self, filters: dict[str, Any]) -> int:
    """
    Asynchronously deletes every document selected by the given filters and reports how many were matched.

    The matching documents are counted first and then removed with one filter-based delete call,
    so the returned number is the count observed right before the deletion was issued.

    :param filters: The filters that select the documents to delete.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns:
        The number of documents that matched the filters, or `0` when the filters convert to no Qdrant filter.
    :raises QdrantStoreError:
        If converting the filters, counting, or deleting fails.
    """
    await self._initialize_async_client()
    assert self._async_client is not None
    client = self._async_client

    try:
        selection = convert_filters_to_qdrant(filters)
        if selection is None:
            # No Qdrant filter was produced, so nothing is selected for deletion.
            return 0

        # Count before deleting: the delete call itself is not used to obtain the number of removed points.
        matched = (await client.count(collection_name=self.index, count_filter=selection)).count
        await client.delete(
            collection_name=self.index,
            points_selector=rest.FilterSelector(filter=selection),
            wait=self.wait_result_from_api,
        )
    except Exception as e:
        msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
        raise QdrantStoreError(msg) from e
    return matched
589
+
590
+ @staticmethod
591
+ def _check_stop_scrolling(next_offset: Any) -> bool:
592
+ """
593
+ Checks if scrolling should stop based on the next_offset value.
594
+
595
+ :param next_offset: The offset returned from the scroll operation.
596
+ :returns: True if scrolling should stop, False otherwise.
597
+ """
598
+ return next_offset is None or (
599
+ hasattr(next_offset, "num")
600
+ and hasattr(next_offset, "uuid")
601
+ and next_offset.num == 0
602
+ and next_offset.uuid == ""
603
+ )
604
+
605
+ @staticmethod
606
+ def _metadata_fields_info_from_schema(payload_schema: dict[str, Any]) -> dict[str, str]:
607
+ """Build field name -> type dict from Qdrant payload_schema. Used by get_metadata_fields_info (sync/async)."""
608
+ fields_info: dict[str, str] = {}
609
+ for field_name, field_config in payload_schema.items():
610
+ if hasattr(field_config, "data_type"):
611
+ fields_info[field_name] = str(field_config.data_type)
612
+ else:
613
+ fields_info[field_name] = "unknown"
614
+ return fields_info
615
+
616
+ @staticmethod
617
+ def _process_records_min_max(
618
+ records: list[Any], metadata_field: str, min_value: Any, max_value: Any
619
+ ) -> tuple[Any, Any]:
620
+ """Update min/max from a batch of Qdrant records. Used by get_metadata_field_min_max (sync/async)."""
621
+ for record in records:
622
+ if record.payload and "meta" in record.payload:
623
+ meta = record.payload["meta"]
624
+ if metadata_field in meta:
625
+ value = meta[metadata_field]
626
+ if value is not None:
627
+ if min_value is None or value < min_value:
628
+ min_value = value
629
+ if max_value is None or value > max_value:
630
+ max_value = value
631
+ return min_value, max_value
632
+
633
+ @staticmethod
634
+ def _process_records_count_unique(
635
+ records: list[Any], metadata_fields: list[str], unique_values_by_field: dict[str, set[Any]]
636
+ ) -> None:
637
+ """
638
+ Update unique_values_by_field from a batch of Qdrant records.
639
+
640
+ Used by count_unique_metadata_by_filter (sync/async).
641
+ """
642
+ for record in records:
643
+ if record.payload and "meta" in record.payload:
644
+ meta = record.payload["meta"]
645
+ for field in metadata_fields:
646
+ if field in meta:
647
+ value = meta[field]
648
+ if value is not None:
649
+ if isinstance(value, (list, dict)):
650
+ unique_values_by_field[field].add(str(value))
651
+ else:
652
+ unique_values_by_field[field].add(value)
653
+
654
+ @staticmethod
655
+ def _process_records_unique_values(
656
+ records: list[Any],
657
+ metadata_field: str,
658
+ unique_values: list[Any],
659
+ unique_values_set: set[Any],
660
+ offset: int,
661
+ limit: int,
662
+ ) -> bool:
663
+ """Collect unique values from a batch of records. Returns True when len(unique_values) >= offset + limit."""
664
+ for record in records:
665
+ if record.payload and "meta" in record.payload:
666
+ meta = record.payload["meta"]
667
+ if metadata_field in meta:
668
+ value = meta[metadata_field]
669
+ if value is not None:
670
+ hashable_value = str(value) if isinstance(value, (list, dict)) else value
671
+ if hashable_value not in unique_values_set:
672
+ unique_values_set.add(hashable_value)
673
+ unique_values.append(value)
674
+ if len(unique_values) >= offset + limit:
675
+ return True
676
+ return False
677
+
678
@staticmethod
def _create_updated_point_from_record(record: Any, meta: dict[str, Any]) -> rest.PointStruct:
    """
    Creates an updated PointStruct from a Qdrant record with merged metadata.

    The record itself is left untouched: the returned point carries a new payload dict whose "meta"
    entry is a new dict combining the record's existing metadata with `meta` (keys in `meta` win).

    :param record: The Qdrant record to update. Its `id`, `payload` and `vector` attributes are read.
    :param meta: The metadata fields to merge with existing metadata.
    :returns: A PointStruct with updated metadata and preserved vectors.
    """
    # Metadata is stored under the "meta" key in the payload.
    updated_payload = dict(record.payload or {})
    # dict() above is only a shallow copy, so updating the nested "meta" dict in place would also
    # modify record.payload["meta"]. Build a fresh merged dict instead; a missing or None "meta"
    # is treated as empty.
    updated_payload["meta"] = {**(updated_payload.get("meta") or {}), **meta}

    # create updated point preserving vectors
    # Type cast needed because record.vector type doesn't include all PointStruct vector types
    vector_value = record.vector if record.vector is not None else {}
    return rest.PointStruct(
        id=record.id,
        vector=cast(Any, vector_value),
        payload=updated_payload,
    )
702
+
703
def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
    """
    Merges the given metadata into every document that matches the provided filters.

    **Note**: This operation is not atomic. Documents matching the filter are fetched first,
    then updated. If documents are modified between the fetch and update operations,
    those changes may be lost.

    All matching points (payload and vectors) are collected in memory before anything is written back.

    :param filters: The filters to apply to select documents for updating.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param meta: The metadata fields to update. This will be merged with existing metadata.

    :returns:
        The number of documents updated; `0` when the filters convert to no Qdrant filter or match nothing.
    :raises QdrantStoreError:
        If any step of fetching or writing back fails.
    """
    self._initialize_client()
    assert self._client is not None

    try:
        selection = convert_filters_to_qdrant(filters)
        if selection is None:
            return 0

        # Page through every matching record, building the replacement point for each one.
        pending = []
        cursor = None
        exhausted = False
        while not exhausted:
            page, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=selection,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=True,  # vectors are needed because the points are upserted as a whole
            )
            pending.extend(self._create_updated_point_from_record(item, meta) for item in page)
            exhausted = self._check_stop_scrolling(cursor)

        if not pending:
            return 0

        # Write the rebuilt points back in batches of write_batch_size.
        for chunk in get_batches_from_generator(pending, self.write_batch_size):
            self._client.upsert(
                collection_name=self.index,
                points=list(chunk),
                wait=self.wait_result_from_api,
            )

        total = len(pending)
        logger.info(
            "Updated {n_docs} documents in collection '{name}' using filters.",
            n_docs=total,
            name=self.index,
        )
        return total
    except Exception as e:
        msg = f"Failed to update documents by filter in Qdrant: {e!s}"
        raise QdrantStoreError(msg) from e
767
+
768
async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
    """
    Asynchronously merges the given metadata into every document that matches the provided filters.

    **Note**: This operation is not atomic. Documents matching the filter are fetched first,
    then updated. If documents are modified between the fetch and update operations,
    those changes may be lost.

    All matching points (payload and vectors) are collected in memory before anything is written back.

    :param filters: The filters to apply to select documents for updating.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param meta: The metadata fields to update. This will be merged with existing metadata.

    :returns:
        The number of documents updated; `0` when the filters convert to no Qdrant filter or match nothing.
    :raises QdrantStoreError:
        If any step of fetching or writing back fails.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        selection = convert_filters_to_qdrant(filters)
        if selection is None:
            return 0

        # Page through every matching record, building the replacement point for each one.
        pending = []
        cursor = None
        exhausted = False
        while not exhausted:
            page, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=selection,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=True,  # vectors are needed because the points are upserted as a whole
            )
            pending.extend(self._create_updated_point_from_record(item, meta) for item in page)
            exhausted = self._check_stop_scrolling(cursor)

        if not pending:
            return 0

        # Write the rebuilt points back in batches of write_batch_size.
        for chunk in get_batches_from_generator(pending, self.write_batch_size):
            await self._async_client.upsert(
                collection_name=self.index,
                points=list(chunk),
                wait=self.wait_result_from_api,
            )

        total = len(pending)
        logger.info(
            "Updated {n_docs} documents in collection '{name}' using filters.",
            n_docs=total,
            name=self.index,
        )
        return total
    except Exception as e:
        msg = f"Failed to update documents by filter in Qdrant: {e!s}"
        raise QdrantStoreError(msg) from e
831
+
832
def delete_all_documents(self, recreate_index: bool = False) -> None:
    """
    Removes all documents from the document store.

    Without `recreate_index`, a filter-based delete with an empty `must` list is issued and any error
    is logged as a warning instead of being raised. With `recreate_index`, the current collection
    configuration is read and the collection is recreated from it.

    :param recreate_index: Whether to recreate the index after deleting all documents.
    """
    self._initialize_client()
    assert self._client is not None

    if not recreate_index:
        try:
            self._client.delete(
                collection_name=self.index,
                # presumably an empty `must` list matches every point - confirm against Qdrant docs
                points_selector=rest.FilterSelector(filter=rest.Filter(must=[])),
                wait=self.wait_result_from_api,
            )
        except Exception as e:
            # Best effort: failures here are reported but not propagated.
            logger.warning(
                f"Error {e} when calling QdrantDocumentStore.delete_all_documents()",
            )
        return

    # Snapshot the current collection configuration as plain dicts.
    info_json = self._client.get_collection(collection_name=self.index).model_dump()
    params = info_json["config"]["params"]

    # Sparse embeddings are considered enabled when the collection declares any sparse vectors.
    use_sparse_embeddings = bool(params["sparse_vectors"])

    vectors = params["vectors"]
    # NOTE(review): sparse_idf is derived from the dense vectors' hnsw_config - confirm this is the intended source.
    hnsw_config = vectors.get("config", {}).get("hnsw_config", None)
    sparse_idf = bool(use_sparse_embeddings and hnsw_config)

    # Recreate the collection using the settings read above.
    # NOTE(review): payload_schema is passed as payload_fields_to_index - confirm the expected shape matches.
    self._set_up_collection(
        collection_name=self.index,
        embedding_dim=vectors["size"],
        recreate_collection=True,
        similarity=vectors["distance"].lower(),
        use_sparse_embeddings=use_sparse_embeddings,
        sparse_idf=sparse_idf,
        on_disk=info_json["config"]["hnsw_config"]["on_disk"],
        payload_fields_to_index=info_json["payload_schema"],
    )
882
+
883
async def delete_all_documents_async(self, recreate_index: bool = False) -> None:
    """
    Asynchronously removes all documents from the document store.

    Without `recreate_index`, a filter-based delete with an empty `must` list is issued and any error
    is logged as a warning instead of being raised. With `recreate_index`, the current collection
    configuration is read and the collection is recreated from it.

    :param recreate_index: Whether to recreate the index after deleting all documents.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    if not recreate_index:
        try:
            await self._async_client.delete(
                collection_name=self.index,
                # presumably an empty `must` list matches every point - confirm against Qdrant docs
                points_selector=rest.FilterSelector(filter=rest.Filter(must=[])),
                wait=self.wait_result_from_api,
            )
        except Exception as e:
            # Best effort: failures here are reported but not propagated.
            logger.warning(
                f"Error {e} when calling QdrantDocumentStore.delete_all_documents_async()",
            )
        return

    # Snapshot the current collection configuration as plain dicts.
    collection_info = await self._async_client.get_collection(collection_name=self.index)
    info_json = collection_info.model_dump()
    params = info_json["config"]["params"]

    # Sparse embeddings are considered enabled when the collection declares any sparse vectors.
    use_sparse_embeddings = bool(params["sparse_vectors"])

    vectors = params["vectors"]
    # NOTE(review): sparse_idf is derived from the dense vectors' hnsw_config - confirm this is the intended source.
    hnsw_config = vectors.get("config", {}).get("hnsw_config", None)
    sparse_idf = bool(use_sparse_embeddings and hnsw_config)

    # Recreate the collection using the settings read above.
    # NOTE(review): payload_schema is passed as payload_fields_to_index - confirm the expected shape matches.
    await self._set_up_collection_async(
        collection_name=self.index,
        embedding_dim=vectors["size"],
        recreate_collection=True,
        similarity=vectors["distance"].lower(),
        use_sparse_embeddings=use_sparse_embeddings,
        sparse_idf=sparse_idf,
        on_disk=info_json["config"]["hnsw_config"]["on_disk"],
        payload_fields_to_index=info_json["payload_schema"],
    )
933
+
934
def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
    """
    Counts the documents that match the provided filters.

    :param filters: The filters to apply to count documents.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns: The number of documents that match the filters, or `0` if the count request fails with an
        `UnexpectedResponse` or `ValueError` (a warning is logged in that case).
    """
    self._initialize_client()
    assert self._client is not None

    # Filter conversion happens outside the try block, so conversion errors propagate to the caller.
    selection = convert_filters_to_qdrant(filters)
    try:
        total = self._client.count(collection_name=self.index, count_filter=selection).count
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter()")
        return 0
    else:
        return total
956
+
957
async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
    """
    Asynchronously counts the documents that match the provided filters.

    :param filters: The filters to apply to select documents for counting.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns:
        The number of documents that match the filters, or `0` if the count request fails with an
        `UnexpectedResponse` or `ValueError` (a warning is logged in that case).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    # Filter conversion happens outside the try block, so conversion errors propagate to the caller.
    selection = convert_filters_to_qdrant(filters)
    try:
        total = (await self._async_client.count(collection_name=self.index, count_filter=selection)).count
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter_async()")
        return 0
    else:
        return total
980
+
981
def get_metadata_fields_info(self) -> dict[str, str]:
    """
    Describes the payload fields declared in the collection's payload schema.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
        An empty dictionary is returned, and a warning logged, if the collection lookup fails with an
        `UnexpectedResponse` or `ValueError`.
    """
    self._initialize_client()
    assert self._client is not None

    try:
        schema = self._client.get_collection(self.index).payload_schema
        # A missing schema is treated the same as an empty one.
        return self._metadata_fields_info_from_schema(schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info()")
        return {}
998
+
999
async def get_metadata_fields_info_async(self) -> dict[str, str]:
    """
    Asynchronously describes the payload fields declared in the collection's payload schema.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
        An empty dictionary is returned, and a warning logged, if the collection lookup fails with an
        `UnexpectedResponse` or `ValueError`.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        schema = (await self._async_client.get_collection(self.index)).payload_schema
        # A missing schema is treated the same as an empty one.
        return self._metadata_fields_info_from_schema(schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info_async()")
        return {}
1016
+
1017
def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
    """
    Finds the smallest and largest value of a metadata field by scanning the whole collection.

    The collection is scrolled without a filter, `scroll_size` records at a time, with payloads only.

    :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.

    :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
        metadata field across all documents. Returns an empty dict if no documents have the field, or if
        any error occurs while scanning (a warning is logged in that case).
    """
    self._initialize_client()
    assert self._client is not None

    try:
        lowest: Any = None
        highest: Any = None
        cursor = None
        exhausted = False

        while not exhausted:
            page, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(page, metadata_field, lowest, highest)
            exhausted = self._check_stop_scrolling(cursor)

        # Both stay None when no record carried a non-None value for the field.
        if lowest is None or highest is None:
            return {}
        return {"min": lowest, "max": highest}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max()")
        return {}
1053
+
1054
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
    """
    Asynchronously scrolls through the whole collection and reports the smallest and largest value
    of a metadata field.

    :param metadata_field: The metadata field key (inside ``meta``) whose minimum and maximum are requested.

    :returns: A dictionary with the keys "min" and "max". An empty dictionary is returned when either bound
        is still unset after scrolling, or when any error occurs (the error is logged as a warning).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    lowest: Any = None
    highest: Any = None
    cursor = None

    try:
        finished = False
        while not finished:
            # One page per iteration; payloads only, vectors are not needed here.
            page, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(page, metadata_field, lowest, highest)
            finished = self._check_stop_scrolling(cursor)
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max_async()")
        return {}

    if lowest is None or highest is None:
        return {}
    return {"min": lowest, "max": highest}
1090
+
1091
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
    """
    Counts, per requested metadata field, how many distinct values occur among the documents matching the filters.

    :param filters: The filters restricting which documents are considered. Falsy filters mean no restriction.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the number of unique values collected for it.
        If scrolling fails, the error is logged and every field is mapped to 0.
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = None
    if filters:
        scroll_filter = convert_filters_to_qdrant(filters)

    # One accumulator set per requested field; filled in place by the record processor.
    seen_by_field: dict[str, set[Any]] = {}
    for name in metadata_fields:
        seen_by_field[name] = set()

    try:
        cursor = None
        finished = False
        while not finished:
            page, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(page, metadata_fields, seen_by_field)
            finished = self._check_stop_scrolling(cursor)

        counts: dict[str, int] = {}
        for name in metadata_fields:
            counts[name] = len(seen_by_field[name])
        return counts
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter()")
        return dict.fromkeys(metadata_fields, 0)
1127
+
1128
async def count_unique_metadata_by_filter_async(
    self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
    """
    Asynchronously counts, per requested metadata field, how many distinct values occur among the documents
    matching the filters.

    :param filters: The filters restricting which documents are considered. Falsy filters mean no restriction.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the number of unique values collected for it.
        If scrolling fails, the error is logged and every field is mapped to 0.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = None
    if filters:
        scroll_filter = convert_filters_to_qdrant(filters)

    # One accumulator set per requested field; filled in place by the record processor.
    seen_by_field: dict[str, set[Any]] = {}
    for name in metadata_fields:
        seen_by_field[name] = set()

    try:
        cursor = None
        finished = False
        while not finished:
            page, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(page, metadata_fields, seen_by_field)
            finished = self._check_stop_scrolling(cursor)

        counts: dict[str, int] = {}
        for name in metadata_fields:
            counts[name] = len(seen_by_field[name])
        return counts
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter_async()")
        return dict.fromkeys(metadata_fields, 0)
1167
+
1168
def get_metadata_field_unique_values(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Collects the unique values of a metadata field and returns one page of them.

    Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
    Scrolling stops as soon as ``offset + limit`` unique values are known, the record processor signals
    completion, or the collection is exhausted.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
        An empty list is returned if an error occurs (the error is logged as a warning).
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = None
    if filters:
        scroll_filter = convert_filters_to_qdrant(filters)

    # The list and the set are both handed to the record processor, which fills them in place.
    collected: list[Any] = []
    already_seen: set[Any] = set()

    try:
        cursor = None
        while len(collected) < offset + limit:
            page, cursor = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            page_is_enough = self._process_records_unique_values(
                page, metadata_field, collected, already_seen, offset, limit
            )
            # Short-circuit keeps the original call order: the stop check runs only if the page was not enough.
            if page_is_enough or self._check_stop_scrolling(cursor):
                break

        return collected[offset : offset + limit]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values()")
        return []
1213
+
1214
async def get_metadata_field_unique_values_async(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Asynchronously collects the unique values of a metadata field and returns one page of them.

    Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
    Scrolling stops as soon as ``offset + limit`` unique values are known, the record processor signals
    completion, or the collection is exhausted.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
        An empty list is returned if an error occurs (the error is logged as a warning).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = None
    if filters:
        scroll_filter = convert_filters_to_qdrant(filters)

    # The list and the set are both handed to the record processor, which fills them in place.
    collected: list[Any] = []
    already_seen: set[Any] = set()

    try:
        cursor = None
        while len(collected) < offset + limit:
            page, cursor = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=cursor,
                with_payload=True,
                with_vectors=False,
            )
            page_is_enough = self._process_records_unique_values(
                page, metadata_field, collected, already_seen, offset, limit
            )
            # Short-circuit keeps the original call order: the stop check runs only if the page was not enough.
            if page_is_enough or self._check_stop_scrolling(cursor):
                break

        return collected[offset : offset + limit]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values_async()")
        return []
1259
+
518
1260
  @classmethod
519
- def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore":
1261
+ def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore":
520
1262
  """
521
1263
  Deserializes the component from a dictionary.
522
1264
 
@@ -528,7 +1270,7 @@ class QdrantDocumentStore:
528
1270
  deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
529
1271
  return default_from_dict(cls, data)
530
1272
 
531
- def to_dict(self) -> Dict[str, Any]:
1273
+ def to_dict(self) -> dict[str, Any]:
532
1274
  """
533
1275
  Serializes the component to a dictionary.
534
1276
 
@@ -547,7 +1289,7 @@ class QdrantDocumentStore:
547
1289
 
548
1290
  def _get_documents_generator(
549
1291
  self,
550
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1292
+ filters: dict[str, Any] | rest.Filter | None = None,
551
1293
  ) -> Generator[Document, None, None]:
552
1294
  """
553
1295
  Returns a generator that yields documents from Qdrant based on the provided filters.
@@ -574,8 +1316,11 @@ class QdrantDocumentStore:
574
1316
  with_vectors=True,
575
1317
  )
576
1318
  stop_scrolling = next_offset is None or (
577
- isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
578
- )
1319
+ hasattr(next_offset, "num")
1320
+ and hasattr(next_offset, "uuid")
1321
+ and next_offset.num == 0
1322
+ and next_offset.uuid == ""
1323
+ ) # PointId always has num and uuid
579
1324
 
580
1325
  for record in records:
581
1326
  yield convert_qdrant_point_to_haystack_document(
@@ -584,7 +1329,7 @@ class QdrantDocumentStore:
584
1329
 
585
1330
  async def _get_documents_generator_async(
586
1331
  self,
587
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1332
+ filters: dict[str, Any] | rest.Filter | None = None,
588
1333
  ) -> AsyncGenerator[Document, None]:
589
1334
  """
590
1335
  Returns an asynchronous generator that yields documents from Qdrant based on the provided filters.
@@ -611,8 +1356,11 @@ class QdrantDocumentStore:
611
1356
  with_vectors=True,
612
1357
  )
613
1358
  stop_scrolling = next_offset is None or (
614
- isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
615
- )
1359
+ hasattr(next_offset, "num")
1360
+ and hasattr(next_offset, "uuid")
1361
+ and next_offset.num == 0
1362
+ and next_offset.uuid == ""
1363
+ ) # PointId always has num and uuid
616
1364
 
617
1365
  for record in records:
618
1366
  yield convert_qdrant_point_to_haystack_document(
@@ -621,19 +1369,17 @@ class QdrantDocumentStore:
621
1369
 
622
1370
  def get_documents_by_id(
623
1371
  self,
624
- ids: List[str],
625
- ) -> List[Document]:
1372
+ ids: list[str],
1373
+ ) -> list[Document]:
626
1374
  """
627
1375
  Retrieves documents from Qdrant by their IDs.
628
1376
 
629
1377
  :param ids:
630
1378
  A list of document IDs to retrieve.
631
- :param index:
632
- The name of the index to retrieve documents from.
633
1379
  :returns:
634
1380
  A list of documents.
635
1381
  """
636
- documents: List[Document] = []
1382
+ documents: list[Document] = []
637
1383
 
638
1384
  self._initialize_client()
639
1385
  assert self._client is not None
@@ -654,19 +1400,17 @@ class QdrantDocumentStore:
654
1400
 
655
1401
  async def get_documents_by_id_async(
656
1402
  self,
657
- ids: List[str],
658
- ) -> List[Document]:
1403
+ ids: list[str],
1404
+ ) -> list[Document]:
659
1405
  """
660
1406
  Retrieves documents from Qdrant by their IDs.
661
1407
 
662
1408
  :param ids:
663
1409
  A list of document IDs to retrieve.
664
- :param index:
665
- The name of the index to retrieve documents from.
666
1410
  :returns:
667
1411
  A list of documents.
668
1412
  """
669
- documents: List[Document] = []
1413
+ documents: list[Document] = []
670
1414
 
671
1415
  await self._initialize_async_client()
672
1416
  assert self._async_client is not None
@@ -688,14 +1432,14 @@ class QdrantDocumentStore:
688
1432
  def _query_by_sparse(
689
1433
  self,
690
1434
  query_sparse_embedding: SparseEmbedding,
691
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1435
+ filters: dict[str, Any] | rest.Filter | None = None,
692
1436
  top_k: int = 10,
693
1437
  scale_score: bool = False,
694
1438
  return_embedding: bool = False,
695
- score_threshold: Optional[float] = None,
696
- group_by: Optional[str] = None,
697
- group_size: Optional[int] = None,
698
- ) -> List[Document]:
1439
+ score_threshold: float | None = None,
1440
+ group_by: str | None = None,
1441
+ group_size: int | None = None,
1442
+ ) -> list[Document]:
699
1443
  """
700
1444
  Queries Qdrant using a sparse embedding and returns the most relevant documents.
701
1445
 
@@ -742,7 +1486,7 @@ class QdrantDocumentStore:
742
1486
  query_filter=qdrant_filters,
743
1487
  limit=top_k,
744
1488
  group_by=group_by,
745
- group_size=group_size,
1489
+ group_size=group_size or DEFAULT_GROUP_SIZE,
746
1490
  with_vectors=return_embedding,
747
1491
  score_threshold=score_threshold,
748
1492
  ).groups
@@ -764,15 +1508,15 @@ class QdrantDocumentStore:
764
1508
 
765
1509
  def _query_by_embedding(
766
1510
  self,
767
- query_embedding: List[float],
768
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1511
+ query_embedding: list[float],
1512
+ filters: dict[str, Any] | rest.Filter | None = None,
769
1513
  top_k: int = 10,
770
1514
  scale_score: bool = False,
771
1515
  return_embedding: bool = False,
772
- score_threshold: Optional[float] = None,
773
- group_by: Optional[str] = None,
774
- group_size: Optional[int] = None,
775
- ) -> List[Document]:
1516
+ score_threshold: float | None = None,
1517
+ group_by: str | None = None,
1518
+ group_size: int | None = None,
1519
+ ) -> list[Document]:
776
1520
  """
777
1521
  Queries Qdrant using a dense embedding and returns the most relevant documents.
778
1522
 
@@ -804,7 +1548,7 @@ class QdrantDocumentStore:
804
1548
  query_filter=qdrant_filters,
805
1549
  limit=top_k,
806
1550
  group_by=group_by,
807
- group_size=group_size,
1551
+ group_size=group_size or DEFAULT_GROUP_SIZE,
808
1552
  with_vectors=return_embedding,
809
1553
  score_threshold=score_threshold,
810
1554
  ).groups
@@ -824,15 +1568,15 @@ class QdrantDocumentStore:
824
1568
 
825
1569
  def _query_hybrid(
826
1570
  self,
827
- query_embedding: List[float],
1571
+ query_embedding: list[float],
828
1572
  query_sparse_embedding: SparseEmbedding,
829
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1573
+ filters: dict[str, Any] | rest.Filter | None = None,
830
1574
  top_k: int = 10,
831
1575
  return_embedding: bool = False,
832
- score_threshold: Optional[float] = None,
833
- group_by: Optional[str] = None,
834
- group_size: Optional[int] = None,
835
- ) -> List[Document]:
1576
+ score_threshold: float | None = None,
1577
+ group_by: str | None = None,
1578
+ group_size: int | None = None,
1579
+ ) -> list[Document]:
836
1580
  """
837
1581
  Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
838
1582
 
@@ -896,7 +1640,7 @@ class QdrantDocumentStore:
896
1640
  query=rest.FusionQuery(fusion=rest.Fusion.RRF),
897
1641
  limit=top_k,
898
1642
  group_by=group_by,
899
- group_size=group_size,
1643
+ group_size=group_size or DEFAULT_GROUP_SIZE,
900
1644
  score_threshold=score_threshold,
901
1645
  with_payload=True,
902
1646
  with_vectors=return_embedding,
@@ -938,14 +1682,14 @@ class QdrantDocumentStore:
938
1682
  async def _query_by_sparse_async(
939
1683
  self,
940
1684
  query_sparse_embedding: SparseEmbedding,
941
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1685
+ filters: dict[str, Any] | rest.Filter | None = None,
942
1686
  top_k: int = 10,
943
1687
  scale_score: bool = False,
944
1688
  return_embedding: bool = False,
945
- score_threshold: Optional[float] = None,
946
- group_by: Optional[str] = None,
947
- group_size: Optional[int] = None,
948
- ) -> List[Document]:
1689
+ score_threshold: float | None = None,
1690
+ group_by: str | None = None,
1691
+ group_size: int | None = None,
1692
+ ) -> list[Document]:
949
1693
  """
950
1694
  Asynchronously queries Qdrant using a sparse embedding and returns the most relevant documents.
951
1695
 
@@ -993,14 +1737,14 @@ class QdrantDocumentStore:
993
1737
  query_filter=qdrant_filters,
994
1738
  limit=top_k,
995
1739
  group_by=group_by,
996
- group_size=group_size,
1740
+ group_size=group_size or DEFAULT_GROUP_SIZE,
997
1741
  with_vectors=return_embedding,
998
1742
  score_threshold=score_threshold,
999
1743
  )
1000
1744
  groups = response.groups
1001
1745
  return self._process_group_results(groups)
1002
1746
  else:
1003
- response = await self._async_client.query_points(
1747
+ query_response = await self._async_client.query_points(
1004
1748
  collection_name=self.index,
1005
1749
  query=rest.SparseVector(
1006
1750
  indices=query_indices,
@@ -1012,20 +1756,20 @@ class QdrantDocumentStore:
1012
1756
  with_vectors=return_embedding,
1013
1757
  score_threshold=score_threshold,
1014
1758
  )
1015
- points = response.points
1759
+ points = query_response.points
1016
1760
  return self._process_query_point_results(points, scale_score=scale_score)
1017
1761
 
1018
1762
  async def _query_by_embedding_async(
1019
1763
  self,
1020
- query_embedding: List[float],
1021
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1764
+ query_embedding: list[float],
1765
+ filters: dict[str, Any] | rest.Filter | None = None,
1022
1766
  top_k: int = 10,
1023
1767
  scale_score: bool = False,
1024
1768
  return_embedding: bool = False,
1025
- score_threshold: Optional[float] = None,
1026
- group_by: Optional[str] = None,
1027
- group_size: Optional[int] = None,
1028
- ) -> List[Document]:
1769
+ score_threshold: float | None = None,
1770
+ group_by: str | None = None,
1771
+ group_size: int | None = None,
1772
+ ) -> list[Document]:
1029
1773
  """
1030
1774
  Asynchronously queries Qdrant using a dense embedding and returns the most relevant documents.
1031
1775
 
@@ -1057,14 +1801,14 @@ class QdrantDocumentStore:
1057
1801
  query_filter=qdrant_filters,
1058
1802
  limit=top_k,
1059
1803
  group_by=group_by,
1060
- group_size=group_size,
1804
+ group_size=group_size or DEFAULT_GROUP_SIZE,
1061
1805
  with_vectors=return_embedding,
1062
1806
  score_threshold=score_threshold,
1063
1807
  )
1064
1808
  groups = response.groups
1065
1809
  return self._process_group_results(groups)
1066
1810
  else:
1067
- response = await self._async_client.query_points(
1811
+ query_response = await self._async_client.query_points(
1068
1812
  collection_name=self.index,
1069
1813
  query=query_embedding,
1070
1814
  using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
@@ -1073,20 +1817,20 @@ class QdrantDocumentStore:
1073
1817
  with_vectors=return_embedding,
1074
1818
  score_threshold=score_threshold,
1075
1819
  )
1076
- points = response.points
1820
+ points = query_response.points
1077
1821
  return self._process_query_point_results(points, scale_score=scale_score)
1078
1822
 
1079
1823
  async def _query_hybrid_async(
1080
1824
  self,
1081
- query_embedding: List[float],
1825
+ query_embedding: list[float],
1082
1826
  query_sparse_embedding: SparseEmbedding,
1083
- filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
1827
+ filters: dict[str, Any] | rest.Filter | None = None,
1084
1828
  top_k: int = 10,
1085
1829
  return_embedding: bool = False,
1086
- score_threshold: Optional[float] = None,
1087
- group_by: Optional[str] = None,
1088
- group_size: Optional[int] = None,
1089
- ) -> List[Document]:
1830
+ score_threshold: float | None = None,
1831
+ group_by: str | None = None,
1832
+ group_size: int | None = None,
1833
+ ) -> list[Document]:
1090
1834
  """
1091
1835
  Asynchronously retrieves documents based on dense and sparse embeddings and fuses
1092
1836
  the results using Reciprocal Rank Fusion.
@@ -1148,14 +1892,14 @@ class QdrantDocumentStore:
1148
1892
  query=rest.FusionQuery(fusion=rest.Fusion.RRF),
1149
1893
  limit=top_k,
1150
1894
  group_by=group_by,
1151
- group_size=group_size,
1895
+ group_size=group_size or DEFAULT_GROUP_SIZE,
1152
1896
  score_threshold=score_threshold,
1153
1897
  with_payload=True,
1154
1898
  with_vectors=return_embedding,
1155
1899
  )
1156
1900
  groups = response.groups
1157
1901
  else:
1158
- response = await self._async_client.query_points(
1902
+ query_response = await self._async_client.query_points(
1159
1903
  collection_name=self.index,
1160
1904
  prefetch=[
1161
1905
  rest.Prefetch(
@@ -1178,7 +1922,7 @@ class QdrantDocumentStore:
1178
1922
  with_payload=True,
1179
1923
  with_vectors=return_embedding,
1180
1924
  )
1181
- points = response.points
1925
+ points = query_response.points
1182
1926
 
1183
1927
  except Exception as e:
1184
1928
  msg = "Error during hybrid search"
@@ -1210,9 +1954,10 @@ class QdrantDocumentStore:
1210
1954
  )
1211
1955
  raise QdrantStoreError(msg) from ke
1212
1956
 
1213
- def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None):
1957
+ def _create_payload_index(self, collection_name: str, payload_fields_to_index: list[dict] | None = None) -> None:
1214
1958
  """
1215
- Create payload index for the collection if payload_fields_to_index is provided
1959
+ Create payload index for the collection if payload_fields_to_index is provided.
1960
+
1216
1961
  See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
1217
1962
  """
1218
1963
  if payload_fields_to_index is not None:
@@ -1228,15 +1973,15 @@ class QdrantDocumentStore:
1228
1973
  )
1229
1974
 
1230
1975
  async def _create_payload_index_async(
1231
- self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None
1232
- ):
1976
+ self, collection_name: str, payload_fields_to_index: list[dict] | None = None
1977
+ ) -> None:
1233
1978
  """
1234
- Asynchronously create payload index for the collection if payload_fields_to_index is provided
1979
+ Asynchronously create payload index for the collection if payload_fields_to_index is provided.
1980
+
1235
1981
  See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
1236
1982
  """
1237
1983
  if payload_fields_to_index is not None:
1238
1984
  for payload_index in payload_fields_to_index:
1239
-
1240
1985
  # self._async_client is initialized at this point
1241
1986
  # since _initialize_async_client() is called before this method is executed
1242
1987
  assert self._async_client is not None
@@ -1256,10 +2001,11 @@ class QdrantDocumentStore:
1256
2001
  use_sparse_embeddings: bool,
1257
2002
  sparse_idf: bool,
1258
2003
  on_disk: bool = False,
1259
- payload_fields_to_index: Optional[List[dict]] = None,
1260
- ):
2004
+ payload_fields_to_index: list[dict] | None = None,
2005
+ ) -> None:
1261
2006
  """
1262
2007
  Sets up the Qdrant collection with the specified parameters.
2008
+
1263
2009
  :param collection_name:
1264
2010
  The name of the collection to set up.
1265
2011
  :param embedding_dim:
@@ -1312,10 +2058,11 @@ class QdrantDocumentStore:
1312
2058
  use_sparse_embeddings: bool,
1313
2059
  sparse_idf: bool,
1314
2060
  on_disk: bool = False,
1315
- payload_fields_to_index: Optional[List[dict]] = None,
1316
- ):
2061
+ payload_fields_to_index: list[dict] | None = None,
2062
+ ) -> None:
1317
2063
  """
1318
2064
  Asynchronously sets up the Qdrant collection with the specified parameters.
2065
+
1319
2066
  :param collection_name:
1320
2067
  The name of the collection to set up.
1321
2068
  :param embedding_dim:
@@ -1362,12 +2109,12 @@ class QdrantDocumentStore:
1362
2109
  def recreate_collection(
1363
2110
  self,
1364
2111
  collection_name: str,
1365
- distance,
2112
+ distance: rest.Distance,
1366
2113
  embedding_dim: int,
1367
- on_disk: Optional[bool] = None,
1368
- use_sparse_embeddings: Optional[bool] = None,
2114
+ on_disk: bool | None = None,
2115
+ use_sparse_embeddings: bool | None = None,
1369
2116
  sparse_idf: bool = False,
1370
- ):
2117
+ ) -> None:
1371
2118
  """
1372
2119
  Recreates the Qdrant collection with the specified parameters.
1373
2120
 
@@ -1405,12 +2152,12 @@ class QdrantDocumentStore:
1405
2152
  async def recreate_collection_async(
1406
2153
  self,
1407
2154
  collection_name: str,
1408
- distance,
2155
+ distance: rest.Distance,
1409
2156
  embedding_dim: int,
1410
- on_disk: Optional[bool] = None,
1411
- use_sparse_embeddings: Optional[bool] = None,
2157
+ on_disk: bool | None = None,
2158
+ use_sparse_embeddings: bool | None = None,
1412
2159
  sparse_idf: bool = False,
1413
- ):
2160
+ ) -> None:
1414
2161
  """
1415
2162
  Asynchronously recreates the Qdrant collection with the specified parameters.
1416
2163
 
@@ -1447,9 +2194,9 @@ class QdrantDocumentStore:
1447
2194
 
1448
2195
  def _handle_duplicate_documents(
1449
2196
  self,
1450
- documents: List[Document],
1451
- policy: DuplicatePolicy = None,
1452
- ):
2197
+ documents: list[Document],
2198
+ policy: DuplicatePolicy | None = None,
2199
+ ) -> list[Document]:
1453
2200
  """
1454
2201
  Checks whether any of the passed documents is already existing in the chosen index and returns a list of
1455
2202
  documents that are not in the index yet.
@@ -1462,7 +2209,7 @@ class QdrantDocumentStore:
1462
2209
  if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
1463
2210
  documents = self._drop_duplicate_documents(documents)
1464
2211
  documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
1465
- ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
2212
+ ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
1466
2213
 
1467
2214
  if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
1468
2215
  msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
@@ -1474,9 +2221,9 @@ class QdrantDocumentStore:
1474
2221
 
1475
2222
  async def _handle_duplicate_documents_async(
1476
2223
  self,
1477
- documents: List[Document],
1478
- policy: DuplicatePolicy = None,
1479
- ):
2224
+ documents: list[Document],
2225
+ policy: DuplicatePolicy | None = None,
2226
+ ) -> list[Document]:
1480
2227
  """
1481
2228
  Asynchronously checks whether any of the passed documents is already existing
1482
2229
  in the chosen index and returns a list of
@@ -1490,7 +2237,7 @@ class QdrantDocumentStore:
1490
2237
  if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
1491
2238
  documents = self._drop_duplicate_documents(documents)
1492
2239
  documents_found = await self.get_documents_by_id_async(ids=[doc.id for doc in documents])
1493
- ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
2240
+ ids_exist_in_db: list[str] = [doc.id for doc in documents_found]
1494
2241
 
1495
2242
  if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
1496
2243
  msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
@@ -1500,13 +2247,13 @@ class QdrantDocumentStore:
1500
2247
 
1501
2248
  return documents
1502
2249
 
1503
- def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
2250
+ def _drop_duplicate_documents(self, documents: list[Document]) -> list[Document]:
1504
2251
  """
1505
2252
  Drop duplicate documents based on same hash ID.
1506
2253
 
1507
2254
  """
1508
- _hash_ids: Set = set()
1509
- _documents: List[Document] = []
2255
+ _hash_ids: set = set()
2256
+ _documents: list[Document] = []
1510
2257
 
1511
2258
  for document in documents:
1512
2259
  if document.id in _hash_ids:
@@ -1521,7 +2268,7 @@ class QdrantDocumentStore:
1521
2268
 
1522
2269
  return _documents
1523
2270
 
1524
- def _prepare_collection_params(self):
2271
+ def _prepare_collection_params(self) -> dict[str, Any]:
1525
2272
  """
1526
2273
  Prepares the common parameters for collection creation.
1527
2274
  """
@@ -1534,10 +2281,9 @@ class QdrantDocumentStore:
1534
2281
  "optimizers_config": self.optimizers_config,
1535
2282
  "wal_config": self.wal_config,
1536
2283
  "quantization_config": self.quantization_config,
1537
- "init_from": self.init_from,
1538
2284
  }
1539
2285
 
1540
- def _prepare_client_params(self):
2286
+ def _prepare_client_params(self) -> dict[str, Any]:
1541
2287
  """
1542
2288
  Prepares the common parameters for client initialization.
1543
2289
 
@@ -1554,18 +2300,21 @@ class QdrantDocumentStore:
1554
2300
  "timeout": self.timeout,
1555
2301
  "host": self.host,
1556
2302
  "path": self.path,
1557
- "metadata": self.metadata,
2303
+ # NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
2304
+ # class attribute. For example, the resolved api key is added to metadata by the QdrantClient class
2305
+ # when using a hosted Qdrant service, which means running to_dict() exposes the api key.
2306
+ "metadata": {**self.metadata},
1558
2307
  "force_disable_check_same_thread": self.force_disable_check_same_thread,
1559
2308
  }
1560
2309
 
1561
2310
  def _prepare_collection_config(
1562
2311
  self,
1563
2312
  embedding_dim: int,
1564
- distance,
1565
- on_disk: Optional[bool] = None,
1566
- use_sparse_embeddings: Optional[bool] = None,
2313
+ distance: rest.Distance,
2314
+ on_disk: bool | None = None,
2315
+ use_sparse_embeddings: bool | None = None,
1567
2316
  sparse_idf: bool = False,
1568
- ):
2317
+ ) -> tuple[dict[str, rest.VectorParams] | rest.VectorParams, dict[str, rest.SparseVectorParams] | None]:
1569
2318
  """
1570
2319
  Prepares the configuration for creating or recreating a Qdrant collection.
1571
2320
 
@@ -1577,12 +2326,14 @@ class QdrantDocumentStore:
1577
2326
  use_sparse_embeddings = self.use_sparse_embeddings
1578
2327
 
1579
2328
  # dense vectors configuration
1580
- vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
1581
- sparse_vectors_config = None
2329
+ base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
2330
+ vectors_config: rest.VectorParams | dict[str, rest.VectorParams] = base_vectors_config
2331
+
2332
+ sparse_vectors_config: dict[str, rest.SparseVectorParams] | None = None
1582
2333
 
1583
2334
  if use_sparse_embeddings:
1584
2335
  # in this case, we need to define named vectors
1585
- vectors_config = {DENSE_VECTORS_NAME: vectors_config}
2336
+ vectors_config = {DENSE_VECTORS_NAME: base_vectors_config}
1586
2337
 
1587
2338
  sparse_vectors_config = {
1588
2339
  SPARSE_VECTORS_NAME: rest.SparseVectorParams(
@@ -1595,9 +2346,13 @@ class QdrantDocumentStore:
1595
2346
 
1596
2347
  return vectors_config, sparse_vectors_config
1597
2348
 
1598
- def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]] = None):
2349
+ @staticmethod
2350
+ def _validate_filters(filters: dict[str, Any] | rest.Filter | None = None) -> None:
1599
2351
  """
1600
2352
  Validates the filters provided for querying.
2353
+
2354
+ :param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
2355
+ :raises ValueError: If the filters are not in the correct format or syntax.
1601
2356
  """
1602
2357
  if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
1603
2358
  msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
@@ -1607,7 +2362,9 @@ class QdrantDocumentStore:
1607
2362
  msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
1608
2363
  raise ValueError(msg)
1609
2364
 
1610
- def _process_query_point_results(self, results, scale_score: bool = False):
2365
+ def _process_query_point_results(
2366
+ self, results: list[rest.ScoredPoint], scale_score: bool = False
2367
+ ) -> list[Document]:
1611
2368
  """
1612
2369
  Processes query results from Qdrant.
1613
2370
  """
@@ -1619,15 +2376,17 @@ class QdrantDocumentStore:
1619
2376
  if scale_score:
1620
2377
  for document in documents:
1621
2378
  score = document.score
2379
+ if score is None:
2380
+ continue
1622
2381
  if self.similarity == "cosine":
1623
2382
  score = (score + 1) / 2
1624
2383
  else:
1625
- score = float(1 / (1 + np.exp(-score / 100)))
2384
+ score = float(1 / (1 + exp(-score / 100)))
1626
2385
  document.score = score
1627
2386
 
1628
2387
  return documents
1629
2388
 
1630
- def _process_group_results(self, groups):
2389
+ def _process_group_results(self, groups: list[rest.PointGroup]) -> list[Document]:
1631
2390
  """
1632
2391
  Processes grouped query results from Qdrant.
1633
2392
 
@@ -1644,16 +2403,22 @@ class QdrantDocumentStore:
1644
2403
  def _validate_collection_compatibility(
1645
2404
  self,
1646
2405
  collection_name: str,
1647
- collection_info,
1648
- distance,
2406
+ collection_info: rest.CollectionInfo,
2407
+ distance: rest.Distance,
1649
2408
  embedding_dim: int,
1650
- ):
2409
+ ) -> None:
1651
2410
  """
1652
2411
  Validates that an existing collection is compatible with the current configuration.
1653
2412
  """
1654
- has_named_vectors = isinstance(collection_info.config.params.vectors, dict)
2413
+ vectors_config = collection_info.config.params.vectors
1655
2414
 
1656
- if has_named_vectors and DENSE_VECTORS_NAME not in collection_info.config.params.vectors:
2415
+ if vectors_config is None:
2416
+ msg = f"Collection '{collection_name}' has no vector configuration."
2417
+ raise QdrantStoreError(msg)
2418
+
2419
+ has_named_vectors = isinstance(vectors_config, dict)
2420
+
2421
+ if has_named_vectors and DENSE_VECTORS_NAME not in vectors_config:
1657
2422
  msg = (
1658
2423
  f"Collection '{collection_name}' already exists in Qdrant, "
1659
2424
  f"but it has been originally created outside of Haystack and is not supported. "
@@ -1685,11 +2450,20 @@ class QdrantDocumentStore:
1685
2450
 
1686
2451
  # Get current distance and vector size based on collection configuration
1687
2452
  if self.use_sparse_embeddings:
1688
- current_distance = collection_info.config.params.vectors[DENSE_VECTORS_NAME].distance
1689
- current_vector_size = collection_info.config.params.vectors[DENSE_VECTORS_NAME].size
2453
+ if not isinstance(vectors_config, dict):
2454
+ msg = f"Collection '{collection_name}' has invalid vector configuration for sparse embeddings."
2455
+ raise QdrantStoreError(msg)
2456
+
2457
+ dense_vector_config = vectors_config[DENSE_VECTORS_NAME]
2458
+ current_distance = dense_vector_config.distance
2459
+ current_vector_size = dense_vector_config.size
1690
2460
  else:
1691
- current_distance = collection_info.config.params.vectors.distance
1692
- current_vector_size = collection_info.config.params.vectors.size
2461
+ if isinstance(vectors_config, dict):
2462
+ msg = f"Collection '{collection_name}' has invalid vector configuration for dense embeddings only."
2463
+ raise QdrantStoreError(msg)
2464
+
2465
+ current_distance = vectors_config.distance
2466
+ current_vector_size = vectors_config.size
1693
2467
 
1694
2468
  # Validate distance metric
1695
2469
  if current_distance != distance: