qdrant-haystack 10.0.0__tar.gz → 10.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/CHANGELOG.md +8 -0
  2. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/PKG-INFO +1 -1
  3. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +399 -0
  4. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_document_store.py +143 -2
  5. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_document_store_async.py +155 -2
  6. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/.gitignore +0 -0
  7. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/LICENSE.txt +0 -0
  8. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/README.md +0 -0
  9. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/examples/embedding_retrieval.py +0 -0
  10. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/pydoc/config_docusaurus.yml +0 -0
  11. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/pyproject.toml +0 -0
  12. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/components/retrievers/py.typed +0 -0
  13. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  14. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
  15. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/py.typed +0 -0
  16. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  17. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  18. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  19. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  20. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/__init__.py +0 -0
  21. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/conftest.py +0 -0
  22. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_converters.py +0 -0
  23. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_dict_converters.py +0 -0
  24. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_embedding_retriever.py +0 -0
  25. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_filters.py +0 -0
  26. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_hybrid_retriever.py +0 -0
  27. {qdrant_haystack-10.0.0 → qdrant_haystack-10.1.0}/tests/test_sparse_embedding_retriever.py +0 -0
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## [unreleased]
4
+
5
+ ### 🧹 Chores
6
+
7
+ - Make fmt command more forgiving (#2671)
8
+ - [**breaking**] Qdrant - drop Python 3.9 and use X|Y typing (#2726)
9
+
10
+
3
11
  ## [integrations/qdrant-v9.5.0] - 2026-01-07
4
12
 
5
13
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qdrant-haystack
3
- Version: 10.0.0
3
+ Version: 10.1.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -590,6 +590,79 @@ class QdrantDocumentStore:
590
590
  and next_offset.uuid == ""
591
591
  )
592
592
 
593
+ @staticmethod
594
+ def _metadata_fields_info_from_schema(payload_schema: dict[str, Any]) -> dict[str, str]:
595
+ """Build field name -> type dict from Qdrant payload_schema. Used by get_metadata_fields_info (sync/async)."""
596
+ fields_info: dict[str, str] = {}
597
+ for field_name, field_config in payload_schema.items():
598
+ if hasattr(field_config, "data_type"):
599
+ fields_info[field_name] = str(field_config.data_type)
600
+ else:
601
+ fields_info[field_name] = "unknown"
602
+ return fields_info
603
+
604
+ @staticmethod
605
+ def _process_records_min_max(
606
+ records: list[Any], metadata_field: str, min_value: Any, max_value: Any
607
+ ) -> tuple[Any, Any]:
608
+ """Update min/max from a batch of Qdrant records. Used by get_metadata_field_min_max (sync/async)."""
609
+ for record in records:
610
+ if record.payload and "meta" in record.payload:
611
+ meta = record.payload["meta"]
612
+ if metadata_field in meta:
613
+ value = meta[metadata_field]
614
+ if value is not None:
615
+ if min_value is None or value < min_value:
616
+ min_value = value
617
+ if max_value is None or value > max_value:
618
+ max_value = value
619
+ return min_value, max_value
620
+
621
+ @staticmethod
622
+ def _process_records_count_unique(
623
+ records: list[Any], metadata_fields: list[str], unique_values_by_field: dict[str, set[Any]]
624
+ ) -> None:
625
+ """
626
+ Update unique_values_by_field from a batch of Qdrant records.
627
+
628
+ Used by count_unique_metadata_by_filter (sync/async).
629
+ """
630
+ for record in records:
631
+ if record.payload and "meta" in record.payload:
632
+ meta = record.payload["meta"]
633
+ for field in metadata_fields:
634
+ if field in meta:
635
+ value = meta[field]
636
+ if value is not None:
637
+ if isinstance(value, (list, dict)):
638
+ unique_values_by_field[field].add(str(value))
639
+ else:
640
+ unique_values_by_field[field].add(value)
641
+
642
+ @staticmethod
643
+ def _process_records_unique_values(
644
+ records: list[Any],
645
+ metadata_field: str,
646
+ unique_values: list[Any],
647
+ unique_values_set: set[Any],
648
+ offset: int,
649
+ limit: int,
650
+ ) -> bool:
651
+ """Collect unique values from a batch of records. Returns True when len(unique_values) >= offset + limit."""
652
+ for record in records:
653
+ if record.payload and "meta" in record.payload:
654
+ meta = record.payload["meta"]
655
+ if metadata_field in meta:
656
+ value = meta[metadata_field]
657
+ if value is not None:
658
+ hashable_value = str(value) if isinstance(value, (list, dict)) else value
659
+ if hashable_value not in unique_values_set:
660
+ unique_values_set.add(hashable_value)
661
+ unique_values.append(value)
662
+ if len(unique_values) >= offset + limit:
663
+ return True
664
+ return False
665
+
593
666
  @staticmethod
594
667
  def _create_updated_point_from_record(record: Any, meta: dict[str, Any]) -> rest.PointStruct:
595
668
  """
@@ -846,6 +919,332 @@ class QdrantDocumentStore:
846
919
  f"Error {e} when calling QdrantDocumentStore.delete_all_documents_async()",
847
920
  )
848
921
 
922
def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
    """
    Count the documents matching the given filters.

    :param filters: The filters selecting which documents are counted.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns: The number of matching documents, or ``0`` if the request fails with
        ``UnexpectedResponse`` or ``ValueError`` (a warning is logged in that case).
    """
    self._initialize_client()
    assert self._client is not None

    count_filter = convert_filters_to_qdrant(filters)
    try:
        return self._client.count(collection_name=self.index, count_filter=count_filter).count
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter()")
        return 0
944
+
945
async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
    """
    Asynchronously count the documents matching the given filters.

    :param filters: The filters selecting which documents are counted.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

    :returns: The number of matching documents, or ``0`` if the request fails with
        ``UnexpectedResponse`` or ``ValueError`` (a warning is logged in that case).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    count_filter = convert_filters_to_qdrant(filters)
    try:
        result = await self._async_client.count(collection_name=self.index, count_filter=count_filter)
        return result.count
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter_async()")
        return 0
968
+
969
def get_metadata_fields_info(self) -> dict[str, str]:
    """
    Describe the payload fields recorded in the collection's payload schema.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
        An empty dictionary is returned when the collection reports no payload schema or when
        the request fails with ``UnexpectedResponse`` or ``ValueError`` (a warning is logged).
    """
    self._initialize_client()
    assert self._client is not None

    try:
        schema = self._client.get_collection(self.index).payload_schema
        return self._metadata_fields_info_from_schema(schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info()")
        return {}
986
+
987
async def get_metadata_fields_info_async(self) -> dict[str, str]:
    """
    Asynchronously describe the payload fields recorded in the collection's payload schema.

    :returns:
        A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
        An empty dictionary is returned when the collection reports no payload schema or when
        the request fails with ``UnexpectedResponse`` or ``ValueError`` (a warning is logged).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        info = await self._async_client.get_collection(self.index)
        schema = info.payload_schema
        return self._metadata_fields_info_from_schema(schema or {})
    except (UnexpectedResponse, ValueError) as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info_async()")
        return {}
1004
+
1005
def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
    """
    Compute the smallest and largest value of a metadata field over the whole collection.

    The collection is scrolled in pages of ``self.scroll_size`` records, without vectors, and
    the running minimum/maximum is updated page by page.

    :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.

    :returns: A dictionary with the keys "min" and "max". An empty dict is returned if no document
        carries a non-``None`` value for the field, or if any error occurs (a warning is logged).
    """
    self._initialize_client()
    assert self._client is not None

    try:
        lowest: Any = None
        highest: Any = None
        page_offset = None

        finished = False
        while not finished:
            batch, page_offset = self._client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(batch, metadata_field, lowest, highest)
            finished = self._check_stop_scrolling(page_offset)

        if lowest is None or highest is None:
            return {}
        return {"min": lowest, "max": highest}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max()")
        return {}
1041
+
1042
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
    """
    Asynchronously compute the smallest and largest value of a metadata field over the whole collection.

    The collection is scrolled in pages of ``self.scroll_size`` records, without vectors, and
    the running minimum/maximum is updated page by page.

    :param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.

    :returns: A dictionary with the keys "min" and "max". An empty dict is returned if no document
        carries a non-``None`` value for the field, or if any error occurs (a warning is logged).
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    try:
        lowest: Any = None
        highest: Any = None
        page_offset = None

        finished = False
        while not finished:
            batch, page_offset = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=None,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            lowest, highest = self._process_records_min_max(batch, metadata_field, lowest, highest)
            finished = self._check_stop_scrolling(page_offset)

        if lowest is None or highest is None:
            return {}
        return {"min": lowest, "max": highest}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max_async()")
        return {}
1078
+
1079
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
    """
    Count the distinct values of each requested metadata field among the documents matching the filters.

    Matching documents are scrolled in pages of ``self.scroll_size`` records, without vectors.

    :param filters: The filters to restrict the documents considered. Empty filters mean no restriction.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
        documents. If any error occurs, a warning is logged and every field is mapped to ``0``.
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    seen_by_field: dict[str, set[Any]] = {name: set() for name in metadata_fields}

    try:
        page_offset = None
        finished = False
        while not finished:
            batch, page_offset = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(batch, metadata_fields, seen_by_field)
            finished = self._check_stop_scrolling(page_offset)

        return {name: len(seen_by_field[name]) for name in metadata_fields}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter()")
        return dict.fromkeys(metadata_fields, 0)
1115
+
1116
async def count_unique_metadata_by_filter_async(
    self, filters: dict[str, Any], metadata_fields: list[str]
) -> dict[str, int]:
    """
    Asynchronously count the distinct values of each requested metadata field among the documents
    matching the filters.

    Matching documents are scrolled in pages of ``self.scroll_size`` records, without vectors.

    :param filters: The filters to restrict the documents considered. Empty filters mean no restriction.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.

    :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
        documents. If any error occurs, a warning is logged and every field is mapped to ``0``.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    seen_by_field: dict[str, set[Any]] = {name: set() for name in metadata_fields}

    try:
        page_offset = None
        finished = False
        while not finished:
            batch, page_offset = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            self._process_records_count_unique(batch, metadata_fields, seen_by_field)
            finished = self._check_stop_scrolling(page_offset)

        return {name: len(seen_by_field[name]) for name in metadata_fields}
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter_async()")
        return dict.fromkeys(metadata_fields, 0)
1155
+
1156
def get_metadata_field_unique_values(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Return one page of the distinct values of a metadata field, optionally restricted by filters.

    Values keep the order in which they are first met while scrolling; ``offset`` and ``limit``
    select a window of that ordered sequence. Scrolling stops as soon as ``offset + limit`` unique
    values have been collected or the collection is exhausted.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
        If any error occurs, a warning is logged and an empty list is returned.
    """
    self._initialize_client()
    assert self._client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    collected: list[Any] = []
    seen: set[Any] = set()

    try:
        page_offset = None
        while len(collected) < offset + limit:
            batch, page_offset = self._client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            page_is_full = self._process_records_unique_values(
                batch, metadata_field, collected, seen, offset, limit
            )
            # Short-circuit: the stop check only runs when the page is not yet full.
            if page_is_full or self._check_stop_scrolling(page_offset):
                break

        return collected[offset : offset + limit]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values()")
        return []
1201
+
1202
async def get_metadata_field_unique_values_async(
    self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
) -> list[Any]:
    """
    Asynchronously return one page of the distinct values of a metadata field, optionally restricted by filters.

    Values keep the order in which they are first met while scrolling; ``offset`` and ``limit``
    select a window of that ordered sequence. Scrolling stops as soon as ``offset + limit`` unique
    values have been collected or the collection is exhausted.

    :param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
    :param filters: Optional filters to restrict the documents considered.
        For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
    :param limit: Maximum number of unique values to return per page. Defaults to 100.
    :param offset: Number of unique values to skip (for pagination). Defaults to 0.

    :returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
        If any error occurs, a warning is logged and an empty list is returned.
    """
    await self._initialize_async_client()
    assert self._async_client is not None

    scroll_filter = convert_filters_to_qdrant(filters) if filters else None
    collected: list[Any] = []
    seen: set[Any] = set()

    try:
        page_offset = None
        while len(collected) < offset + limit:
            batch, page_offset = await self._async_client.scroll(
                collection_name=self.index,
                scroll_filter=scroll_filter,
                limit=self.scroll_size,
                offset=page_offset,
                with_payload=True,
                with_vectors=False,
            )
            page_is_full = self._process_records_unique_values(
                batch, metadata_field, collected, seen, offset, limit
            )
            # Short-circuit: the stop check only runs when the page is not yet full.
            if page_is_full or self._check_stop_scrolling(page_offset):
                break

        return collected[offset : offset + limit]
    except Exception as e:
        logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values_async()")
        return []
1247
+
849
1248
  @classmethod
850
1249
  def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore":
851
1250
  """
@@ -31,6 +31,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
31
31
  return_embedding=True,
32
32
  wait_result_from_api=True,
33
33
  use_sparse_embeddings=False,
34
+ progress_bar=False,
34
35
  )
35
36
 
36
37
  def test_init_is_lazy(self):
@@ -146,7 +147,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
146
147
  assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF
147
148
 
148
149
  def test_query_hybrid(self, generate_sparse_embedding):
149
- document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
150
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
150
151
 
151
152
  docs = []
152
153
  for i in range(20):
@@ -171,7 +172,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
171
172
  assert document.embedding
172
173
 
173
174
  def test_query_hybrid_with_group_by(self, generate_sparse_embedding):
174
- document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
175
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
175
176
 
176
177
  docs = []
177
178
  for i in range(20):
@@ -527,3 +528,143 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
527
528
  assert len(updated_docs) == 1
528
529
  assert updated_docs[0].embedding is not None
529
530
  assert len(updated_docs[0].embedding) == 768
531
+
532
def test_count_documents_by_filter(self, document_store: QdrantDocumentStore):
    """Check filtered document counts against a small two-category, two-year corpus."""
    document_store.write_documents(
        [
            Document(content="Doc 1", meta={"category": "A", "year": 2023}),
            Document(content="Doc 2", meta={"category": "A", "year": 2024}),
            Document(content="Doc 3", meta={"category": "B", "year": 2023}),
            Document(content="Doc 4", meta={"category": "B", "year": 2024}),
        ]
    )

    # Unfiltered baseline
    assert document_store.count_documents() == 4

    # A single comparison
    only_category_a = {"field": "meta.category", "operator": "==", "value": "A"}
    count = document_store.count_documents_by_filter(filters=only_category_a)
    assert count == 2

    # Two comparisons joined with AND
    category_b_in_2023 = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.category", "operator": "==", "value": "B"},
            {"field": "meta.year", "operator": "==", "value": 2023},
        ],
    }
    count = document_store.count_documents_by_filter(filters=category_b_in_2023)
    assert count == 1
562
+
563
def test_get_metadata_fields_info(self, document_store: QdrantDocumentStore):
    """Check that metadata field information comes back as a dictionary."""
    document_store.write_documents(
        [
            Document(content="Doc 1", meta={"category": "A", "score": 0.9, "tags": ["tag1", "tag2"]}),
            Document(content="Doc 2", meta={"category": "B", "score": 0.8, "tags": ["tag2"]}),
        ]
    )

    # The content depends on the collection's payload schema (it may be empty),
    # so only the return type is checked.
    assert isinstance(document_store.get_metadata_fields_info(), dict)
574
+
575
def test_get_metadata_field_min_max(self, document_store: QdrantDocumentStore):
    """Check the min/max of a numeric metadata field over three documents."""
    scores = (0.5, 0.8, 0.3)
    document_store.write_documents(
        [Document(content=f"Doc {number}", meta={"score": score}) for number, score in enumerate(scores, start=1)]
    )

    bounds = document_store.get_metadata_field_min_max("score")
    assert bounds.get("min") == 0.3
    assert bounds.get("max") == 0.8
587
+
588
def test_count_unique_metadata_by_filter(self, document_store: QdrantDocumentStore):
    """Check the unique-value count of one field when no filter is applied."""
    categories = ("A", "B", "A", "C")
    document_store.write_documents(
        [
            Document(content=f"Doc {number}", meta={"category": category})
            for number, category in enumerate(categories, start=1)
        ]
    )

    counts = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["category"])
    assert counts == {"category": 3}
600
+
601
def test_count_unique_metadata_by_filter_multiple_fields(self, document_store: QdrantDocumentStore):
    """Check unique-value counts when several metadata fields are requested at once."""
    document_store.write_documents(
        [
            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
        ]
    )

    requested = ["category", "status"]
    counts = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=requested)
    assert counts == {"category": 2, "status": 2}
612
+
613
def test_count_unique_metadata_by_filter_with_filter(self, document_store: QdrantDocumentStore):
    """Check that unique-value counting only considers documents matching the filter."""
    document_store.write_documents(
        [
            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
        ]
    )

    active_only = {"field": "meta.status", "operator": "==", "value": "active"}
    counts = document_store.count_unique_metadata_by_filter(filters=active_only, metadata_fields=["category"])
    assert counts == {"category": 2}
627
+
628
def test_get_metadata_field_unique_values(self, document_store: QdrantDocumentStore):
    """Check that duplicate metadata values are reported once."""
    categories = ("A", "B", "A", "C")
    document_store.write_documents(
        [
            Document(content=f"Doc {number}", meta={"category": category})
            for number, category in enumerate(categories, start=1)
        ]
    )

    unique_categories = document_store.get_metadata_field_unique_values("category")
    assert len(unique_categories) == 3
    assert set(unique_categories) == {"A", "B", "C"}
641
+
642
def test_get_metadata_field_unique_values_pagination(self, document_store: QdrantDocumentStore):
    """Check that consecutive pages of unique values have the right size and share no value."""
    # Ten documents cycling through five distinct values (0..4).
    docs = [Document(content=f"Doc {i}", meta={"value": i % 5}) for i in range(10)]
    document_store.write_documents(docs)

    # Get first 2 unique values
    values_page_1 = document_store.get_metadata_field_unique_values("value", limit=2, offset=0)
    assert len(values_page_1) == 2

    # Get next 2 unique values
    values_page_2 = document_store.get_metadata_field_unique_values("value", limit=2, offset=2)
    assert len(values_page_2) == 2

    # Values should not overlap: a plain inequality of the two sets would still pass
    # if the pages shared one value, so require them to be disjoint.
    assert set(values_page_1).isdisjoint(values_page_2)
657
+
658
def test_get_metadata_field_unique_values_with_filter(self, document_store: QdrantDocumentStore):
    """Check that unique values are collected only from documents matching the filter."""
    document_store.write_documents(
        [
            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
        ]
    )

    active_only = {"field": "meta.status", "operator": "==", "value": "active"}
    active_categories = document_store.get_metadata_field_unique_values("category", filters=active_only)
    assert set(active_categories) == {"A", "B"}
@@ -27,6 +27,7 @@ class TestQdrantDocumentStore:
27
27
  return_embedding=True,
28
28
  wait_result_from_api=True,
29
29
  use_sparse_embeddings=False,
30
+ progress_bar=False,
30
31
  )
31
32
 
32
33
  @pytest.mark.asyncio
@@ -59,7 +60,7 @@ class TestQdrantDocumentStore:
59
60
 
60
61
  @pytest.mark.asyncio
61
62
  async def test_query_hybrid_async(self, generate_sparse_embedding):
62
- document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
63
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
63
64
 
64
65
  docs = []
65
66
  for i in range(20):
@@ -84,7 +85,7 @@ class TestQdrantDocumentStore:
84
85
 
85
86
  @pytest.mark.asyncio
86
87
  async def test_query_hybrid_with_group_by_async(self, generate_sparse_embedding):
87
- document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
88
+ document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
88
89
 
89
90
  docs = []
90
91
  for i in range(20):
@@ -474,3 +475,155 @@ class TestQdrantDocumentStore:
474
475
  assert len(updated_docs) == 1
475
476
  assert updated_docs[0].embedding is not None
476
477
  assert len(updated_docs[0].embedding) == 768
478
+
479
+ @pytest.mark.asyncio
480
+ async def test_count_documents_by_filter_async(self, document_store: QdrantDocumentStore):
481
+ """Test counting documents with filters (async)."""
482
+ docs = [
483
+ Document(content="Doc 1", meta={"category": "A", "year": 2023}),
484
+ Document(content="Doc 2", meta={"category": "A", "year": 2024}),
485
+ Document(content="Doc 3", meta={"category": "B", "year": 2023}),
486
+ Document(content="Doc 4", meta={"category": "B", "year": 2024}),
487
+ ]
488
+ await document_store.write_documents_async(docs)
489
+
490
+ # Test counting all documents
491
+ count = await document_store.count_documents_async()
492
+ assert count == 4
493
+
494
+ # Test counting with single filter
495
+ count = await document_store.count_documents_by_filter_async(
496
+ filters={"field": "meta.category", "operator": "==", "value": "A"}
497
+ )
498
+ assert count == 2
499
+
500
+ # Test counting with multiple filters
501
+ count = await document_store.count_documents_by_filter_async(
502
+ filters={
503
+ "operator": "AND",
504
+ "conditions": [
505
+ {"field": "meta.category", "operator": "==", "value": "B"},
506
+ {"field": "meta.year", "operator": "==", "value": 2023},
507
+ ],
508
+ }
509
+ )
510
+ assert count == 1
511
+
512
+ @pytest.mark.asyncio
513
+ async def test_get_metadata_fields_info_async(self, document_store: QdrantDocumentStore):
514
+ """Test getting metadata field information (async)."""
515
+ docs = [
516
+ Document(content="Doc 1", meta={"category": "A", "score": 0.9, "tags": ["tag1", "tag2"]}),
517
+ Document(content="Doc 2", meta={"category": "B", "score": 0.8, "tags": ["tag2"]}),
518
+ ]
519
+ await document_store.write_documents_async(docs)
520
+
521
+ fields_info = await document_store.get_metadata_fields_info_async()
522
+ # Should return empty dict or field info depending on Qdrant collection setup
523
+ assert isinstance(fields_info, dict)
524
+
525
+ @pytest.mark.asyncio
526
+ async def test_get_metadata_field_min_max_async(self, document_store: QdrantDocumentStore):
527
+ """Test getting min/max values for a metadata field (async)."""
528
+ docs = [
529
+ Document(content="Doc 1", meta={"score": 0.5}),
530
+ Document(content="Doc 2", meta={"score": 0.8}),
531
+ Document(content="Doc 3", meta={"score": 0.3}),
532
+ ]
533
+ await document_store.write_documents_async(docs)
534
+
535
+ result = await document_store.get_metadata_field_min_max_async("score")
536
+ assert result.get("min") == 0.3
537
+ assert result.get("max") == 0.8
538
+
539
+ @pytest.mark.asyncio
540
+ async def test_count_unique_metadata_by_filter_async(self, document_store: QdrantDocumentStore):
541
+ """Test counting unique metadata field values (async)."""
542
+ docs = [
543
+ Document(content="Doc 1", meta={"category": "A"}),
544
+ Document(content="Doc 2", meta={"category": "B"}),
545
+ Document(content="Doc 3", meta={"category": "A"}),
546
+ Document(content="Doc 4", meta={"category": "C"}),
547
+ ]
548
+ await document_store.write_documents_async(docs)
549
+
550
+ result = await document_store.count_unique_metadata_by_filter_async(filters={}, metadata_fields=["category"])
551
+ assert result == {"category": 3}
552
+
553
+ @pytest.mark.asyncio
554
+ async def test_count_unique_metadata_by_filter_async_multiple_fields(self, document_store: QdrantDocumentStore):
555
+ """Test counting unique values for multiple metadata fields (async)."""
556
+ docs = [
557
+ Document(content="Doc 1", meta={"category": "A", "status": "active"}),
558
+ Document(content="Doc 2", meta={"category": "B", "status": "active"}),
559
+ Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
560
+ ]
561
+ await document_store.write_documents_async(docs)
562
+
563
+ result = await document_store.count_unique_metadata_by_filter_async(
564
+ filters={}, metadata_fields=["category", "status"]
565
+ )
566
+ assert result == {"category": 2, "status": 2}
567
+
568
+ @pytest.mark.asyncio
569
+ async def test_count_unique_metadata_by_filter_async_with_filter(self, document_store: QdrantDocumentStore):
570
+ """Test counting unique metadata field values with filtering (async)."""
571
+ docs = [
572
+ Document(content="Doc 1", meta={"category": "A", "status": "active"}),
573
+ Document(content="Doc 2", meta={"category": "B", "status": "active"}),
574
+ Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
575
+ ]
576
+ await document_store.write_documents_async(docs)
577
+
578
+ result = await document_store.count_unique_metadata_by_filter_async(
579
+ filters={"field": "meta.status", "operator": "==", "value": "active"},
580
+ metadata_fields=["category"],
581
+ )
582
+ assert result == {"category": 2}
583
+
584
+ @pytest.mark.asyncio
585
+ async def test_get_metadata_field_unique_values_async(self, document_store: QdrantDocumentStore):
586
+ """Test getting unique metadata field values (async)."""
587
+ docs = [
588
+ Document(content="Doc 1", meta={"category": "A"}),
589
+ Document(content="Doc 2", meta={"category": "B"}),
590
+ Document(content="Doc 3", meta={"category": "A"}),
591
+ Document(content="Doc 4", meta={"category": "C"}),
592
+ ]
593
+ await document_store.write_documents_async(docs)
594
+
595
+ values = await document_store.get_metadata_field_unique_values_async("category")
596
+ assert len(values) == 3
597
+ assert set(values) == {"A", "B", "C"}
598
+
599
+ @pytest.mark.asyncio
600
+ async def test_get_metadata_field_unique_values_async_pagination(self, document_store: QdrantDocumentStore):
601
+ """Test getting unique metadata field values with pagination (async)."""
602
+ docs = [Document(content=f"Doc {i}", meta={"value": i % 5}) for i in range(10)]
603
+ await document_store.write_documents_async(docs)
604
+
605
+ # Get first 2 unique values
606
+ values_page_1 = await document_store.get_metadata_field_unique_values_async("value", limit=2, offset=0)
607
+ assert len(values_page_1) == 2
608
+
609
+ # Get next 2 unique values
610
+ values_page_2 = await document_store.get_metadata_field_unique_values_async("value", limit=2, offset=2)
611
+ assert len(values_page_2) == 2
612
+
613
+ # Values should not overlap
614
+ assert set(values_page_1).isdisjoint(values_page_2)
615
+
616
+ @pytest.mark.asyncio
617
+ async def test_get_metadata_field_unique_values_async_with_filter(self, document_store: QdrantDocumentStore):
618
+ """Test getting unique metadata field values with filtering (async)."""
619
+ docs = [
620
+ Document(content="Doc 1", meta={"category": "A", "status": "active"}),
621
+ Document(content="Doc 2", meta={"category": "B", "status": "active"}),
622
+ Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
623
+ ]
624
+ await document_store.write_documents_async(docs)
625
+
626
+ values = await document_store.get_metadata_field_unique_values_async(
627
+ "category", filters={"field": "meta.status", "operator": "==", "value": "active"}
628
+ )
629
+ assert set(values) == {"A", "B"}