qdrant-haystack 10.0.0__tar.gz → 10.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/CHANGELOG.md +15 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/PKG-INFO +1 -1
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +417 -6
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_document_store.py +160 -9
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_document_store_async.py +173 -9
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/.gitignore +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/LICENSE.txt +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/README.md +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/pydoc/config_docusaurus.yml +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/pyproject.toml +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/components/retrievers/py.typed +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/py.typed +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/__init__.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/conftest.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_converters.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_embedding_retriever.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_hybrid_retriever.py +0 -0
- {qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/tests/test_sparse_embedding_retriever.py +0 -0
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v9.6.0] - 2026-02-02
|
|
4
|
+
|
|
5
|
+
### 🚀 Features
|
|
6
|
+
|
|
7
|
+
- Adding count with filtering operations to `QdrantDocumentStore` (#2803)
|
|
8
|
+
|
|
9
|
+
### 🧹 Chores
|
|
10
|
+
|
|
11
|
+
- Make fmt command more forgiving (#2671)
|
|
12
|
+
- [**breaking**] Qdrant - drop Python 3.9 and use X|Y typing (#2726)
|
|
13
|
+
- Disabling progress bar in `QdrantDocumentStore` tests (#2797)
|
|
14
|
+
|
|
15
|
+
### 🌀 Miscellaneous
|
|
16
|
+
|
|
17
|
+
|
|
3
18
|
## [integrations/qdrant-v9.5.0] - 2026-01-07
|
|
4
19
|
|
|
5
20
|
### 🚀 Features
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 10.0.0
|
|
3
|
+
Version: 10.2.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -517,7 +517,7 @@ class QdrantDocumentStore:
|
|
|
517
517
|
"Called QdrantDocumentStore.delete_documents_async() on a non-existing ID",
|
|
518
518
|
)
|
|
519
519
|
|
|
520
|
-
def delete_by_filter(self, filters: dict[str, Any]) -> None:
|
|
520
|
+
def delete_by_filter(self, filters: dict[str, Any]) -> int:
|
|
521
521
|
"""
|
|
522
522
|
Deletes all documents that match the provided filters.
|
|
523
523
|
|
|
@@ -533,20 +533,26 @@ class QdrantDocumentStore:
|
|
|
533
533
|
try:
|
|
534
534
|
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
535
535
|
if qdrant_filter is None:
|
|
536
|
-
return
|
|
536
|
+
return 0
|
|
537
|
+
|
|
538
|
+
count_response = self._client.count(
|
|
539
|
+
collection_name=self.index,
|
|
540
|
+
count_filter=qdrant_filter,
|
|
541
|
+
)
|
|
542
|
+
deleted_count = count_response.count
|
|
537
543
|
|
|
538
|
-
# perform deletion using FilterSelector
|
|
539
544
|
self._client.delete(
|
|
540
545
|
collection_name=self.index,
|
|
541
546
|
points_selector=rest.FilterSelector(filter=qdrant_filter),
|
|
542
547
|
wait=self.wait_result_from_api,
|
|
543
548
|
)
|
|
549
|
+
return deleted_count
|
|
544
550
|
|
|
545
551
|
except Exception as e:
|
|
546
552
|
msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
|
|
547
553
|
raise QdrantStoreError(msg) from e
|
|
548
554
|
|
|
549
|
-
async def delete_by_filter_async(self, filters: dict[str, Any]) -> None:
|
|
555
|
+
async def delete_by_filter_async(self, filters: dict[str, Any]) -> int:
|
|
550
556
|
"""
|
|
551
557
|
Asynchronously deletes all documents that match the provided filters.
|
|
552
558
|
|
|
@@ -562,14 +568,20 @@ class QdrantDocumentStore:
|
|
|
562
568
|
try:
|
|
563
569
|
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
564
570
|
if qdrant_filter is None:
|
|
565
|
-
return
|
|
571
|
+
return 0
|
|
572
|
+
|
|
573
|
+
count_response = await self._async_client.count(
|
|
574
|
+
collection_name=self.index,
|
|
575
|
+
count_filter=qdrant_filter,
|
|
576
|
+
)
|
|
577
|
+
deleted_count = count_response.count
|
|
566
578
|
|
|
567
|
-
# perform deletion using FilterSelector
|
|
568
579
|
await self._async_client.delete(
|
|
569
580
|
collection_name=self.index,
|
|
570
581
|
points_selector=rest.FilterSelector(filter=qdrant_filter),
|
|
571
582
|
wait=self.wait_result_from_api,
|
|
572
583
|
)
|
|
584
|
+
return deleted_count
|
|
573
585
|
|
|
574
586
|
except Exception as e:
|
|
575
587
|
msg = f"Failed to delete documents by filter from Qdrant: {e!s}"
|
|
@@ -590,6 +602,79 @@ class QdrantDocumentStore:
|
|
|
590
602
|
and next_offset.uuid == ""
|
|
591
603
|
)
|
|
592
604
|
|
|
605
|
+
@staticmethod
|
|
606
|
+
def _metadata_fields_info_from_schema(payload_schema: dict[str, Any]) -> dict[str, str]:
|
|
607
|
+
"""Build field name -> type dict from Qdrant payload_schema. Used by get_metadata_fields_info (sync/async)."""
|
|
608
|
+
fields_info: dict[str, str] = {}
|
|
609
|
+
for field_name, field_config in payload_schema.items():
|
|
610
|
+
if hasattr(field_config, "data_type"):
|
|
611
|
+
fields_info[field_name] = str(field_config.data_type)
|
|
612
|
+
else:
|
|
613
|
+
fields_info[field_name] = "unknown"
|
|
614
|
+
return fields_info
|
|
615
|
+
|
|
616
|
+
@staticmethod
|
|
617
|
+
def _process_records_min_max(
|
|
618
|
+
records: list[Any], metadata_field: str, min_value: Any, max_value: Any
|
|
619
|
+
) -> tuple[Any, Any]:
|
|
620
|
+
"""Update min/max from a batch of Qdrant records. Used by get_metadata_field_min_max (sync/async)."""
|
|
621
|
+
for record in records:
|
|
622
|
+
if record.payload and "meta" in record.payload:
|
|
623
|
+
meta = record.payload["meta"]
|
|
624
|
+
if metadata_field in meta:
|
|
625
|
+
value = meta[metadata_field]
|
|
626
|
+
if value is not None:
|
|
627
|
+
if min_value is None or value < min_value:
|
|
628
|
+
min_value = value
|
|
629
|
+
if max_value is None or value > max_value:
|
|
630
|
+
max_value = value
|
|
631
|
+
return min_value, max_value
|
|
632
|
+
|
|
633
|
+
@staticmethod
|
|
634
|
+
def _process_records_count_unique(
|
|
635
|
+
records: list[Any], metadata_fields: list[str], unique_values_by_field: dict[str, set[Any]]
|
|
636
|
+
) -> None:
|
|
637
|
+
"""
|
|
638
|
+
Update unique_values_by_field from a batch of Qdrant records.
|
|
639
|
+
|
|
640
|
+
Used by count_unique_metadata_by_filter (sync/async).
|
|
641
|
+
"""
|
|
642
|
+
for record in records:
|
|
643
|
+
if record.payload and "meta" in record.payload:
|
|
644
|
+
meta = record.payload["meta"]
|
|
645
|
+
for field in metadata_fields:
|
|
646
|
+
if field in meta:
|
|
647
|
+
value = meta[field]
|
|
648
|
+
if value is not None:
|
|
649
|
+
if isinstance(value, (list, dict)):
|
|
650
|
+
unique_values_by_field[field].add(str(value))
|
|
651
|
+
else:
|
|
652
|
+
unique_values_by_field[field].add(value)
|
|
653
|
+
|
|
654
|
+
@staticmethod
|
|
655
|
+
def _process_records_unique_values(
|
|
656
|
+
records: list[Any],
|
|
657
|
+
metadata_field: str,
|
|
658
|
+
unique_values: list[Any],
|
|
659
|
+
unique_values_set: set[Any],
|
|
660
|
+
offset: int,
|
|
661
|
+
limit: int,
|
|
662
|
+
) -> bool:
|
|
663
|
+
"""Collect unique values from a batch of records. Returns True when len(unique_values) >= offset + limit."""
|
|
664
|
+
for record in records:
|
|
665
|
+
if record.payload and "meta" in record.payload:
|
|
666
|
+
meta = record.payload["meta"]
|
|
667
|
+
if metadata_field in meta:
|
|
668
|
+
value = meta[metadata_field]
|
|
669
|
+
if value is not None:
|
|
670
|
+
hashable_value = str(value) if isinstance(value, (list, dict)) else value
|
|
671
|
+
if hashable_value not in unique_values_set:
|
|
672
|
+
unique_values_set.add(hashable_value)
|
|
673
|
+
unique_values.append(value)
|
|
674
|
+
if len(unique_values) >= offset + limit:
|
|
675
|
+
return True
|
|
676
|
+
return False
|
|
677
|
+
|
|
593
678
|
@staticmethod
|
|
594
679
|
def _create_updated_point_from_record(record: Any, meta: dict[str, Any]) -> rest.PointStruct:
|
|
595
680
|
"""
|
|
@@ -846,6 +931,332 @@ class QdrantDocumentStore:
|
|
|
846
931
|
f"Error {e} when calling QdrantDocumentStore.delete_all_documents_async()",
|
|
847
932
|
)
|
|
848
933
|
|
|
934
|
+
def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
|
|
935
|
+
"""
|
|
936
|
+
Returns the number of documents that match the provided filters.
|
|
937
|
+
|
|
938
|
+
:param filters: The filters to apply to count documents.
|
|
939
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
940
|
+
|
|
941
|
+
:returns: The number of documents that match the filters.
|
|
942
|
+
"""
|
|
943
|
+
self._initialize_client()
|
|
944
|
+
assert self._client is not None
|
|
945
|
+
|
|
946
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
947
|
+
try:
|
|
948
|
+
response = self._client.count(
|
|
949
|
+
collection_name=self.index,
|
|
950
|
+
count_filter=qdrant_filter,
|
|
951
|
+
)
|
|
952
|
+
return response.count
|
|
953
|
+
except (UnexpectedResponse, ValueError) as e:
|
|
954
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter()")
|
|
955
|
+
return 0
|
|
956
|
+
|
|
957
|
+
async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
|
|
958
|
+
"""
|
|
959
|
+
Asynchronously returns the number of documents that match the provided filters.
|
|
960
|
+
|
|
961
|
+
:param filters: The filters to apply to select documents for counting.
|
|
962
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
963
|
+
|
|
964
|
+
:returns:
|
|
965
|
+
The number of documents that match the filters.
|
|
966
|
+
"""
|
|
967
|
+
await self._initialize_async_client()
|
|
968
|
+
assert self._async_client is not None
|
|
969
|
+
|
|
970
|
+
qdrant_filter = convert_filters_to_qdrant(filters)
|
|
971
|
+
try:
|
|
972
|
+
response = await self._async_client.count(
|
|
973
|
+
collection_name=self.index,
|
|
974
|
+
count_filter=qdrant_filter,
|
|
975
|
+
)
|
|
976
|
+
return response.count
|
|
977
|
+
except (UnexpectedResponse, ValueError) as e:
|
|
978
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.count_documents_by_filter_async()")
|
|
979
|
+
return 0
|
|
980
|
+
|
|
981
|
+
def get_metadata_fields_info(self) -> dict[str, str]:
|
|
982
|
+
"""
|
|
983
|
+
Returns the information about the fields from the collection.
|
|
984
|
+
|
|
985
|
+
:returns:
|
|
986
|
+
A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
|
|
987
|
+
"""
|
|
988
|
+
self._initialize_client()
|
|
989
|
+
assert self._client is not None
|
|
990
|
+
|
|
991
|
+
try:
|
|
992
|
+
collection_info = self._client.get_collection(self.index)
|
|
993
|
+
payload_schema = collection_info.payload_schema or {}
|
|
994
|
+
return self._metadata_fields_info_from_schema(payload_schema)
|
|
995
|
+
except (UnexpectedResponse, ValueError) as e:
|
|
996
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info()")
|
|
997
|
+
return {}
|
|
998
|
+
|
|
999
|
+
async def get_metadata_fields_info_async(self) -> dict[str, str]:
|
|
1000
|
+
"""
|
|
1001
|
+
Asynchronously returns the information about the fields from the collection.
|
|
1002
|
+
|
|
1003
|
+
:returns:
|
|
1004
|
+
A dictionary mapping field names to their types (e.g., {"field_name": "integer"}).
|
|
1005
|
+
"""
|
|
1006
|
+
await self._initialize_async_client()
|
|
1007
|
+
assert self._async_client is not None
|
|
1008
|
+
|
|
1009
|
+
try:
|
|
1010
|
+
collection_info = await self._async_client.get_collection(self.index)
|
|
1011
|
+
payload_schema = collection_info.payload_schema or {}
|
|
1012
|
+
return self._metadata_fields_info_from_schema(payload_schema)
|
|
1013
|
+
except (UnexpectedResponse, ValueError) as e:
|
|
1014
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_fields_info_async()")
|
|
1015
|
+
return {}
|
|
1016
|
+
|
|
1017
|
+
def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
|
|
1018
|
+
"""
|
|
1019
|
+
Returns the minimum and maximum values for the given metadata field.
|
|
1020
|
+
|
|
1021
|
+
:param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
|
|
1022
|
+
|
|
1023
|
+
:returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
|
|
1024
|
+
metadata field across all documents. Returns an empty dict if no documents have the field.
|
|
1025
|
+
"""
|
|
1026
|
+
self._initialize_client()
|
|
1027
|
+
assert self._client is not None
|
|
1028
|
+
|
|
1029
|
+
try:
|
|
1030
|
+
min_value: Any = None
|
|
1031
|
+
max_value: Any = None
|
|
1032
|
+
next_offset = None
|
|
1033
|
+
|
|
1034
|
+
while True:
|
|
1035
|
+
records, next_offset = self._client.scroll(
|
|
1036
|
+
collection_name=self.index,
|
|
1037
|
+
scroll_filter=None,
|
|
1038
|
+
limit=self.scroll_size,
|
|
1039
|
+
offset=next_offset,
|
|
1040
|
+
with_payload=True,
|
|
1041
|
+
with_vectors=False,
|
|
1042
|
+
)
|
|
1043
|
+
min_value, max_value = self._process_records_min_max(records, metadata_field, min_value, max_value)
|
|
1044
|
+
if self._check_stop_scrolling(next_offset):
|
|
1045
|
+
break
|
|
1046
|
+
|
|
1047
|
+
if min_value is not None and max_value is not None:
|
|
1048
|
+
return {"min": min_value, "max": max_value}
|
|
1049
|
+
return {}
|
|
1050
|
+
except Exception as e:
|
|
1051
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max()")
|
|
1052
|
+
return {}
|
|
1053
|
+
|
|
1054
|
+
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
|
|
1055
|
+
"""
|
|
1056
|
+
Asynchronously returns the minimum and maximum values for the given metadata field.
|
|
1057
|
+
|
|
1058
|
+
:param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
|
|
1059
|
+
|
|
1060
|
+
:returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
|
|
1061
|
+
metadata field across all documents. Returns an empty dict if no documents have the field.
|
|
1062
|
+
"""
|
|
1063
|
+
await self._initialize_async_client()
|
|
1064
|
+
assert self._async_client is not None
|
|
1065
|
+
|
|
1066
|
+
try:
|
|
1067
|
+
min_value: Any = None
|
|
1068
|
+
max_value: Any = None
|
|
1069
|
+
next_offset = None
|
|
1070
|
+
|
|
1071
|
+
while True:
|
|
1072
|
+
records, next_offset = await self._async_client.scroll(
|
|
1073
|
+
collection_name=self.index,
|
|
1074
|
+
scroll_filter=None,
|
|
1075
|
+
limit=self.scroll_size,
|
|
1076
|
+
offset=next_offset,
|
|
1077
|
+
with_payload=True,
|
|
1078
|
+
with_vectors=False,
|
|
1079
|
+
)
|
|
1080
|
+
min_value, max_value = self._process_records_min_max(records, metadata_field, min_value, max_value)
|
|
1081
|
+
if self._check_stop_scrolling(next_offset):
|
|
1082
|
+
break
|
|
1083
|
+
|
|
1084
|
+
if min_value is not None and max_value is not None:
|
|
1085
|
+
return {"min": min_value, "max": max_value}
|
|
1086
|
+
return {}
|
|
1087
|
+
except Exception as e:
|
|
1088
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_min_max_async()")
|
|
1089
|
+
return {}
|
|
1090
|
+
|
|
1091
|
+
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
|
|
1092
|
+
"""
|
|
1093
|
+
Returns the number of unique values for each specified metadata field among documents that match the filters.
|
|
1094
|
+
|
|
1095
|
+
:param filters: The filters to restrict the documents considered.
|
|
1096
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
1097
|
+
:param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
|
|
1098
|
+
|
|
1099
|
+
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
|
|
1100
|
+
documents.
|
|
1101
|
+
"""
|
|
1102
|
+
self._initialize_client()
|
|
1103
|
+
assert self._client is not None
|
|
1104
|
+
|
|
1105
|
+
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
|
|
1106
|
+
unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
|
|
1107
|
+
|
|
1108
|
+
try:
|
|
1109
|
+
next_offset = None
|
|
1110
|
+
while True:
|
|
1111
|
+
records, next_offset = self._client.scroll(
|
|
1112
|
+
collection_name=self.index,
|
|
1113
|
+
scroll_filter=qdrant_filter,
|
|
1114
|
+
limit=self.scroll_size,
|
|
1115
|
+
offset=next_offset,
|
|
1116
|
+
with_payload=True,
|
|
1117
|
+
with_vectors=False,
|
|
1118
|
+
)
|
|
1119
|
+
self._process_records_count_unique(records, metadata_fields, unique_values_by_field)
|
|
1120
|
+
if self._check_stop_scrolling(next_offset):
|
|
1121
|
+
break
|
|
1122
|
+
|
|
1123
|
+
return {field: len(unique_values_by_field[field]) for field in metadata_fields}
|
|
1124
|
+
except Exception as e:
|
|
1125
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter()")
|
|
1126
|
+
return dict.fromkeys(metadata_fields, 0)
|
|
1127
|
+
|
|
1128
|
+
async def count_unique_metadata_by_filter_async(
|
|
1129
|
+
self, filters: dict[str, Any], metadata_fields: list[str]
|
|
1130
|
+
) -> dict[str, int]:
|
|
1131
|
+
"""
|
|
1132
|
+
Asynchronously returns the number of unique values for each specified metadata field among documents that
|
|
1133
|
+
match the filters.
|
|
1134
|
+
|
|
1135
|
+
:param filters: The filters to restrict the documents considered.
|
|
1136
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
1137
|
+
:param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
|
|
1138
|
+
|
|
1139
|
+
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
|
|
1140
|
+
documents.
|
|
1141
|
+
"""
|
|
1142
|
+
await self._initialize_async_client()
|
|
1143
|
+
assert self._async_client is not None
|
|
1144
|
+
|
|
1145
|
+
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
|
|
1146
|
+
unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
|
|
1147
|
+
|
|
1148
|
+
try:
|
|
1149
|
+
next_offset = None
|
|
1150
|
+
while True:
|
|
1151
|
+
records, next_offset = await self._async_client.scroll(
|
|
1152
|
+
collection_name=self.index,
|
|
1153
|
+
scroll_filter=qdrant_filter,
|
|
1154
|
+
limit=self.scroll_size,
|
|
1155
|
+
offset=next_offset,
|
|
1156
|
+
with_payload=True,
|
|
1157
|
+
with_vectors=False,
|
|
1158
|
+
)
|
|
1159
|
+
self._process_records_count_unique(records, metadata_fields, unique_values_by_field)
|
|
1160
|
+
if self._check_stop_scrolling(next_offset):
|
|
1161
|
+
break
|
|
1162
|
+
|
|
1163
|
+
return {field: len(unique_values_by_field[field]) for field in metadata_fields}
|
|
1164
|
+
except Exception as e:
|
|
1165
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.count_unique_metadata_by_filter_async()")
|
|
1166
|
+
return dict.fromkeys(metadata_fields, 0)
|
|
1167
|
+
|
|
1168
|
+
def get_metadata_field_unique_values(
|
|
1169
|
+
self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
|
|
1170
|
+
) -> list[Any]:
|
|
1171
|
+
"""
|
|
1172
|
+
Returns unique values for a metadata field, with optional filters and offset/limit pagination.
|
|
1173
|
+
|
|
1174
|
+
Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
|
|
1175
|
+
|
|
1176
|
+
:param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
|
|
1177
|
+
:param filters: Optional filters to restrict the documents considered.
|
|
1178
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
1179
|
+
:param limit: Maximum number of unique values to return per page. Defaults to 100.
|
|
1180
|
+
:param offset: Number of unique values to skip (for pagination). Defaults to 0.
|
|
1181
|
+
|
|
1182
|
+
:returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
|
|
1183
|
+
"""
|
|
1184
|
+
self._initialize_client()
|
|
1185
|
+
assert self._client is not None
|
|
1186
|
+
|
|
1187
|
+
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
|
|
1188
|
+
unique_values: list[Any] = []
|
|
1189
|
+
unique_values_set: set[Any] = set()
|
|
1190
|
+
|
|
1191
|
+
try:
|
|
1192
|
+
next_offset = None
|
|
1193
|
+
while len(unique_values) < offset + limit:
|
|
1194
|
+
records, next_offset = self._client.scroll(
|
|
1195
|
+
collection_name=self.index,
|
|
1196
|
+
scroll_filter=qdrant_filter,
|
|
1197
|
+
limit=self.scroll_size,
|
|
1198
|
+
offset=next_offset,
|
|
1199
|
+
with_payload=True,
|
|
1200
|
+
with_vectors=False,
|
|
1201
|
+
)
|
|
1202
|
+
if self._process_records_unique_values(
|
|
1203
|
+
records, metadata_field, unique_values, unique_values_set, offset, limit
|
|
1204
|
+
):
|
|
1205
|
+
break
|
|
1206
|
+
if self._check_stop_scrolling(next_offset):
|
|
1207
|
+
break
|
|
1208
|
+
|
|
1209
|
+
return unique_values[offset : offset + limit]
|
|
1210
|
+
except Exception as e:
|
|
1211
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values()")
|
|
1212
|
+
return []
|
|
1213
|
+
|
|
1214
|
+
async def get_metadata_field_unique_values_async(
|
|
1215
|
+
self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
|
|
1216
|
+
) -> list[Any]:
|
|
1217
|
+
"""
|
|
1218
|
+
Asynchronously returns unique values for a metadata field, with optional filters and offset/limit pagination.
|
|
1219
|
+
|
|
1220
|
+
Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
|
|
1221
|
+
|
|
1222
|
+
:param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
|
|
1223
|
+
:param filters: Optional filters to restrict the documents considered.
|
|
1224
|
+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
1225
|
+
:param limit: Maximum number of unique values to return per page. Defaults to 100.
|
|
1226
|
+
:param offset: Number of unique values to skip (for pagination). Defaults to 0.
|
|
1227
|
+
|
|
1228
|
+
:returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
|
|
1229
|
+
"""
|
|
1230
|
+
await self._initialize_async_client()
|
|
1231
|
+
assert self._async_client is not None
|
|
1232
|
+
|
|
1233
|
+
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
|
|
1234
|
+
unique_values: list[Any] = []
|
|
1235
|
+
unique_values_set: set[Any] = set()
|
|
1236
|
+
|
|
1237
|
+
try:
|
|
1238
|
+
next_offset = None
|
|
1239
|
+
while len(unique_values) < offset + limit:
|
|
1240
|
+
records, next_offset = await self._async_client.scroll(
|
|
1241
|
+
collection_name=self.index,
|
|
1242
|
+
scroll_filter=qdrant_filter,
|
|
1243
|
+
limit=self.scroll_size,
|
|
1244
|
+
offset=next_offset,
|
|
1245
|
+
with_payload=True,
|
|
1246
|
+
with_vectors=False,
|
|
1247
|
+
)
|
|
1248
|
+
if self._process_records_unique_values(
|
|
1249
|
+
records, metadata_field, unique_values, unique_values_set, offset, limit
|
|
1250
|
+
):
|
|
1251
|
+
break
|
|
1252
|
+
if self._check_stop_scrolling(next_offset):
|
|
1253
|
+
break
|
|
1254
|
+
|
|
1255
|
+
return unique_values[offset : offset + limit]
|
|
1256
|
+
except Exception as e:
|
|
1257
|
+
logger.warning(f"Error {e} when calling QdrantDocumentStore.get_metadata_field_unique_values_async()")
|
|
1258
|
+
return []
|
|
1259
|
+
|
|
849
1260
|
@classmethod
|
|
850
1261
|
def from_dict(cls, data: dict[str, Any]) -> "QdrantDocumentStore":
|
|
851
1262
|
"""
|
|
@@ -31,6 +31,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
31
31
|
return_embedding=True,
|
|
32
32
|
wait_result_from_api=True,
|
|
33
33
|
use_sparse_embeddings=False,
|
|
34
|
+
progress_bar=False,
|
|
34
35
|
)
|
|
35
36
|
|
|
36
37
|
def test_init_is_lazy(self):
|
|
@@ -146,7 +147,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
146
147
|
assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF
|
|
147
148
|
|
|
148
149
|
def test_query_hybrid(self, generate_sparse_embedding):
|
|
149
|
-
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
150
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
|
|
150
151
|
|
|
151
152
|
docs = []
|
|
152
153
|
for i in range(20):
|
|
@@ -171,7 +172,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
171
172
|
assert document.embedding
|
|
172
173
|
|
|
173
174
|
def test_query_hybrid_with_group_by(self, generate_sparse_embedding):
|
|
174
|
-
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
175
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
|
|
175
176
|
|
|
176
177
|
docs = []
|
|
177
178
|
for i in range(20):
|
|
@@ -347,7 +348,11 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
347
348
|
]
|
|
348
349
|
document_store.write_documents(docs)
|
|
349
350
|
assert document_store.count_documents() == 3
|
|
350
|
-
|
|
351
|
+
|
|
352
|
+
deleted_count = document_store.delete_by_filter(
|
|
353
|
+
filters={"field": "meta.category", "operator": "==", "value": "A"}
|
|
354
|
+
)
|
|
355
|
+
assert deleted_count == 2
|
|
351
356
|
|
|
352
357
|
# Verify only category B remains
|
|
353
358
|
remaining_docs = document_store.filter_documents()
|
|
@@ -355,7 +360,8 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
355
360
|
assert remaining_docs[0].meta["category"] == "B"
|
|
356
361
|
|
|
357
362
|
# Delete remaining document by year
|
|
358
|
-
document_store.delete_by_filter(filters={"field": "meta.year", "operator": "==", "value": 2023})
|
|
363
|
+
deleted_count = document_store.delete_by_filter(filters={"field": "meta.year", "operator": "==", "value": 2023})
|
|
364
|
+
assert deleted_count == 1
|
|
359
365
|
assert document_store.count_documents() == 0
|
|
360
366
|
|
|
361
367
|
def test_delete_by_filter_no_matches(self, document_store: QdrantDocumentStore):
|
|
@@ -367,7 +373,10 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
367
373
|
assert document_store.count_documents() == 2
|
|
368
374
|
|
|
369
375
|
# try to delete documents with category="C" (no matches)
|
|
370
|
-
document_store.delete_by_filter(
|
|
376
|
+
deleted_count = document_store.delete_by_filter(
|
|
377
|
+
filters={"field": "meta.category", "operator": "==", "value": "C"}
|
|
378
|
+
)
|
|
379
|
+
assert deleted_count == 0
|
|
371
380
|
assert document_store.count_documents() == 2
|
|
372
381
|
|
|
373
382
|
def test_delete_by_filter_advanced_filters(self, document_store: QdrantDocumentStore):
|
|
@@ -379,8 +388,8 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
379
388
|
document_store.write_documents(docs)
|
|
380
389
|
assert document_store.count_documents() == 3
|
|
381
390
|
|
|
382
|
-
# AND condition
|
|
383
|
-
document_store.delete_by_filter(
|
|
391
|
+
# AND condition (matches only Doc 1)
|
|
392
|
+
deleted_count = document_store.delete_by_filter(
|
|
384
393
|
filters={
|
|
385
394
|
"operator": "AND",
|
|
386
395
|
"conditions": [
|
|
@@ -389,10 +398,11 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
389
398
|
],
|
|
390
399
|
}
|
|
391
400
|
)
|
|
401
|
+
assert deleted_count == 1
|
|
392
402
|
assert document_store.count_documents() == 2
|
|
393
403
|
|
|
394
|
-
# OR condition
|
|
395
|
-
document_store.delete_by_filter(
|
|
404
|
+
# OR condition (matches Doc 2 and Doc 3)
|
|
405
|
+
deleted_count = document_store.delete_by_filter(
|
|
396
406
|
filters={
|
|
397
407
|
"operator": "OR",
|
|
398
408
|
"conditions": [
|
|
@@ -401,6 +411,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
401
411
|
],
|
|
402
412
|
}
|
|
403
413
|
)
|
|
414
|
+
assert deleted_count == 2
|
|
404
415
|
assert document_store.count_documents() == 0
|
|
405
416
|
|
|
406
417
|
def test_update_by_filter(self, document_store: QdrantDocumentStore):
|
|
@@ -527,3 +538,143 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
527
538
|
assert len(updated_docs) == 1
|
|
528
539
|
assert updated_docs[0].embedding is not None
|
|
529
540
|
assert len(updated_docs[0].embedding) == 768
|
|
541
|
+
|
|
542
|
+
def test_count_documents_by_filter(self, document_store: QdrantDocumentStore):
|
|
543
|
+
"""Test counting documents with filters."""
|
|
544
|
+
docs = [
|
|
545
|
+
Document(content="Doc 1", meta={"category": "A", "year": 2023}),
|
|
546
|
+
Document(content="Doc 2", meta={"category": "A", "year": 2024}),
|
|
547
|
+
Document(content="Doc 3", meta={"category": "B", "year": 2023}),
|
|
548
|
+
Document(content="Doc 4", meta={"category": "B", "year": 2024}),
|
|
549
|
+
]
|
|
550
|
+
document_store.write_documents(docs)
|
|
551
|
+
|
|
552
|
+
# Test counting all documents
|
|
553
|
+
assert document_store.count_documents() == 4
|
|
554
|
+
|
|
555
|
+
# Test counting with single filter
|
|
556
|
+
count = document_store.count_documents_by_filter(
|
|
557
|
+
filters={"field": "meta.category", "operator": "==", "value": "A"}
|
|
558
|
+
)
|
|
559
|
+
assert count == 2
|
|
560
|
+
|
|
561
|
+
# Test counting with multiple filters
|
|
562
|
+
count = document_store.count_documents_by_filter(
|
|
563
|
+
filters={
|
|
564
|
+
"operator": "AND",
|
|
565
|
+
"conditions": [
|
|
566
|
+
{"field": "meta.category", "operator": "==", "value": "B"},
|
|
567
|
+
{"field": "meta.year", "operator": "==", "value": 2023},
|
|
568
|
+
],
|
|
569
|
+
}
|
|
570
|
+
)
|
|
571
|
+
assert count == 1
|
|
572
|
+
|
|
573
|
+
def test_get_metadata_fields_info(self, document_store: QdrantDocumentStore):
|
|
574
|
+
"""Test getting metadata field information."""
|
|
575
|
+
docs = [
|
|
576
|
+
Document(content="Doc 1", meta={"category": "A", "score": 0.9, "tags": ["tag1", "tag2"]}),
|
|
577
|
+
Document(content="Doc 2", meta={"category": "B", "score": 0.8, "tags": ["tag2"]}),
|
|
578
|
+
]
|
|
579
|
+
document_store.write_documents(docs)
|
|
580
|
+
|
|
581
|
+
fields_info = document_store.get_metadata_fields_info()
|
|
582
|
+
# Should return empty dict or field info depending on Qdrant collection setup
|
|
583
|
+
assert isinstance(fields_info, dict)
|
|
584
|
+
|
|
585
|
+
def test_get_metadata_field_min_max(self, document_store: QdrantDocumentStore):
|
|
586
|
+
"""Test getting min/max values for a metadata field."""
|
|
587
|
+
docs = [
|
|
588
|
+
Document(content="Doc 1", meta={"score": 0.5}),
|
|
589
|
+
Document(content="Doc 2", meta={"score": 0.8}),
|
|
590
|
+
Document(content="Doc 3", meta={"score": 0.3}),
|
|
591
|
+
]
|
|
592
|
+
document_store.write_documents(docs)
|
|
593
|
+
|
|
594
|
+
result = document_store.get_metadata_field_min_max("score")
|
|
595
|
+
assert result.get("min") == 0.3
|
|
596
|
+
assert result.get("max") == 0.8
|
|
597
|
+
|
|
598
|
+
def test_count_unique_metadata_by_filter(self, document_store: QdrantDocumentStore):
|
|
599
|
+
"""Test counting unique metadata field values."""
|
|
600
|
+
docs = [
|
|
601
|
+
Document(content="Doc 1", meta={"category": "A"}),
|
|
602
|
+
Document(content="Doc 2", meta={"category": "B"}),
|
|
603
|
+
Document(content="Doc 3", meta={"category": "A"}),
|
|
604
|
+
Document(content="Doc 4", meta={"category": "C"}),
|
|
605
|
+
]
|
|
606
|
+
document_store.write_documents(docs)
|
|
607
|
+
|
|
608
|
+
result = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["category"])
|
|
609
|
+
assert result == {"category": 3}
|
|
610
|
+
|
|
611
|
+
def test_count_unique_metadata_by_filter_multiple_fields(self, document_store: QdrantDocumentStore):
|
|
612
|
+
"""Test counting unique values for multiple metadata fields."""
|
|
613
|
+
docs = [
|
|
614
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
615
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
616
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
617
|
+
]
|
|
618
|
+
document_store.write_documents(docs)
|
|
619
|
+
|
|
620
|
+
result = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["category", "status"])
|
|
621
|
+
assert result == {"category": 2, "status": 2}
|
|
622
|
+
|
|
623
|
+
def test_count_unique_metadata_by_filter_with_filter(self, document_store: QdrantDocumentStore):
|
|
624
|
+
"""Test counting unique metadata field values with filtering."""
|
|
625
|
+
docs = [
|
|
626
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
627
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
628
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
629
|
+
]
|
|
630
|
+
document_store.write_documents(docs)
|
|
631
|
+
|
|
632
|
+
result = document_store.count_unique_metadata_by_filter(
|
|
633
|
+
filters={"field": "meta.status", "operator": "==", "value": "active"},
|
|
634
|
+
metadata_fields=["category"],
|
|
635
|
+
)
|
|
636
|
+
assert result == {"category": 2}
|
|
637
|
+
|
|
638
|
+
def test_get_metadata_field_unique_values(self, document_store: QdrantDocumentStore):
|
|
639
|
+
"""Test getting unique metadata field values."""
|
|
640
|
+
docs = [
|
|
641
|
+
Document(content="Doc 1", meta={"category": "A"}),
|
|
642
|
+
Document(content="Doc 2", meta={"category": "B"}),
|
|
643
|
+
Document(content="Doc 3", meta={"category": "A"}),
|
|
644
|
+
Document(content="Doc 4", meta={"category": "C"}),
|
|
645
|
+
]
|
|
646
|
+
document_store.write_documents(docs)
|
|
647
|
+
|
|
648
|
+
values = document_store.get_metadata_field_unique_values("category")
|
|
649
|
+
assert len(values) == 3
|
|
650
|
+
assert set(values) == {"A", "B", "C"}
|
|
651
|
+
|
|
652
|
+
def test_get_metadata_field_unique_values_pagination(self, document_store: QdrantDocumentStore):
|
|
653
|
+
"""Test getting unique metadata field values with pagination."""
|
|
654
|
+
docs = [Document(content=f"Doc {i}", meta={"value": i % 5}) for i in range(10)]
|
|
655
|
+
document_store.write_documents(docs)
|
|
656
|
+
|
|
657
|
+
# Get first 2 unique values
|
|
658
|
+
values_page_1 = document_store.get_metadata_field_unique_values("value", limit=2, offset=0)
|
|
659
|
+
assert len(values_page_1) == 2
|
|
660
|
+
|
|
661
|
+
# Get next 2 unique values
|
|
662
|
+
values_page_2 = document_store.get_metadata_field_unique_values("value", limit=2, offset=2)
|
|
663
|
+
assert len(values_page_2) == 2
|
|
664
|
+
|
|
665
|
+
# Values should not overlap
|
|
666
|
+
assert set(values_page_1) != set(values_page_2)
|
|
667
|
+
|
|
668
|
+
def test_get_metadata_field_unique_values_with_filter(self, document_store: QdrantDocumentStore):
|
|
669
|
+
"""Test getting unique metadata field values with filtering."""
|
|
670
|
+
docs = [
|
|
671
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
672
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
673
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
674
|
+
]
|
|
675
|
+
document_store.write_documents(docs)
|
|
676
|
+
|
|
677
|
+
values = document_store.get_metadata_field_unique_values(
|
|
678
|
+
"category", filters={"field": "meta.status", "operator": "==", "value": "active"}
|
|
679
|
+
)
|
|
680
|
+
assert set(values) == {"A", "B"}
|
|
@@ -27,6 +27,7 @@ class TestQdrantDocumentStore:
|
|
|
27
27
|
return_embedding=True,
|
|
28
28
|
wait_result_from_api=True,
|
|
29
29
|
use_sparse_embeddings=False,
|
|
30
|
+
progress_bar=False,
|
|
30
31
|
)
|
|
31
32
|
|
|
32
33
|
@pytest.mark.asyncio
|
|
@@ -59,7 +60,7 @@ class TestQdrantDocumentStore:
|
|
|
59
60
|
|
|
60
61
|
@pytest.mark.asyncio
|
|
61
62
|
async def test_query_hybrid_async(self, generate_sparse_embedding):
|
|
62
|
-
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
63
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
|
|
63
64
|
|
|
64
65
|
docs = []
|
|
65
66
|
for i in range(20):
|
|
@@ -84,7 +85,7 @@ class TestQdrantDocumentStore:
|
|
|
84
85
|
|
|
85
86
|
@pytest.mark.asyncio
|
|
86
87
|
async def test_query_hybrid_with_group_by_async(self, generate_sparse_embedding):
|
|
87
|
-
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
88
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False)
|
|
88
89
|
|
|
89
90
|
docs = []
|
|
90
91
|
for i in range(20):
|
|
@@ -274,7 +275,10 @@ class TestQdrantDocumentStore:
|
|
|
274
275
|
assert await document_store.count_documents_async() == 3
|
|
275
276
|
|
|
276
277
|
# Delete documents with category="A"
|
|
277
|
-
await document_store.delete_by_filter_async(
|
|
278
|
+
deleted_count = await document_store.delete_by_filter_async(
|
|
279
|
+
filters={"field": "meta.category", "operator": "==", "value": "A"}
|
|
280
|
+
)
|
|
281
|
+
assert deleted_count == 2
|
|
278
282
|
assert await document_store.count_documents_async() == 1
|
|
279
283
|
|
|
280
284
|
# Verify only category B remains
|
|
@@ -285,7 +289,10 @@ class TestQdrantDocumentStore:
|
|
|
285
289
|
assert remaining_docs[0].meta["category"] == "B"
|
|
286
290
|
|
|
287
291
|
# Delete remaining document by year
|
|
288
|
-
await document_store.delete_by_filter_async(
|
|
292
|
+
deleted_count = await document_store.delete_by_filter_async(
|
|
293
|
+
filters={"field": "meta.year", "operator": "==", "value": 2023}
|
|
294
|
+
)
|
|
295
|
+
assert deleted_count == 1
|
|
289
296
|
assert await document_store.count_documents_async() == 0
|
|
290
297
|
|
|
291
298
|
@pytest.mark.asyncio
|
|
@@ -298,7 +305,10 @@ class TestQdrantDocumentStore:
|
|
|
298
305
|
assert await document_store.count_documents_async() == 2
|
|
299
306
|
|
|
300
307
|
# Try to delete documents with category="C" (no matches)
|
|
301
|
-
await document_store.delete_by_filter_async(
|
|
308
|
+
deleted_count = await document_store.delete_by_filter_async(
|
|
309
|
+
filters={"field": "meta.category", "operator": "==", "value": "C"}
|
|
310
|
+
)
|
|
311
|
+
assert deleted_count == 0
|
|
302
312
|
assert await document_store.count_documents_async() == 2
|
|
303
313
|
|
|
304
314
|
@pytest.mark.asyncio
|
|
@@ -311,8 +321,8 @@ class TestQdrantDocumentStore:
|
|
|
311
321
|
await document_store.write_documents_async(docs)
|
|
312
322
|
assert await document_store.count_documents_async() == 3
|
|
313
323
|
|
|
314
|
-
# AND condition
|
|
315
|
-
await document_store.delete_by_filter_async(
|
|
324
|
+
# AND condition (matches only Doc 1)
|
|
325
|
+
deleted_count = await document_store.delete_by_filter_async(
|
|
316
326
|
filters={
|
|
317
327
|
"operator": "AND",
|
|
318
328
|
"conditions": [
|
|
@@ -321,10 +331,11 @@ class TestQdrantDocumentStore:
|
|
|
321
331
|
],
|
|
322
332
|
}
|
|
323
333
|
)
|
|
334
|
+
assert deleted_count == 1
|
|
324
335
|
assert await document_store.count_documents_async() == 2
|
|
325
336
|
|
|
326
|
-
# OR condition
|
|
327
|
-
await document_store.delete_by_filter_async(
|
|
337
|
+
# OR condition (matches Doc 2 and Doc 3)
|
|
338
|
+
deleted_count = await document_store.delete_by_filter_async(
|
|
328
339
|
filters={
|
|
329
340
|
"operator": "OR",
|
|
330
341
|
"conditions": [
|
|
@@ -333,6 +344,7 @@ class TestQdrantDocumentStore:
|
|
|
333
344
|
],
|
|
334
345
|
}
|
|
335
346
|
)
|
|
347
|
+
assert deleted_count == 2
|
|
336
348
|
assert await document_store.count_documents_async() == 0
|
|
337
349
|
|
|
338
350
|
@pytest.mark.asyncio
|
|
@@ -474,3 +486,155 @@ class TestQdrantDocumentStore:
|
|
|
474
486
|
assert len(updated_docs) == 1
|
|
475
487
|
assert updated_docs[0].embedding is not None
|
|
476
488
|
assert len(updated_docs[0].embedding) == 768
|
|
489
|
+
|
|
490
|
+
@pytest.mark.asyncio
|
|
491
|
+
async def test_count_documents_by_filter_async(self, document_store: QdrantDocumentStore):
|
|
492
|
+
"""Test counting documents with filters (async)."""
|
|
493
|
+
docs = [
|
|
494
|
+
Document(content="Doc 1", meta={"category": "A", "year": 2023}),
|
|
495
|
+
Document(content="Doc 2", meta={"category": "A", "year": 2024}),
|
|
496
|
+
Document(content="Doc 3", meta={"category": "B", "year": 2023}),
|
|
497
|
+
Document(content="Doc 4", meta={"category": "B", "year": 2024}),
|
|
498
|
+
]
|
|
499
|
+
await document_store.write_documents_async(docs)
|
|
500
|
+
|
|
501
|
+
# Test counting all documents
|
|
502
|
+
count = await document_store.count_documents_async()
|
|
503
|
+
assert count == 4
|
|
504
|
+
|
|
505
|
+
# Test counting with single filter
|
|
506
|
+
count = await document_store.count_documents_by_filter_async(
|
|
507
|
+
filters={"field": "meta.category", "operator": "==", "value": "A"}
|
|
508
|
+
)
|
|
509
|
+
assert count == 2
|
|
510
|
+
|
|
511
|
+
# Test counting with multiple filters
|
|
512
|
+
count = await document_store.count_documents_by_filter_async(
|
|
513
|
+
filters={
|
|
514
|
+
"operator": "AND",
|
|
515
|
+
"conditions": [
|
|
516
|
+
{"field": "meta.category", "operator": "==", "value": "B"},
|
|
517
|
+
{"field": "meta.year", "operator": "==", "value": 2023},
|
|
518
|
+
],
|
|
519
|
+
}
|
|
520
|
+
)
|
|
521
|
+
assert count == 1
|
|
522
|
+
|
|
523
|
+
@pytest.mark.asyncio
|
|
524
|
+
async def test_get_metadata_fields_info_async(self, document_store: QdrantDocumentStore):
|
|
525
|
+
"""Test getting metadata field information (async)."""
|
|
526
|
+
docs = [
|
|
527
|
+
Document(content="Doc 1", meta={"category": "A", "score": 0.9, "tags": ["tag1", "tag2"]}),
|
|
528
|
+
Document(content="Doc 2", meta={"category": "B", "score": 0.8, "tags": ["tag2"]}),
|
|
529
|
+
]
|
|
530
|
+
await document_store.write_documents_async(docs)
|
|
531
|
+
|
|
532
|
+
fields_info = await document_store.get_metadata_fields_info_async()
|
|
533
|
+
# Should return empty dict or field info depending on Qdrant collection setup
|
|
534
|
+
assert isinstance(fields_info, dict)
|
|
535
|
+
|
|
536
|
+
@pytest.mark.asyncio
|
|
537
|
+
async def test_get_metadata_field_min_max_async(self, document_store: QdrantDocumentStore):
|
|
538
|
+
"""Test getting min/max values for a metadata field (async)."""
|
|
539
|
+
docs = [
|
|
540
|
+
Document(content="Doc 1", meta={"score": 0.5}),
|
|
541
|
+
Document(content="Doc 2", meta={"score": 0.8}),
|
|
542
|
+
Document(content="Doc 3", meta={"score": 0.3}),
|
|
543
|
+
]
|
|
544
|
+
await document_store.write_documents_async(docs)
|
|
545
|
+
|
|
546
|
+
result = await document_store.get_metadata_field_min_max_async("score")
|
|
547
|
+
assert result.get("min") == 0.3
|
|
548
|
+
assert result.get("max") == 0.8
|
|
549
|
+
|
|
550
|
+
@pytest.mark.asyncio
|
|
551
|
+
async def test_count_unique_metadata_by_filter_async(self, document_store: QdrantDocumentStore):
|
|
552
|
+
"""Test counting unique metadata field values (async)."""
|
|
553
|
+
docs = [
|
|
554
|
+
Document(content="Doc 1", meta={"category": "A"}),
|
|
555
|
+
Document(content="Doc 2", meta={"category": "B"}),
|
|
556
|
+
Document(content="Doc 3", meta={"category": "A"}),
|
|
557
|
+
Document(content="Doc 4", meta={"category": "C"}),
|
|
558
|
+
]
|
|
559
|
+
await document_store.write_documents_async(docs)
|
|
560
|
+
|
|
561
|
+
result = await document_store.count_unique_metadata_by_filter_async(filters={}, metadata_fields=["category"])
|
|
562
|
+
assert result == {"category": 3}
|
|
563
|
+
|
|
564
|
+
@pytest.mark.asyncio
|
|
565
|
+
async def test_count_unique_metadata_by_filter_async_multiple_fields(self, document_store: QdrantDocumentStore):
|
|
566
|
+
"""Test counting unique values for multiple metadata fields (async)."""
|
|
567
|
+
docs = [
|
|
568
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
569
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
570
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
571
|
+
]
|
|
572
|
+
await document_store.write_documents_async(docs)
|
|
573
|
+
|
|
574
|
+
result = await document_store.count_unique_metadata_by_filter_async(
|
|
575
|
+
filters={}, metadata_fields=["category", "status"]
|
|
576
|
+
)
|
|
577
|
+
assert result == {"category": 2, "status": 2}
|
|
578
|
+
|
|
579
|
+
@pytest.mark.asyncio
|
|
580
|
+
async def test_count_unique_metadata_by_filter_async_with_filter(self, document_store: QdrantDocumentStore):
|
|
581
|
+
"""Test counting unique metadata field values with filtering (async)."""
|
|
582
|
+
docs = [
|
|
583
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
584
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
585
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
586
|
+
]
|
|
587
|
+
await document_store.write_documents_async(docs)
|
|
588
|
+
|
|
589
|
+
result = await document_store.count_unique_metadata_by_filter_async(
|
|
590
|
+
filters={"field": "meta.status", "operator": "==", "value": "active"},
|
|
591
|
+
metadata_fields=["category"],
|
|
592
|
+
)
|
|
593
|
+
assert result == {"category": 2}
|
|
594
|
+
|
|
595
|
+
@pytest.mark.asyncio
|
|
596
|
+
async def test_get_metadata_field_unique_values_async(self, document_store: QdrantDocumentStore):
|
|
597
|
+
"""Test getting unique metadata field values (async)."""
|
|
598
|
+
docs = [
|
|
599
|
+
Document(content="Doc 1", meta={"category": "A"}),
|
|
600
|
+
Document(content="Doc 2", meta={"category": "B"}),
|
|
601
|
+
Document(content="Doc 3", meta={"category": "A"}),
|
|
602
|
+
Document(content="Doc 4", meta={"category": "C"}),
|
|
603
|
+
]
|
|
604
|
+
await document_store.write_documents_async(docs)
|
|
605
|
+
|
|
606
|
+
values = await document_store.get_metadata_field_unique_values_async("category")
|
|
607
|
+
assert len(values) == 3
|
|
608
|
+
assert set(values) == {"A", "B", "C"}
|
|
609
|
+
|
|
610
|
+
@pytest.mark.asyncio
|
|
611
|
+
async def test_get_metadata_field_unique_values_async_pagination(self, document_store: QdrantDocumentStore):
|
|
612
|
+
"""Test getting unique metadata field values with pagination (async)."""
|
|
613
|
+
docs = [Document(content=f"Doc {i}", meta={"value": i % 5}) for i in range(10)]
|
|
614
|
+
await document_store.write_documents_async(docs)
|
|
615
|
+
|
|
616
|
+
# Get first 2 unique values
|
|
617
|
+
values_page_1 = await document_store.get_metadata_field_unique_values_async("value", limit=2, offset=0)
|
|
618
|
+
assert len(values_page_1) == 2
|
|
619
|
+
|
|
620
|
+
# Get next 2 unique values
|
|
621
|
+
values_page_2 = await document_store.get_metadata_field_unique_values_async("value", limit=2, offset=2)
|
|
622
|
+
assert len(values_page_2) == 2
|
|
623
|
+
|
|
624
|
+
# Values should not overlap
|
|
625
|
+
assert set(values_page_1) != set(values_page_2)
|
|
626
|
+
|
|
627
|
+
@pytest.mark.asyncio
|
|
628
|
+
async def test_get_metadata_field_unique_values_async_with_filter(self, document_store: QdrantDocumentStore):
|
|
629
|
+
"""Test getting unique metadata field values with filtering (async)."""
|
|
630
|
+
docs = [
|
|
631
|
+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
|
|
632
|
+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
|
|
633
|
+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
|
|
634
|
+
]
|
|
635
|
+
await document_store.write_documents_async(docs)
|
|
636
|
+
|
|
637
|
+
values = await document_store.get_metadata_field_unique_values_async(
|
|
638
|
+
"category", filters={"field": "meta.status", "operator": "==", "value": "active"}
|
|
639
|
+
)
|
|
640
|
+
assert set(values) == {"A", "B"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{qdrant_haystack-10.0.0 → qdrant_haystack-10.2.0}/src/haystack_integrations/document_stores/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|