qdrant-haystack 6.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
- haystack_integrations/document_stores/qdrant/converters.py +17 -0
- haystack_integrations/document_stores/qdrant/document_store.py +6 -14
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-8.0.0.dist-info}/METADATA +2 -2
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-8.0.0.dist-info}/RECORD +7 -7
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-8.0.0.dist-info}/WHEEL +1 -1
- {qdrant_haystack-6.0.0.dist-info → qdrant_haystack-8.0.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -4,4 +4,4 @@
|
|
|
4
4
|
|
|
5
5
|
from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -22,6 +22,15 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
22
22
|
points = []
|
|
23
23
|
for document in documents:
|
|
24
24
|
payload = document.to_dict(flatten=False)
|
|
25
|
+
|
|
26
|
+
if payload.pop("dataframe", None):
|
|
27
|
+
logger.warning(
|
|
28
|
+
"Document %s has the `dataframe` field set,"
|
|
29
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
30
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
31
|
+
document.id,
|
|
32
|
+
)
|
|
33
|
+
|
|
25
34
|
if use_sparse_embeddings:
|
|
26
35
|
vector = {}
|
|
27
36
|
|
|
@@ -64,6 +73,14 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
|
|
|
64
73
|
payload = {**point.payload}
|
|
65
74
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
66
75
|
|
|
76
|
+
if payload.pop("dataframe", None):
|
|
77
|
+
logger.warning(
|
|
78
|
+
"Document %s has the `dataframe` field set,"
|
|
79
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
80
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
81
|
+
payload["id"],
|
|
82
|
+
)
|
|
83
|
+
|
|
67
84
|
if not use_sparse_embeddings:
|
|
68
85
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
69
86
|
elif hasattr(point, "vector") and point.vector is not None:
|
|
@@ -362,7 +362,6 @@ class QdrantDocumentStore:
|
|
|
362
362
|
|
|
363
363
|
document_objects = self._handle_duplicate_documents(
|
|
364
364
|
documents=documents,
|
|
365
|
-
index=self.index,
|
|
366
365
|
policy=policy,
|
|
367
366
|
)
|
|
368
367
|
|
|
@@ -468,7 +467,6 @@ class QdrantDocumentStore:
|
|
|
468
467
|
def get_documents_by_id(
|
|
469
468
|
self,
|
|
470
469
|
ids: List[str],
|
|
471
|
-
index: Optional[str] = None,
|
|
472
470
|
) -> List[Document]:
|
|
473
471
|
"""
|
|
474
472
|
Retrieves documents from Qdrant by their IDs.
|
|
@@ -480,13 +478,11 @@ class QdrantDocumentStore:
|
|
|
480
478
|
:returns:
|
|
481
479
|
A list of documents.
|
|
482
480
|
"""
|
|
483
|
-
index = index or self.index
|
|
484
|
-
|
|
485
481
|
documents: List[Document] = []
|
|
486
482
|
|
|
487
483
|
ids = [convert_id(_id) for _id in ids]
|
|
488
484
|
records = self.client.retrieve(
|
|
489
|
-
collection_name=index,
|
|
485
|
+
collection_name=self.index,
|
|
490
486
|
ids=ids,
|
|
491
487
|
with_payload=True,
|
|
492
488
|
with_vectors=True,
|
|
@@ -987,7 +983,6 @@ class QdrantDocumentStore:
|
|
|
987
983
|
def _handle_duplicate_documents(
|
|
988
984
|
self,
|
|
989
985
|
documents: List[Document],
|
|
990
|
-
index: Optional[str] = None,
|
|
991
986
|
policy: DuplicatePolicy = None,
|
|
992
987
|
):
|
|
993
988
|
"""
|
|
@@ -995,31 +990,28 @@ class QdrantDocumentStore:
|
|
|
995
990
|
documents that are not in the index yet.
|
|
996
991
|
|
|
997
992
|
:param documents: A list of Haystack Document objects.
|
|
998
|
-
:param index: name of the index
|
|
999
993
|
:param policy: The duplicate policy to use when writing documents.
|
|
1000
994
|
:returns: A list of Haystack Document objects.
|
|
1001
995
|
"""
|
|
1002
996
|
|
|
1003
|
-
index = index or self.index
|
|
1004
997
|
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
1005
|
-
documents = self._drop_duplicate_documents(documents
|
|
1006
|
-
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents]
|
|
998
|
+
documents = self._drop_duplicate_documents(documents)
|
|
999
|
+
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
|
|
1007
1000
|
ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
|
|
1008
1001
|
|
|
1009
1002
|
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
1010
|
-
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
|
|
1003
|
+
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
1011
1004
|
raise DuplicateDocumentError(msg)
|
|
1012
1005
|
|
|
1013
1006
|
documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
|
|
1014
1007
|
|
|
1015
1008
|
return documents
|
|
1016
1009
|
|
|
1017
|
-
def _drop_duplicate_documents(self, documents: List[Document]
|
|
1010
|
+
def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
|
|
1018
1011
|
"""
|
|
1019
1012
|
Drop duplicate documents based on same hash ID.
|
|
1020
1013
|
|
|
1021
1014
|
:param documents: A list of Haystack Document objects.
|
|
1022
|
-
:param index: Name of the index.
|
|
1023
1015
|
:returns: A list of Haystack Document objects.
|
|
1024
1016
|
"""
|
|
1025
1017
|
_hash_ids: Set = set()
|
|
@@ -1030,7 +1022,7 @@ class QdrantDocumentStore:
|
|
|
1030
1022
|
logger.info(
|
|
1031
1023
|
"Duplicate Documents: Document with id '%s' already exists in index '%s'",
|
|
1032
1024
|
document.id,
|
|
1033
|
-
|
|
1025
|
+
self.index,
|
|
1034
1026
|
)
|
|
1035
1027
|
continue
|
|
1036
1028
|
_documents.append(document)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 8.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=
|
|
1
|
+
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=AE1hdw4sqb0rTSqfAxKCRUOZVE8gbHdQ1wDccdN86hc,313
|
|
2
2
|
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=VsQVsvf79imTCdWUKikUxpjczl5oxOV64a91aGXZwpE,21997
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
|
-
haystack_integrations/document_stores/qdrant/converters.py,sha256=
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=
|
|
4
|
+
haystack_integrations/document_stores/qdrant/converters.py,sha256=ndFZjMjweJJDyC_994zDX4BGhGcW1SfLf79zBeyUits,3192
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=pcVuU9pNdjwROISG19vaj2Zpkl4_N2k_3UOi2XO7b00,42246
|
|
6
6
|
haystack_integrations/document_stores/qdrant/filters.py,sha256=Nv_eKIYKwUWvldJfa0omfFQ0kgqi6L3DUFeMuIWziOY,11751
|
|
7
7
|
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=yhZr4GB6N1S-Ikzl52hpuZt2aHNIb4leqFDhVMU3Uho,4910
|
|
8
|
-
qdrant_haystack-
|
|
9
|
-
qdrant_haystack-
|
|
10
|
-
qdrant_haystack-
|
|
11
|
-
qdrant_haystack-
|
|
8
|
+
qdrant_haystack-8.0.0.dist-info/METADATA,sha256=PorUoQRLkK0yhoMEifdBakszFFneNjT7KEkKIxvJ6Fg,1863
|
|
9
|
+
qdrant_haystack-8.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
qdrant_haystack-8.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-8.0.0.dist-info/RECORD,,
|
|
File without changes
|