qdrant-haystack 3.2.1__tar.gz → 3.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (26)
  1. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/PKG-INFO +2 -2
  2. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/pydoc/config.yml +1 -2
  3. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/pyproject.toml +3 -1
  4. qdrant_haystack-3.3.1/src/haystack_integrations/components/retrievers/qdrant/__init__.py +7 -0
  5. qdrant_haystack-3.3.1/src/haystack_integrations/components/retrievers/qdrant/retriever.py +241 -0
  6. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/src/haystack_integrations/document_stores/qdrant/__init__.py +2 -1
  7. qdrant_haystack-3.3.1/src/haystack_integrations/document_stores/qdrant/converters.py +80 -0
  8. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/src/haystack_integrations/document_stores/qdrant/document_store.py +147 -43
  9. qdrant_haystack-3.3.1/src/haystack_integrations/document_stores/qdrant/filters.py +237 -0
  10. qdrant_haystack-3.3.1/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +127 -0
  11. qdrant_haystack-3.3.1/tests/test_converters.py +63 -0
  12. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/tests/test_dict_converters.py +3 -0
  13. qdrant_haystack-3.3.1/tests/test_retriever.py +254 -0
  14. qdrant_haystack-3.2.1/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -7
  15. qdrant_haystack-3.2.1/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -122
  16. qdrant_haystack-3.2.1/src/haystack_integrations/document_stores/qdrant/converters.py +0 -70
  17. qdrant_haystack-3.2.1/src/haystack_integrations/document_stores/qdrant/filters.py +0 -233
  18. qdrant_haystack-3.2.1/tests/test_converters.py +0 -52
  19. qdrant_haystack-3.2.1/tests/test_retriever.py +0 -114
  20. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/.gitignore +0 -0
  21. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/LICENSE.txt +0 -0
  22. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/README.md +0 -0
  23. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/tests/__init__.py +0 -0
  24. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/tests/test_document_store.py +0 -0
  25. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/tests/test_filters.py +0 -0
  26. {qdrant_haystack-3.2.1 → qdrant_haystack-3.3.1}/tests/test_legacy_filters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.2.1
3
+ Version: 3.3.1
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
18
18
  Classifier: Programming Language :: Python :: Implementation :: PyPy
19
19
  Requires-Python: >=3.8
20
- Requires-Dist: haystack-ai
20
+ Requires-Dist: haystack-ai>=2.0.1
21
21
  Requires-Dist: qdrant-client
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -5,8 +5,7 @@ loaders:
5
5
  [
6
6
  "haystack_integrations.components.retrievers.qdrant.retriever",
7
7
  "haystack_integrations.document_stores.qdrant.document_store",
8
- "haystack_integrations.document_stores.qdrant.converters",
9
- "haystack_integrations.document_stores.qdrant.filters",
8
+ "haystack_integrations.document_stores.qdrant.migrate_to_sparse",
10
9
  ]
11
10
  ignore_when_discovered: ["__init__"]
12
11
  processors:
@@ -24,7 +24,7 @@ classifiers = [
24
24
  "Programming Language :: Python :: Implementation :: CPython",
25
25
  "Programming Language :: Python :: Implementation :: PyPy",
26
26
  ]
27
- dependencies = ["haystack-ai", "qdrant-client"]
27
+ dependencies = ["haystack-ai>=2.0.1", "qdrant-client"]
28
28
 
29
29
  [project.urls]
30
30
  Source = "https://github.com/deepset-ai/haystack-core-integrations"
@@ -103,6 +103,8 @@ ignore = [
103
103
  "B027",
104
104
  # Allow boolean positional values in function calls, like `dict.get(... True)`
105
105
  "FBT003",
106
+ # Allow boolean arguments in function definition
107
+ "FBT001", "FBT002",
106
108
  # Ignore checks for possible passwords
107
109
  "S105",
108
110
  "S106",
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .retriever import QdrantEmbeddingRetriever, QdrantSparseRetriever
6
+
7
+ __all__ = ("QdrantEmbeddingRetriever", "QdrantSparseRetriever")
@@ -0,0 +1,241 @@
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from haystack import Document, component, default_from_dict, default_to_dict
4
+ from haystack.dataclasses.sparse_embedding import SparseEmbedding
5
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
6
+
7
+
8
@component
class QdrantEmbeddingRetriever:
    """
    A component for retrieving documents from a QdrantDocumentStore using dense vectors.

    Usage example:
    ```python
    from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
    from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

    document_store = QdrantDocumentStore(
        ":memory:",
        recreate_index=True,
        return_embedding=True,
        wait_result_from_api=True,
    )
    retriever = QdrantEmbeddingRetriever(document_store=document_store)

    # using a fake vector to keep the example simple
    retriever.run(query_embedding=[0.1]*768)
    ```
    """

    def __init__(
        self,
        document_store: QdrantDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        scale_score: bool = True,
        return_embedding: bool = False,
    ):
        """
        Create a QdrantEmbeddingRetriever component.

        :param document_store: An instance of QdrantDocumentStore.
        :param filters: A dictionary with filters to narrow down the search space. Default is None.
        :param top_k: The maximum number of documents to retrieve. Default is 10.
        :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
        :param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.

        :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
        """

        if not isinstance(document_store, QdrantDocumentStore):
            msg = "document_store must be an instance of QdrantDocumentStore"
            raise ValueError(msg)

        self._document_store = document_store
        self._filters = filters
        self._top_k = top_k
        self._scale_score = scale_score
        self._return_embedding = return_embedding

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        d = default_to_dict(
            self,
            document_store=self._document_store,
            filters=self._filters,
            top_k=self._top_k,
            scale_score=self._scale_score,
            return_embedding=self._return_embedding,
        )
        # The document store is not serializable as-is; store its own dict form instead.
        d["init_parameters"]["document_store"] = self._document_store.to_dict()

        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QdrantEmbeddingRetriever":
        """
        Deserializes the component from a dictionary.

        :param data:
            Dictionary to deserialize from. The `document_store` entry under
            `init_parameters` is replaced in place by the rebuilt document store.
        :returns:
            Deserialized component.
        """
        document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
        data["init_parameters"]["document_store"] = document_store
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_embedding: List[float],
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
        scale_score: Optional[bool] = None,
        return_embedding: Optional[bool] = None,
    ):
        """
        Run the Embedding Retriever on the given input data.

        Arguments left as `None` fall back to the values given at construction time.

        :param query_embedding: Embedding of the query.
        :param filters: A dictionary with filters to narrow down the search space.
        :param top_k: The maximum number of documents to return.
        :param scale_score: Whether to scale the scores of the retrieved documents or not.
        :param return_embedding: Whether to return the embedding of the retrieved Documents.
        :returns:
            A dictionary with the retrieved documents under the `documents` key.

        """
        # Booleans must be resolved with an explicit `is None` test: `value or default`
        # would discard an explicit `False` whenever the configured default is `True`.
        effective_scale_score = self._scale_score if scale_score is None else scale_score
        effective_return_embedding = self._return_embedding if return_embedding is None else return_embedding

        docs = self._document_store.query_by_embedding(
            query_embedding=query_embedding,
            filters=filters or self._filters,
            top_k=top_k or self._top_k,
            scale_score=effective_scale_score,
            return_embedding=effective_return_embedding,
        )

        return {"documents": docs}
124
+
125
+
126
@component
class QdrantSparseRetriever:
    """
    A component for retrieving documents from a QdrantDocumentStore using sparse vectors.

    Usage example:
    ```python
    from haystack_integrations.components.retrievers.qdrant import QdrantSparseRetriever
    from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
    from haystack.dataclasses.sparse_embedding import SparseEmbedding

    document_store = QdrantDocumentStore(
        ":memory:",
        recreate_index=True,
        return_embedding=True,
        wait_result_from_api=True,
    )
    retriever = QdrantSparseRetriever(document_store=document_store)
    sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
    retriever.run(query_sparse_embedding=sparse_embedding)
    ```
    """

    def __init__(
        self,
        document_store: QdrantDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        scale_score: bool = True,
        return_embedding: bool = False,
    ):
        """
        Create a QdrantSparseRetriever component.

        :param document_store: An instance of QdrantDocumentStore.
        :param filters: A dictionary with filters to narrow down the search space. Default is None.
        :param top_k: The maximum number of documents to retrieve. Default is 10.
        :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
        :param return_embedding: Whether to return the sparse embedding of the retrieved Documents. Default is False.

        :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
        """

        if not isinstance(document_store, QdrantDocumentStore):
            msg = "document_store must be an instance of QdrantDocumentStore"
            raise ValueError(msg)

        self._document_store = document_store
        self._filters = filters
        self._top_k = top_k
        self._scale_score = scale_score
        self._return_embedding = return_embedding

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        d = default_to_dict(
            self,
            document_store=self._document_store,
            filters=self._filters,
            top_k=self._top_k,
            scale_score=self._scale_score,
            return_embedding=self._return_embedding,
        )
        # The document store is not serializable as-is; store its own dict form instead.
        d["init_parameters"]["document_store"] = self._document_store.to_dict()

        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseRetriever":
        """
        Deserializes the component from a dictionary.

        :param data:
            Dictionary to deserialize from. The `document_store` entry under
            `init_parameters` is replaced in place by the rebuilt document store.
        :returns:
            Deserialized component.
        """
        document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
        data["init_parameters"]["document_store"] = document_store
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_sparse_embedding: SparseEmbedding,
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
        scale_score: Optional[bool] = None,
        return_embedding: Optional[bool] = None,
    ):
        """
        Run the Sparse Embedding Retriever on the given input data.

        Arguments left as `None` fall back to the values given at construction time.

        :param query_sparse_embedding: Sparse Embedding of the query.
        :param filters: A dictionary with filters to narrow down the search space.
        :param top_k: The maximum number of documents to return.
        :param scale_score: Whether to scale the scores of the retrieved documents or not.
        :param return_embedding: Whether to return the embedding of the retrieved Documents.
        :returns:
            A dictionary with the retrieved documents under the `documents` key.

        """
        # Booleans must be resolved with an explicit `is None` test: `value or default`
        # would discard an explicit `False` whenever the configured default is `True`.
        effective_scale_score = self._scale_score if scale_score is None else scale_score
        effective_return_embedding = self._return_embedding if return_embedding is None else return_embedding

        docs = self._document_store.query_by_sparse(
            query_sparse_embedding=query_sparse_embedding,
            filters=filters or self._filters,
            top_k=top_k or self._top_k,
            scale_score=effective_scale_score,
            return_embedding=effective_return_embedding,
        )

        return {"documents": docs}
@@ -3,5 +3,6 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  from .document_store import QdrantDocumentStore
6
+ from .migrate_to_sparse import migrate_to_sparse_embeddings_support
6
7
 
7
- __all__ = ("QdrantDocumentStore",)
8
+ __all__ = ("QdrantDocumentStore", "migrate_to_sparse_embeddings_support")
@@ -0,0 +1,80 @@
1
+ import logging
2
+ import uuid
3
+ from typing import List, Union
4
+
5
+ from haystack.dataclasses import Document
6
+ from qdrant_client.http import models as rest
7
+
8
logger = logging.getLogger(__name__)

# Keys under which the dense and sparse vectors are stored in a point's
# vector mapping when sparse embeddings are enabled.
DENSE_VECTORS_NAME = "text-dense"
SPARSE_VECTORS_NAME = "text-sparse"


# Fixed namespace passed to uuid.uuid5 so that the same document id always
# maps to the same point id.
UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
15
+
16
+
17
def convert_haystack_documents_to_qdrant_points(
    documents: List[Document],
    *,
    embedding_field: str,
    use_sparse_embeddings: bool,
) -> List[rest.PointStruct]:
    """
    Build one Qdrant point per Haystack document.

    Each document is dumped to a dictionary that becomes the point payload, with
    the embedding entries removed from it and moved into the point's vector.

    :param documents: Documents to convert.
    :param embedding_field: Payload key holding the dense embedding.
    :param use_sparse_embeddings: When True, the vector is a mapping of named vectors
        (dense and/or sparse, whichever are present); when False, it is the dense
        embedding itself, or an empty mapping if the document has none.
    :returns: The points, in the same order as `documents`.
    """
    qdrant_points: List[rest.PointStruct] = []

    for doc in documents:
        doc_payload = doc.to_dict(flatten=False)

        if not use_sparse_embeddings:
            # No default here: the dense embedding key is expected to be present.
            point_vector = doc_payload.pop(embedding_field) or {}
        else:
            point_vector = {}

            dense = doc_payload.pop(embedding_field, None)
            if dense is not None:
                point_vector[DENSE_VECTORS_NAME] = dense

            sparse = doc_payload.pop("sparse_embedding", None)
            if sparse is not None:
                point_vector[SPARSE_VECTORS_NAME] = rest.SparseVector(**sparse)

        qdrant_points.append(
            rest.PointStruct(
                id=convert_id(doc_payload.get("id")),
                payload=doc_payload,
                vector=point_vector,
            )
        )

    return qdrant_points
49
+
50
+
51
def convert_id(_id: str) -> str:
    """
    Map an arbitrary string id to a UUID-formatted hex string, deterministically.

    Qdrant does not accept any string as an id, so an internal id is derived for
    each point. The same input always yields the same output.

    :param _id: The original (document) id.
    :returns: The hex form of the name-based UUID computed from `_id`.
    """
    point_uuid = uuid.uuid5(UUID_NAMESPACE, _id)
    return point_uuid.hex
59
+
60
+
61
QdrantPoint = Union[rest.ScoredPoint, rest.Record]


def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
    """
    Rebuild a Haystack document from a Qdrant point.

    :param point: The point to convert; its payload supplies the document fields.
    :param use_sparse_embeddings: When True, the point's vector is read as a mapping
        of named vectors; when False, it is used directly as the dense embedding.
    :returns: The document, with `score` set from the point when available.
    """
    document_fields = dict(point.payload)
    # Points without a `score` / `vector` attribute yield None for it.
    document_fields["score"] = getattr(point, "score", None)
    vectors = getattr(point, "vector", None)

    if not use_sparse_embeddings:
        document_fields["embedding"] = vectors
        return Document.from_dict(document_fields)

    if vectors is not None:
        document_fields["embedding"] = vectors.get(DENSE_VECTORS_NAME)

        if SPARSE_VECTORS_NAME in vectors:
            sparse_vector = vectors[SPARSE_VECTORS_NAME]
            document_fields["sparse_embedding"] = {
                "indices": sparse_vector.indices,
                "values": sparse_vector.values,
            }

    return Document.from_dict(document_fields)