qdrant-haystack 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/retriever.py +21 -19
- haystack_integrations/document_stores/qdrant/document_store.py +40 -33
- haystack_integrations/document_stores/qdrant/filters.py +3 -2
- {qdrant_haystack-3.5.0.dist-info → qdrant_haystack-3.6.0.dist-info}/METADATA +1 -1
- {qdrant_haystack-3.5.0.dist-info → qdrant_haystack-3.6.0.dist-info}/RECORD +7 -7
- {qdrant_haystack-3.5.0.dist-info → qdrant_haystack-3.6.0.dist-info}/WHEEL +0 -0
- {qdrant_haystack-3.5.0.dist-info → qdrant_haystack-3.6.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from haystack import Document, component, default_from_dict, default_to_dict
|
|
4
4
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
5
5
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
6
|
+
from qdrant_client.http import models
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
@component
|
|
@@ -12,6 +13,7 @@ class QdrantEmbeddingRetriever:
|
|
|
12
13
|
|
|
13
14
|
Usage example:
|
|
14
15
|
```python
|
|
16
|
+
from haystack.dataclasses import Document
|
|
15
17
|
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
|
|
16
18
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
17
19
|
|
|
@@ -33,7 +35,7 @@ class QdrantEmbeddingRetriever:
|
|
|
33
35
|
def __init__(
|
|
34
36
|
self,
|
|
35
37
|
document_store: QdrantDocumentStore,
|
|
36
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
38
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
37
39
|
top_k: int = 10,
|
|
38
40
|
scale_score: bool = True,
|
|
39
41
|
return_embedding: bool = False,
|
|
@@ -42,12 +44,12 @@ class QdrantEmbeddingRetriever:
|
|
|
42
44
|
Create a QdrantEmbeddingRetriever component.
|
|
43
45
|
|
|
44
46
|
:param document_store: An instance of QdrantDocumentStore.
|
|
45
|
-
:param filters: A dictionary with filters to narrow down the search space.
|
|
46
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
47
|
-
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
48
|
-
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
47
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
48
|
+
:param top_k: The maximum number of documents to retrieve.
|
|
49
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
50
|
+
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
49
51
|
|
|
50
|
-
:raises ValueError: If
|
|
52
|
+
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
51
53
|
"""
|
|
52
54
|
|
|
53
55
|
if not isinstance(document_store, QdrantDocumentStore):
|
|
@@ -97,7 +99,7 @@ class QdrantEmbeddingRetriever:
|
|
|
97
99
|
def run(
|
|
98
100
|
self,
|
|
99
101
|
query_embedding: List[float],
|
|
100
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
102
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
101
103
|
top_k: Optional[int] = None,
|
|
102
104
|
scale_score: Optional[bool] = None,
|
|
103
105
|
return_embedding: Optional[bool] = None,
|
|
@@ -134,7 +136,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
134
136
|
```python
|
|
135
137
|
from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
|
|
136
138
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
137
|
-
from haystack.dataclasses
|
|
139
|
+
from haystack.dataclasses import Document, SparseEmbedding
|
|
138
140
|
|
|
139
141
|
document_store = QdrantDocumentStore(
|
|
140
142
|
":memory:",
|
|
@@ -155,7 +157,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
155
157
|
def __init__(
|
|
156
158
|
self,
|
|
157
159
|
document_store: QdrantDocumentStore,
|
|
158
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
160
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
159
161
|
top_k: int = 10,
|
|
160
162
|
scale_score: bool = True,
|
|
161
163
|
return_embedding: bool = False,
|
|
@@ -164,12 +166,12 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
164
166
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
165
167
|
|
|
166
168
|
:param document_store: An instance of QdrantDocumentStore.
|
|
167
|
-
:param filters: A dictionary with filters to narrow down the search space.
|
|
168
|
-
:param top_k: The maximum number of documents to retrieve.
|
|
169
|
-
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
170
|
-
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
169
|
+
:param filters: A dictionary with filters to narrow down the search space.
|
|
170
|
+
:param top_k: The maximum number of documents to retrieve.
|
|
171
|
+
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
172
|
+
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
171
173
|
|
|
172
|
-
:raises ValueError: If
|
|
174
|
+
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
173
175
|
"""
|
|
174
176
|
|
|
175
177
|
if not isinstance(document_store, QdrantDocumentStore):
|
|
@@ -219,7 +221,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
219
221
|
def run(
|
|
220
222
|
self,
|
|
221
223
|
query_sparse_embedding: SparseEmbedding,
|
|
222
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
224
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
223
225
|
top_k: Optional[int] = None,
|
|
224
226
|
scale_score: Optional[bool] = None,
|
|
225
227
|
return_embedding: Optional[bool] = None,
|
|
@@ -257,7 +259,7 @@ class QdrantHybridRetriever:
|
|
|
257
259
|
```python
|
|
258
260
|
from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
|
|
259
261
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
260
|
-
from haystack.dataclasses
|
|
262
|
+
from haystack.dataclasses import Document, SparseEmbedding
|
|
261
263
|
|
|
262
264
|
document_store = QdrantDocumentStore(
|
|
263
265
|
":memory:",
|
|
@@ -283,7 +285,7 @@ class QdrantHybridRetriever:
|
|
|
283
285
|
def __init__(
|
|
284
286
|
self,
|
|
285
287
|
document_store: QdrantDocumentStore,
|
|
286
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
288
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
287
289
|
top_k: int = 10,
|
|
288
290
|
return_embedding: bool = False,
|
|
289
291
|
):
|
|
@@ -341,7 +343,7 @@ class QdrantHybridRetriever:
|
|
|
341
343
|
self,
|
|
342
344
|
query_embedding: List[float],
|
|
343
345
|
query_sparse_embedding: SparseEmbedding,
|
|
344
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
346
|
+
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
345
347
|
top_k: Optional[int] = None,
|
|
346
348
|
return_embedding: Optional[bool] = None,
|
|
347
349
|
):
|
|
@@ -66,7 +66,7 @@ class QdrantDocumentStore:
|
|
|
66
66
|
https: Optional[bool] = None,
|
|
67
67
|
api_key: Optional[Secret] = None,
|
|
68
68
|
prefix: Optional[str] = None,
|
|
69
|
-
timeout: Optional[
|
|
69
|
+
timeout: Optional[int] = None,
|
|
70
70
|
host: Optional[str] = None,
|
|
71
71
|
path: Optional[str] = None,
|
|
72
72
|
index: str = "Document",
|
|
@@ -96,23 +96,7 @@ class QdrantDocumentStore:
|
|
|
96
96
|
scroll_size: int = 10_000,
|
|
97
97
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
98
98
|
):
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
metadata = metadata or {}
|
|
102
|
-
self.client = qdrant_client.QdrantClient(
|
|
103
|
-
location=location,
|
|
104
|
-
url=url,
|
|
105
|
-
port=port,
|
|
106
|
-
grpc_port=grpc_port,
|
|
107
|
-
prefer_grpc=prefer_grpc,
|
|
108
|
-
https=https,
|
|
109
|
-
api_key=api_key.resolve_value() if api_key else None,
|
|
110
|
-
prefix=prefix,
|
|
111
|
-
timeout=timeout,
|
|
112
|
-
host=host,
|
|
113
|
-
path=path,
|
|
114
|
-
metadata=metadata,
|
|
115
|
-
)
|
|
99
|
+
self._client = None
|
|
116
100
|
|
|
117
101
|
# Store the Qdrant client specific attributes
|
|
118
102
|
self.location = location
|
|
@@ -126,7 +110,7 @@ class QdrantDocumentStore:
|
|
|
126
110
|
self.timeout = timeout
|
|
127
111
|
self.host = host
|
|
128
112
|
self.path = path
|
|
129
|
-
self.metadata = metadata
|
|
113
|
+
self.metadata = metadata or {}
|
|
130
114
|
self.api_key = api_key
|
|
131
115
|
|
|
132
116
|
# Store the Qdrant collection specific attributes
|
|
@@ -143,12 +127,6 @@ class QdrantDocumentStore:
|
|
|
143
127
|
self.recreate_index = recreate_index
|
|
144
128
|
self.payload_fields_to_index = payload_fields_to_index
|
|
145
129
|
self.use_sparse_embeddings = use_sparse_embeddings
|
|
146
|
-
|
|
147
|
-
# Make sure the collection is properly set up
|
|
148
|
-
self._set_up_collection(
|
|
149
|
-
index, embedding_dim, recreate_index, similarity, use_sparse_embeddings, on_disk, payload_fields_to_index
|
|
150
|
-
)
|
|
151
|
-
|
|
152
130
|
self.embedding_dim = embedding_dim
|
|
153
131
|
self.on_disk = on_disk
|
|
154
132
|
self.content_field = content_field
|
|
@@ -162,6 +140,35 @@ class QdrantDocumentStore:
|
|
|
162
140
|
self.write_batch_size = write_batch_size
|
|
163
141
|
self.scroll_size = scroll_size
|
|
164
142
|
|
|
143
|
+
@property
|
|
144
|
+
def client(self):
|
|
145
|
+
if not self._client:
|
|
146
|
+
self._client = qdrant_client.QdrantClient(
|
|
147
|
+
location=self.location,
|
|
148
|
+
url=self.url,
|
|
149
|
+
port=self.port,
|
|
150
|
+
grpc_port=self.grpc_port,
|
|
151
|
+
prefer_grpc=self.prefer_grpc,
|
|
152
|
+
https=self.https,
|
|
153
|
+
api_key=self.api_key.resolve_value() if self.api_key else None,
|
|
154
|
+
prefix=self.prefix,
|
|
155
|
+
timeout=self.timeout,
|
|
156
|
+
host=self.host,
|
|
157
|
+
path=self.path,
|
|
158
|
+
metadata=self.metadata,
|
|
159
|
+
)
|
|
160
|
+
# Make sure the collection is properly set up
|
|
161
|
+
self._set_up_collection(
|
|
162
|
+
self.index,
|
|
163
|
+
self.embedding_dim,
|
|
164
|
+
self.recreate_index,
|
|
165
|
+
self.similarity,
|
|
166
|
+
self.use_sparse_embeddings,
|
|
167
|
+
self.on_disk,
|
|
168
|
+
self.payload_fields_to_index,
|
|
169
|
+
)
|
|
170
|
+
return self._client
|
|
171
|
+
|
|
165
172
|
def count_documents(self) -> int:
|
|
166
173
|
try:
|
|
167
174
|
response = self.client.count(
|
|
@@ -176,13 +183,13 @@ class QdrantDocumentStore:
|
|
|
176
183
|
|
|
177
184
|
def filter_documents(
|
|
178
185
|
self,
|
|
179
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
186
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
180
187
|
) -> List[Document]:
|
|
181
|
-
if filters and not isinstance(filters, dict):
|
|
182
|
-
msg = "Filter must be a dictionary"
|
|
188
|
+
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
189
|
+
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
183
190
|
raise ValueError(msg)
|
|
184
191
|
|
|
185
|
-
if filters and "operator" not in filters:
|
|
192
|
+
if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
|
|
186
193
|
filters = convert_legacy_filters(filters)
|
|
187
194
|
return list(
|
|
188
195
|
self.get_documents_generator(
|
|
@@ -260,7 +267,7 @@ class QdrantDocumentStore:
|
|
|
260
267
|
|
|
261
268
|
def get_documents_generator(
|
|
262
269
|
self,
|
|
263
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
270
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
264
271
|
) -> Generator[Document, None, None]:
|
|
265
272
|
index = self.index
|
|
266
273
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
@@ -311,7 +318,7 @@ class QdrantDocumentStore:
|
|
|
311
318
|
def _query_by_sparse(
|
|
312
319
|
self,
|
|
313
320
|
query_sparse_embedding: SparseEmbedding,
|
|
314
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
321
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
315
322
|
top_k: int = 10,
|
|
316
323
|
scale_score: bool = True,
|
|
317
324
|
return_embedding: bool = False,
|
|
@@ -353,7 +360,7 @@ class QdrantDocumentStore:
|
|
|
353
360
|
def _query_by_embedding(
|
|
354
361
|
self,
|
|
355
362
|
query_embedding: List[float],
|
|
356
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
363
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
357
364
|
top_k: int = 10,
|
|
358
365
|
scale_score: bool = True,
|
|
359
366
|
return_embedding: bool = False,
|
|
@@ -388,7 +395,7 @@ class QdrantDocumentStore:
|
|
|
388
395
|
self,
|
|
389
396
|
query_embedding: List[float],
|
|
390
397
|
query_sparse_embedding: SparseEmbedding,
|
|
391
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
398
|
+
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
392
399
|
top_k: int = 10,
|
|
393
400
|
return_embedding: bool = False,
|
|
394
401
|
) -> List[Document]:
|
|
@@ -11,10 +11,11 @@ LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def convert_filters_to_qdrant(
|
|
14
|
-
filter_term: Optional[Union[List[dict], dict]] = None,
|
|
14
|
+
filter_term: Optional[Union[List[dict], dict, models.Filter]] = None,
|
|
15
15
|
) -> Optional[models.Filter]:
|
|
16
16
|
"""Converts Haystack filters to the format used by Qdrant."""
|
|
17
|
-
|
|
17
|
+
if isinstance(filter_term, models.Filter):
|
|
18
|
+
return filter_term
|
|
18
19
|
if not filter_term:
|
|
19
20
|
return None
|
|
20
21
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.5.0
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
|
|
2
|
-
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=
|
|
2
|
+
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=r416_a7_6l7ehfPLdFtyHncsPrHj3mFCyROeJJF9rwM,13463
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
4
|
haystack_integrations/document_stores/qdrant/converters.py,sha256=oSO2YlsWEQbcw9CPlWfSg_HoTZlnkAhZw_6VlYWzKKs,2525
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=
|
|
6
|
-
haystack_integrations/document_stores/qdrant/filters.py,sha256=
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=ouPp-oM1M4VvkYtpZ2pl5kGVQ5Ei4kc--Jwd7cYRlzk,26827
|
|
6
|
+
haystack_integrations/document_stores/qdrant/filters.py,sha256=0w70Wa3Za1fNdbJ5O95sZDIpXfblJG_sBBUv0JTQ0-o,8337
|
|
7
7
|
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
|
|
8
|
-
qdrant_haystack-3.5.0.dist-info/METADATA,sha256=
|
|
9
|
-
qdrant_haystack-3.5.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
10
|
-
qdrant_haystack-3.5.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
-
qdrant_haystack-3.5.0.dist-info/RECORD,,
|
|
8
|
+
qdrant_haystack-3.6.0.dist-info/METADATA,sha256=8pAGEH2tOoSqU-YXe0hoCfgUUOMEdcV1KSEcj4YMOsM,1862
|
|
9
|
+
qdrant_haystack-3.6.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
10
|
+
qdrant_haystack-3.6.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-3.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|