qdrant-haystack 3.8.0__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/retriever.py +2 -2
- haystack_integrations/document_stores/qdrant/converters.py +2 -3
- haystack_integrations/document_stores/qdrant/document_store.py +263 -21
- {qdrant_haystack-3.8.0.dist-info → qdrant_haystack-4.0.0.dist-info}/METADATA +1 -1
- {qdrant_haystack-3.8.0.dist-info → qdrant_haystack-4.0.0.dist-info}/RECORD +7 -7
- {qdrant_haystack-3.8.0.dist-info → qdrant_haystack-4.0.0.dist-info}/WHEEL +1 -1
- {qdrant_haystack-3.8.0.dist-info → qdrant_haystack-4.0.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -37,7 +37,7 @@ class QdrantEmbeddingRetriever:
|
|
|
37
37
|
document_store: QdrantDocumentStore,
|
|
38
38
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
39
39
|
top_k: int = 10,
|
|
40
|
-
scale_score: bool =
|
|
40
|
+
scale_score: bool = False,
|
|
41
41
|
return_embedding: bool = False,
|
|
42
42
|
):
|
|
43
43
|
"""
|
|
@@ -159,7 +159,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
159
159
|
document_store: QdrantDocumentStore,
|
|
160
160
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
161
161
|
top_k: int = 10,
|
|
162
|
-
scale_score: bool =
|
|
162
|
+
scale_score: bool = False,
|
|
163
163
|
return_embedding: bool = False,
|
|
164
164
|
):
|
|
165
165
|
"""
|
|
@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
|
|
|
17
17
|
def convert_haystack_documents_to_qdrant_points(
|
|
18
18
|
documents: List[Document],
|
|
19
19
|
*,
|
|
20
|
-
embedding_field: str,
|
|
21
20
|
use_sparse_embeddings: bool,
|
|
22
21
|
) -> List[rest.PointStruct]:
|
|
23
22
|
points = []
|
|
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
26
25
|
if use_sparse_embeddings:
|
|
27
26
|
vector = {}
|
|
28
27
|
|
|
29
|
-
dense_vector = payload.pop(
|
|
28
|
+
dense_vector = payload.pop("embedding", None)
|
|
30
29
|
if dense_vector is not None:
|
|
31
30
|
vector[DENSE_VECTORS_NAME] = dense_vector
|
|
32
31
|
|
|
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
36
35
|
vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
|
|
37
36
|
|
|
38
37
|
else:
|
|
39
|
-
vector = payload.pop(
|
|
38
|
+
vector = payload.pop("embedding") or {}
|
|
40
39
|
_id = convert_id(payload.get("id"))
|
|
41
40
|
|
|
42
41
|
point = rest.PointStruct(
|
|
@@ -49,6 +49,44 @@ def get_batches_from_generator(iterable, n):
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
class QdrantDocumentStore:
|
|
52
|
+
"""
|
|
53
|
+
QdrantDocumentStore is a Document Store for Qdrant.
|
|
54
|
+
It can be used with any Qdrant instance: in-memory, disk-persisted, Docker-based,
|
|
55
|
+
and Qdrant Cloud Cluster deployments.
|
|
56
|
+
|
|
57
|
+
Usage example by creating an in-memory instance:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from haystack.dataclasses.document import Document
|
|
61
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
62
|
+
|
|
63
|
+
document_store = QdrantDocumentStore(
|
|
64
|
+
":memory:",
|
|
65
|
+
recreate_index=True
|
|
66
|
+
)
|
|
67
|
+
document_store.write_documents([
|
|
68
|
+
Document(content="This is first", embedding=[0.0]*5),
|
|
69
|
+
Document(content="This is second", embedding=[0.1, 0.2, 0.3, 0.4, 0.5])
|
|
70
|
+
])
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Usage example with Qdrant Cloud:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from haystack.dataclasses.document import Document
|
|
77
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
78
|
+
|
|
79
|
+
document_store = QdrantDocumentStore(
|
|
80
|
+
url="https://xxxxxx-xxxxx-xxxxx-xxxx-xxxxxxxxx.us-east.aws.cloud.qdrant.io:6333",
|
|
81
|
+
api_key="<your-api-key>",
|
|
82
|
+
)
|
|
83
|
+
document_store.write_documents([
|
|
84
|
+
Document(content="This is first", embedding=[0.0]*5),
|
|
85
|
+
Document(content="This is second", embedding=[0.1, 0.2, 0.3, 0.4, 0.5])
|
|
86
|
+
])
|
|
87
|
+
```
|
|
88
|
+
"""
|
|
89
|
+
|
|
52
90
|
SIMILARITY: ClassVar[Dict[str, str]] = {
|
|
53
91
|
"cosine": rest.Distance.COSINE,
|
|
54
92
|
"dot_product": rest.Distance.DOT,
|
|
@@ -72,14 +110,10 @@ class QdrantDocumentStore:
|
|
|
72
110
|
index: str = "Document",
|
|
73
111
|
embedding_dim: int = 768,
|
|
74
112
|
on_disk: bool = False,
|
|
75
|
-
content_field: str = "content",
|
|
76
|
-
name_field: str = "name",
|
|
77
|
-
embedding_field: str = "embedding",
|
|
78
113
|
use_sparse_embeddings: bool = False,
|
|
79
114
|
similarity: str = "cosine",
|
|
80
115
|
return_embedding: bool = False,
|
|
81
116
|
progress_bar: bool = True,
|
|
82
|
-
duplicate_documents: str = "overwrite",
|
|
83
117
|
recreate_index: bool = False,
|
|
84
118
|
shard_number: Optional[int] = None,
|
|
85
119
|
replication_factor: Optional[int] = None,
|
|
@@ -96,6 +130,90 @@ class QdrantDocumentStore:
|
|
|
96
130
|
scroll_size: int = 10_000,
|
|
97
131
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
98
132
|
):
|
|
133
|
+
"""
|
|
134
|
+
:param location:
|
|
135
|
+
If `memory` - use in-memory Qdrant instance.
|
|
136
|
+
If `str` - use it as a URL parameter.
|
|
137
|
+
If `None` - use default values for host and port.
|
|
138
|
+
:param url:
|
|
139
|
+
Either host or str of `Optional[scheme], host, Optional[port], Optional[prefix]`.
|
|
140
|
+
:param port:
|
|
141
|
+
Port of the REST API interface.
|
|
142
|
+
:param grpc_port:
|
|
143
|
+
Port of the gRPC interface.
|
|
144
|
+
:param prefer_grpc:
|
|
145
|
+
If `True` - use gRPC interface whenever possible in custom methods.
|
|
146
|
+
:param https:
|
|
147
|
+
If `True` - use HTTPS(SSL) protocol.
|
|
148
|
+
:param api_key:
|
|
149
|
+
API key for authentication in Qdrant Cloud.
|
|
150
|
+
:param prefix:
|
|
151
|
+
If not `None` - add prefix to the REST URL path.
|
|
152
|
+
Example: service/v1 will result in http://localhost:6333/service/v1/{qdrant-endpoint}
|
|
153
|
+
for REST API.
|
|
154
|
+
:param timeout:
|
|
155
|
+
Timeout for REST and gRPC API requests.
|
|
156
|
+
:param host:
|
|
157
|
+
Host name of Qdrant service. If `url` and `host` are `None`, set to `localhost`.
|
|
158
|
+
:param path:
|
|
159
|
+
Persistence path for QdrantLocal.
|
|
160
|
+
:param force_disable_check_same_thread:
|
|
161
|
+
For QdrantLocal, force disable check_same_thread.
|
|
162
|
+
Only use this if you can guarantee that you can resolve the thread safety outside QdrantClient.
|
|
163
|
+
:param index:
|
|
164
|
+
Name of the index.
|
|
165
|
+
:param embedding_dim:
|
|
166
|
+
Dimension of the embeddings.
|
|
167
|
+
:param on_disk:
|
|
168
|
+
Whether to store the collection on disk.
|
|
169
|
+
:param use_sparse_embeddings:
|
|
170
|
+
If set to `True`, enables support for sparse embeddings.
|
|
171
|
+
:param similarity:
|
|
172
|
+
The similarity metric to use.
|
|
173
|
+
:param return_embedding:
|
|
174
|
+
Whether to return embeddings in the search results.
|
|
175
|
+
:param progress_bar:
|
|
176
|
+
Whether to show a progress bar or not.
|
|
177
|
+
:param recreate_index:
|
|
178
|
+
Whether to recreate the index.
|
|
179
|
+
:param shard_number:
|
|
180
|
+
Number of shards in the collection.
|
|
181
|
+
:param replication_factor:
|
|
182
|
+
Replication factor for the collection.
|
|
183
|
+
Defines how many copies of each shard will be created. Effective only in distributed mode.
|
|
184
|
+
:param write_consistency_factor:
|
|
185
|
+
Write consistency factor for the collection. Minimum value is 1.
|
|
186
|
+
Defines how many replicas should apply to the operation for it to be considered successful.
|
|
187
|
+
Increasing this number makes the collection more resilient to inconsistencies
|
|
188
|
+
but will cause failures if not enough replicas are available.
|
|
189
|
+
Effective only in distributed mode.
|
|
190
|
+
:param on_disk_payload:
|
|
191
|
+
If `True`, the point's payload will not be stored in memory and
|
|
192
|
+
will be read from the disk every time it is requested.
|
|
193
|
+
This setting saves RAM by slightly increasing response time.
|
|
194
|
+
Note: indexed payload values remain in RAM.
|
|
195
|
+
:param hnsw_config:
|
|
196
|
+
Params for HNSW index.
|
|
197
|
+
:param optimizers_config:
|
|
198
|
+
Params for optimizer.
|
|
199
|
+
:param wal_config:
|
|
200
|
+
Params for Write-Ahead-Log.
|
|
201
|
+
:param quantization_config:
|
|
202
|
+
Params for quantization. If `None`, quantization will be disabled.
|
|
203
|
+
:param init_from:
|
|
204
|
+
Use data stored in another collection to initialize this collection.
|
|
205
|
+
:param wait_result_from_api:
|
|
206
|
+
Whether to wait for the result from the API after each request.
|
|
207
|
+
:param metadata:
|
|
208
|
+
Additional metadata to include with the documents.
|
|
209
|
+
:param write_batch_size:
|
|
210
|
+
The batch size for writing documents.
|
|
211
|
+
:param scroll_size:
|
|
212
|
+
The scroll size for reading documents.
|
|
213
|
+
:param payload_fields_to_index:
|
|
214
|
+
List of payload fields to index.
|
|
215
|
+
"""
|
|
216
|
+
|
|
99
217
|
self._client = None
|
|
100
218
|
|
|
101
219
|
# Store the Qdrant client specific attributes
|
|
@@ -130,14 +248,10 @@ class QdrantDocumentStore:
|
|
|
130
248
|
self.use_sparse_embeddings = use_sparse_embeddings
|
|
131
249
|
self.embedding_dim = embedding_dim
|
|
132
250
|
self.on_disk = on_disk
|
|
133
|
-
self.content_field = content_field
|
|
134
|
-
self.name_field = name_field
|
|
135
|
-
self.embedding_field = embedding_field
|
|
136
251
|
self.similarity = similarity
|
|
137
252
|
self.index = index
|
|
138
253
|
self.return_embedding = return_embedding
|
|
139
254
|
self.progress_bar = progress_bar
|
|
140
|
-
self.duplicate_documents = duplicate_documents
|
|
141
255
|
self.write_batch_size = write_batch_size
|
|
142
256
|
self.scroll_size = scroll_size
|
|
143
257
|
|
|
@@ -172,6 +286,9 @@ class QdrantDocumentStore:
|
|
|
172
286
|
return self._client
|
|
173
287
|
|
|
174
288
|
def count_documents(self) -> int:
|
|
289
|
+
"""
|
|
290
|
+
Returns the number of documents present in the Document Store.
|
|
291
|
+
"""
|
|
175
292
|
try:
|
|
176
293
|
response = self.client.count(
|
|
177
294
|
collection_name=self.index,
|
|
@@ -187,6 +304,15 @@ class QdrantDocumentStore:
|
|
|
187
304
|
self,
|
|
188
305
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
189
306
|
) -> List[Document]:
|
|
307
|
+
"""
|
|
308
|
+
Returns the documents that match the provided filters.
|
|
309
|
+
|
|
310
|
+
For a detailed specification of the filters, refer to the
|
|
311
|
+
[documentation](https://docs.haystack.deepset.ai/docs/metadata-filtering)
|
|
312
|
+
|
|
313
|
+
:param filters: The filters to apply to the document list.
|
|
314
|
+
:returns: A list of documents that match the given filters.
|
|
315
|
+
"""
|
|
190
316
|
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
191
317
|
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
192
318
|
raise ValueError(msg)
|
|
@@ -204,6 +330,19 @@ class QdrantDocumentStore:
|
|
|
204
330
|
documents: List[Document],
|
|
205
331
|
policy: DuplicatePolicy = DuplicatePolicy.FAIL,
|
|
206
332
|
):
|
|
333
|
+
"""
|
|
334
|
+
Writes documents to Qdrant using the specified policy.
|
|
335
|
+
The QdrantDocumentStore can handle duplicate documents based on the given policy.
|
|
336
|
+
The available policies are:
|
|
337
|
+
- `FAIL`: The operation will raise an error if any document already exists.
|
|
338
|
+
- `OVERWRITE`: Existing documents will be overwritten with the new ones.
|
|
339
|
+
- `SKIP`: Existing documents will be skipped, and only new documents will be added.
|
|
340
|
+
|
|
341
|
+
:param documents: A list of Document objects to write to Qdrant.
|
|
342
|
+
:param policy: The policy for handling duplicate documents.
|
|
343
|
+
|
|
344
|
+
:returns: The number of documents written to the document store.
|
|
345
|
+
"""
|
|
207
346
|
for doc in documents:
|
|
208
347
|
if not isinstance(doc, Document):
|
|
209
348
|
msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
|
|
@@ -225,7 +364,6 @@ class QdrantDocumentStore:
|
|
|
225
364
|
for document_batch in batched_documents:
|
|
226
365
|
batch = convert_haystack_documents_to_qdrant_points(
|
|
227
366
|
document_batch,
|
|
228
|
-
embedding_field=self.embedding_field,
|
|
229
367
|
use_sparse_embeddings=self.use_sparse_embeddings,
|
|
230
368
|
)
|
|
231
369
|
|
|
@@ -239,6 +377,11 @@ class QdrantDocumentStore:
|
|
|
239
377
|
return len(document_objects)
|
|
240
378
|
|
|
241
379
|
def delete_documents(self, ids: List[str]):
|
|
380
|
+
"""
|
|
381
|
+
Deletes documents that match the provided `ids` from the document store.
|
|
382
|
+
|
|
383
|
+
:param ids: the document ids to delete
|
|
384
|
+
"""
|
|
242
385
|
ids = [convert_id(_id) for _id in ids]
|
|
243
386
|
try:
|
|
244
387
|
self.client.delete(
|
|
@@ -253,10 +396,24 @@ class QdrantDocumentStore:
|
|
|
253
396
|
|
|
254
397
|
@classmethod
|
|
255
398
|
def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore":
|
|
399
|
+
"""
|
|
400
|
+
Deserializes the component from a dictionary.
|
|
401
|
+
|
|
402
|
+
:param data:
|
|
403
|
+
The dictionary to deserialize from.
|
|
404
|
+
:returns:
|
|
405
|
+
The deserialized component.
|
|
406
|
+
"""
|
|
256
407
|
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
|
|
257
408
|
return default_from_dict(cls, data)
|
|
258
409
|
|
|
259
410
|
def to_dict(self) -> Dict[str, Any]:
|
|
411
|
+
"""
|
|
412
|
+
Serializes the component to a dictionary.
|
|
413
|
+
|
|
414
|
+
:returns:
|
|
415
|
+
Dictionary with serialized data.
|
|
416
|
+
"""
|
|
260
417
|
params = inspect.signature(self.__init__).parameters # type: ignore
|
|
261
418
|
# All the __init__ params must be set as attributes
|
|
262
419
|
# Set as init_params without default values
|
|
@@ -271,6 +428,13 @@ class QdrantDocumentStore:
|
|
|
271
428
|
self,
|
|
272
429
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
273
430
|
) -> Generator[Document, None, None]:
|
|
431
|
+
"""
|
|
432
|
+
Returns a generator that yields documents from Qdrant based on the provided filters.
|
|
433
|
+
|
|
434
|
+
:param filters: Filters applied to the retrieved documents.
|
|
435
|
+
:returns: A generator that yields documents retrieved from Qdrant.
|
|
436
|
+
"""
|
|
437
|
+
|
|
274
438
|
index = self.index
|
|
275
439
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
276
440
|
|
|
@@ -299,6 +463,16 @@ class QdrantDocumentStore:
|
|
|
299
463
|
ids: List[str],
|
|
300
464
|
index: Optional[str] = None,
|
|
301
465
|
) -> List[Document]:
|
|
466
|
+
"""
|
|
467
|
+
Retrieves documents from Qdrant by their IDs.
|
|
468
|
+
|
|
469
|
+
:param ids:
|
|
470
|
+
A list of document IDs to retrieve.
|
|
471
|
+
:param index:
|
|
472
|
+
The name of the index to retrieve documents from.
|
|
473
|
+
:returns:
|
|
474
|
+
A list of documents.
|
|
475
|
+
"""
|
|
302
476
|
index = index or self.index
|
|
303
477
|
|
|
304
478
|
documents: List[Document] = []
|
|
@@ -322,9 +496,24 @@ class QdrantDocumentStore:
|
|
|
322
496
|
query_sparse_embedding: SparseEmbedding,
|
|
323
497
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
324
498
|
top_k: int = 10,
|
|
325
|
-
scale_score: bool =
|
|
499
|
+
scale_score: bool = False,
|
|
326
500
|
return_embedding: bool = False,
|
|
327
501
|
) -> List[Document]:
|
|
502
|
+
"""
|
|
503
|
+
Queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
504
|
+
|
|
505
|
+
:param query_sparse_embedding: Sparse embedding of the query.
|
|
506
|
+
:param filters: Filters applied to the retrieved documents.
|
|
507
|
+
:param top_k: Maximum number of documents to return.
|
|
508
|
+
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
509
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
510
|
+
|
|
511
|
+
:returns: List of documents that are most similar to `query_sparse_embedding`.
|
|
512
|
+
|
|
513
|
+
:raises QdrantStoreError:
|
|
514
|
+
If the Document Store was initialized with `use_sparse_embeddings=False`.
|
|
515
|
+
"""
|
|
516
|
+
|
|
328
517
|
if not self.use_sparse_embeddings:
|
|
329
518
|
message = (
|
|
330
519
|
"You are trying to query using sparse embeddings, but the Document Store "
|
|
@@ -364,9 +553,20 @@ class QdrantDocumentStore:
|
|
|
364
553
|
query_embedding: List[float],
|
|
365
554
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
366
555
|
top_k: int = 10,
|
|
367
|
-
scale_score: bool =
|
|
556
|
+
scale_score: bool = False,
|
|
368
557
|
return_embedding: bool = False,
|
|
369
558
|
) -> List[Document]:
|
|
559
|
+
"""
|
|
560
|
+
Queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
561
|
+
|
|
562
|
+
:param query_embedding: Dense embedding of the query.
|
|
563
|
+
:param filters: Filters applied to the retrieved documents.
|
|
564
|
+
:param top_k: Maximum number of documents to return.
|
|
565
|
+
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
566
|
+
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
567
|
+
|
|
568
|
+
:returns: List of documents that are most similar to `query_embedding`.
|
|
569
|
+
"""
|
|
370
570
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
371
571
|
|
|
372
572
|
points = self.client.search(
|
|
@@ -409,8 +609,8 @@ class QdrantDocumentStore:
|
|
|
409
609
|
|
|
410
610
|
:param query_embedding: Dense embedding of the query.
|
|
411
611
|
:param query_sparse_embedding: Sparse embedding of the query.
|
|
412
|
-
:param filters: Filters applied to the retrieved
|
|
413
|
-
:param top_k: Maximum number of
|
|
612
|
+
:param filters: Filters applied to the retrieved documents.
|
|
613
|
+
:param top_k: Maximum number of documents to return.
|
|
414
614
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
415
615
|
|
|
416
616
|
:returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
|
|
@@ -474,6 +674,16 @@ class QdrantDocumentStore:
|
|
|
474
674
|
return results
|
|
475
675
|
|
|
476
676
|
def get_distance(self, similarity: str) -> rest.Distance:
|
|
677
|
+
"""
|
|
678
|
+
Retrieves the distance metric for the specified similarity measure.
|
|
679
|
+
|
|
680
|
+
:param similarity:
|
|
681
|
+
The similarity measure to retrieve the distance.
|
|
682
|
+
:returns:
|
|
683
|
+
The corresponding rest.Distance object.
|
|
684
|
+
:raises QdrantStoreError:
|
|
685
|
+
If the provided similarity measure is not supported.
|
|
686
|
+
"""
|
|
477
687
|
try:
|
|
478
688
|
return self.SIMILARITY[similarity]
|
|
479
689
|
except KeyError as ke:
|
|
@@ -507,6 +717,29 @@ class QdrantDocumentStore:
|
|
|
507
717
|
on_disk: bool = False,
|
|
508
718
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
509
719
|
):
|
|
720
|
+
"""
|
|
721
|
+
Sets up the Qdrant collection with the specified parameters.
|
|
722
|
+
:param collection_name:
|
|
723
|
+
The name of the collection to set up.
|
|
724
|
+
:param embedding_dim:
|
|
725
|
+
The dimension of the embeddings.
|
|
726
|
+
:param recreate_collection:
|
|
727
|
+
Whether to recreate the collection if it already exists.
|
|
728
|
+
:param similarity:
|
|
729
|
+
The similarity measure to use.
|
|
730
|
+
:param use_sparse_embeddings:
|
|
731
|
+
Whether to use sparse embeddings.
|
|
732
|
+
:param on_disk:
|
|
733
|
+
Whether to store the collection on disk.
|
|
734
|
+
:param payload_fields_to_index:
|
|
735
|
+
List of payload fields to index.
|
|
736
|
+
|
|
737
|
+
:raises QdrantStoreError:
|
|
738
|
+
If the collection exists with incompatible settings.
|
|
739
|
+
:raises ValueError:
|
|
740
|
+
If the collection exists with a different similarity measure or embedding dimension.
|
|
741
|
+
|
|
742
|
+
"""
|
|
510
743
|
distance = self.get_distance(similarity)
|
|
511
744
|
|
|
512
745
|
if recreate_collection or not self.client.collection_exists(collection_name):
|
|
@@ -576,6 +809,20 @@ class QdrantDocumentStore:
|
|
|
576
809
|
on_disk: Optional[bool] = None,
|
|
577
810
|
use_sparse_embeddings: Optional[bool] = None,
|
|
578
811
|
):
|
|
812
|
+
"""
|
|
813
|
+
Recreates the Qdrant collection with the specified parameters.
|
|
814
|
+
|
|
815
|
+
:param collection_name:
|
|
816
|
+
The name of the collection to recreate.
|
|
817
|
+
:param distance:
|
|
818
|
+
The distance metric to use for the collection.
|
|
819
|
+
:param embedding_dim:
|
|
820
|
+
The dimension of the embeddings.
|
|
821
|
+
:param on_disk:
|
|
822
|
+
Whether to store the collection on disk.
|
|
823
|
+
:param use_sparse_embeddings:
|
|
824
|
+
Whether to use sparse embeddings.
|
|
825
|
+
"""
|
|
579
826
|
if on_disk is None:
|
|
580
827
|
on_disk = self.on_disk
|
|
581
828
|
|
|
@@ -627,12 +874,7 @@ class QdrantDocumentStore:
|
|
|
627
874
|
|
|
628
875
|
:param documents: A list of Haystack Document objects.
|
|
629
876
|
:param index: name of the index
|
|
630
|
-
:param
|
|
631
|
-
Parameter options : ( 'skip','overwrite','fail')
|
|
632
|
-
skip (default option): Ignore the duplicates documents
|
|
633
|
-
overwrite: Update any existing documents with the same ID when adding documents.
|
|
634
|
-
fail: an error is raised if the document ID of the document being added already
|
|
635
|
-
exists.
|
|
877
|
+
:param policy: The duplicate policy to use when writing documents.
|
|
636
878
|
:returns: A list of Haystack Document objects.
|
|
637
879
|
"""
|
|
638
880
|
|
|
@@ -652,10 +894,10 @@ class QdrantDocumentStore:
|
|
|
652
894
|
|
|
653
895
|
def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
|
|
654
896
|
"""
|
|
655
|
-
Drop
|
|
897
|
+
Drop duplicate documents based on same hash ID.
|
|
656
898
|
|
|
657
899
|
:param documents: A list of Haystack Document objects.
|
|
658
|
-
:param index:
|
|
900
|
+
:param index: Name of the index.
|
|
659
901
|
:returns: A list of Haystack Document objects.
|
|
660
902
|
"""
|
|
661
903
|
_hash_ids: Set = set()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version:
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=IRjcM4f8b5eKFEMn8tn6h6RrfslEGP3WafU7mrzNzQM,313
|
|
2
|
-
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=
|
|
2
|
+
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=_6noYJ0M71shgoTOywIgSuGQtB-CBhwRW_zUFiYIOTw,13465
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
|
-
haystack_integrations/document_stores/qdrant/converters.py,sha256=
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=
|
|
4
|
+
haystack_integrations/document_stores/qdrant/converters.py,sha256=2hcuI3kty1dVHzX1WGXxEtlrnZ9E8TAG56XATCFa6Pw,2491
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=mjzv6Z3iE9oFRil_PVLjmEq-vX7a7ULpT5afGsU7iSU,36088
|
|
6
6
|
haystack_integrations/document_stores/qdrant/filters.py,sha256=0w70Wa3Za1fNdbJ5O95sZDIpXfblJG_sBBUv0JTQ0-o,8337
|
|
7
7
|
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
|
|
8
|
-
qdrant_haystack-
|
|
9
|
-
qdrant_haystack-
|
|
10
|
-
qdrant_haystack-
|
|
11
|
-
qdrant_haystack-
|
|
8
|
+
qdrant_haystack-4.0.0.dist-info/METADATA,sha256=wHvVJIDCQDPFLX8fL_d11zNMZul4U6r02bVhhCdmitk,1862
|
|
9
|
+
qdrant_haystack-4.0.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
+
qdrant_haystack-4.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-4.0.0.dist-info/RECORD,,
|
|
File without changes
|