qdrant-haystack 9.1.0__tar.gz → 9.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/CHANGELOG.md +16 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/PKG-INFO +2 -1
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/pyproject.toml +1 -3
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +9 -9
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/document_stores/qdrant/document_store.py +34 -31
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/document_stores/qdrant/filters.py +4 -4
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +1 -1
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_document_store.py +74 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/.gitignore +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/LICENSE.txt +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/README.md +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/pydoc/config.yml +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/__init__.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/conftest.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_converters.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_document_store_async.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_embedding_retriever.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_filters.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_hybrid_retriever.py +0 -0
- {qdrant_haystack-9.1.0 → qdrant_haystack-9.1.2}/tests/test_sparse_embedding_retriever.py +0 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v9.1.1] - 2025-03-20
|
|
4
|
+
|
|
5
|
+
### ⚙️ CI
|
|
6
|
+
|
|
7
|
+
- Review testing workflows (#1541)
|
|
8
|
+
|
|
9
|
+
### 🌀 Miscellaneous
|
|
10
|
+
|
|
11
|
+
- Fix: `TypeError` in `QdrantDocumentStore` when handling duplicate documents (#1551)
|
|
12
|
+
|
|
13
|
+
## [integrations/qdrant-v9.1.0] - 2025-03-14
|
|
14
|
+
|
|
15
|
+
### 🚀 Features
|
|
16
|
+
|
|
17
|
+
- Qdrant -- async support (#1480)
|
|
18
|
+
|
|
3
19
|
## [integrations/qdrant-v9.0.0] - 2025-03-11
|
|
4
20
|
|
|
5
21
|
### 🧹 Chores
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 9.1.0
|
|
3
|
+
Version: 9.1.2
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
19
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
20
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
21
|
Requires-Python: >=3.9
|
|
@@ -22,6 +22,7 @@ classifiers = [
|
|
|
22
22
|
"Programming Language :: Python :: 3.10",
|
|
23
23
|
"Programming Language :: Python :: 3.11",
|
|
24
24
|
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
25
26
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
26
27
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
27
28
|
]
|
|
@@ -55,9 +56,6 @@ cov = ["test-cov", "cov-report"]
|
|
|
55
56
|
cov-retry = ["test-cov-retry", "cov-report"]
|
|
56
57
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
57
58
|
|
|
58
|
-
[[tool.hatch.envs.all.matrix]]
|
|
59
|
-
python = [ "3.9", "3.10", "3.11"]
|
|
60
|
-
|
|
61
59
|
[tool.hatch.envs.lint]
|
|
62
60
|
installer = "uv"
|
|
63
61
|
detached = true
|
|
@@ -46,7 +46,7 @@ class QdrantEmbeddingRetriever:
|
|
|
46
46
|
score_threshold: Optional[float] = None,
|
|
47
47
|
group_by: Optional[str] = None,
|
|
48
48
|
group_size: Optional[int] = None,
|
|
49
|
-
):
|
|
49
|
+
) -> None:
|
|
50
50
|
"""
|
|
51
51
|
Create a QdrantEmbeddingRetriever component.
|
|
52
52
|
|
|
@@ -136,7 +136,7 @@ class QdrantEmbeddingRetriever:
|
|
|
136
136
|
score_threshold: Optional[float] = None,
|
|
137
137
|
group_by: Optional[str] = None,
|
|
138
138
|
group_size: Optional[int] = None,
|
|
139
|
-
):
|
|
139
|
+
) -> Dict[str, List[Document]]:
|
|
140
140
|
"""
|
|
141
141
|
Run the Embedding Retriever on the given input data.
|
|
142
142
|
|
|
@@ -180,7 +180,7 @@ class QdrantEmbeddingRetriever:
|
|
|
180
180
|
score_threshold: Optional[float] = None,
|
|
181
181
|
group_by: Optional[str] = None,
|
|
182
182
|
group_size: Optional[int] = None,
|
|
183
|
-
):
|
|
183
|
+
) -> Dict[str, List[Document]]:
|
|
184
184
|
"""
|
|
185
185
|
Asynchronously run the Embedding Retriever on the given input data.
|
|
186
186
|
|
|
@@ -252,7 +252,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
252
252
|
score_threshold: Optional[float] = None,
|
|
253
253
|
group_by: Optional[str] = None,
|
|
254
254
|
group_size: Optional[int] = None,
|
|
255
|
-
):
|
|
255
|
+
) -> None:
|
|
256
256
|
"""
|
|
257
257
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
258
258
|
|
|
@@ -342,7 +342,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
342
342
|
score_threshold: Optional[float] = None,
|
|
343
343
|
group_by: Optional[str] = None,
|
|
344
344
|
group_size: Optional[int] = None,
|
|
345
|
-
):
|
|
345
|
+
) -> Dict[str, List[Document]]:
|
|
346
346
|
"""
|
|
347
347
|
Run the Sparse Embedding Retriever on the given input data.
|
|
348
348
|
|
|
@@ -391,7 +391,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
391
391
|
score_threshold: Optional[float] = None,
|
|
392
392
|
group_by: Optional[str] = None,
|
|
393
393
|
group_size: Optional[int] = None,
|
|
394
|
-
):
|
|
394
|
+
) -> Dict[str, List[Document]]:
|
|
395
395
|
"""
|
|
396
396
|
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
397
397
|
|
|
@@ -473,7 +473,7 @@ class QdrantHybridRetriever:
|
|
|
473
473
|
score_threshold: Optional[float] = None,
|
|
474
474
|
group_by: Optional[str] = None,
|
|
475
475
|
group_size: Optional[int] = None,
|
|
476
|
-
):
|
|
476
|
+
) -> None:
|
|
477
477
|
"""
|
|
478
478
|
Create a QdrantHybridRetriever component.
|
|
479
479
|
|
|
@@ -557,7 +557,7 @@ class QdrantHybridRetriever:
|
|
|
557
557
|
score_threshold: Optional[float] = None,
|
|
558
558
|
group_by: Optional[str] = None,
|
|
559
559
|
group_size: Optional[int] = None,
|
|
560
|
-
):
|
|
560
|
+
) -> Dict[str, List[Document]]:
|
|
561
561
|
"""
|
|
562
562
|
Run the Sparse Embedding Retriever on the given input data.
|
|
563
563
|
|
|
@@ -606,7 +606,7 @@ class QdrantHybridRetriever:
|
|
|
606
606
|
score_threshold: Optional[float] = None,
|
|
607
607
|
group_by: Optional[str] = None,
|
|
608
608
|
group_size: Optional[int] = None,
|
|
609
|
-
):
|
|
609
|
+
) -> Dict[str, List[Document]]:
|
|
610
610
|
"""
|
|
611
611
|
Asynchronously run the Sparse Embedding Retriever on the given input data.
|
|
612
612
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
from itertools import islice
|
|
3
|
-
from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Union
|
|
3
|
+
from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Tuple, Union
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import qdrant_client
|
|
@@ -18,6 +18,7 @@ from tqdm import tqdm
|
|
|
18
18
|
from .converters import (
|
|
19
19
|
DENSE_VECTORS_NAME,
|
|
20
20
|
SPARSE_VECTORS_NAME,
|
|
21
|
+
QdrantPoint,
|
|
21
22
|
convert_haystack_documents_to_qdrant_points,
|
|
22
23
|
convert_id,
|
|
23
24
|
convert_qdrant_point_to_haystack_document,
|
|
@@ -34,7 +35,7 @@ class QdrantStoreError(DocumentStoreError):
|
|
|
34
35
|
FilterType = Dict[str, Union[Dict[str, Any], List[Any], str, int, float, bool]]
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def get_batches_from_generator(iterable, n):
|
|
38
|
+
def get_batches_from_generator(iterable: List, n: int) -> Generator:
|
|
38
39
|
"""
|
|
39
40
|
Batch elements of an iterable into fixed-length chunks or blocks.
|
|
40
41
|
"""
|
|
@@ -127,10 +128,10 @@ class QdrantDocumentStore:
|
|
|
127
128
|
write_batch_size: int = 100,
|
|
128
129
|
scroll_size: int = 10_000,
|
|
129
130
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
130
|
-
):
|
|
131
|
+
) -> None:
|
|
131
132
|
"""
|
|
132
133
|
:param location:
|
|
133
|
-
If `memory` - use in-memory Qdrant instance.
|
|
134
|
+
If `":memory:"` - use in-memory Qdrant instance.
|
|
134
135
|
If `str` - use it as a URL parameter.
|
|
135
136
|
If `None` - use default values for host and port.
|
|
136
137
|
:param url:
|
|
@@ -164,7 +165,7 @@ class QdrantDocumentStore:
|
|
|
164
165
|
Dimension of the embeddings.
|
|
165
166
|
:param on_disk:
|
|
166
167
|
Whether to store the collection on disk.
|
|
167
|
-
:param
|
|
168
|
+
:param use_sparse_embeddings:
|
|
168
169
|
If set to `True`, enables support for sparse embeddings.
|
|
169
170
|
:param sparse_idf:
|
|
170
171
|
If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
|
|
@@ -232,7 +233,6 @@ class QdrantDocumentStore:
|
|
|
232
233
|
self.path = path
|
|
233
234
|
self.force_disable_check_same_thread = force_disable_check_same_thread
|
|
234
235
|
self.metadata = metadata or {}
|
|
235
|
-
self.api_key = api_key
|
|
236
236
|
|
|
237
237
|
# Store the Qdrant collection specific attributes
|
|
238
238
|
self.shard_number = shard_number
|
|
@@ -258,9 +258,10 @@ class QdrantDocumentStore:
|
|
|
258
258
|
self.write_batch_size = write_batch_size
|
|
259
259
|
self.scroll_size = scroll_size
|
|
260
260
|
|
|
261
|
-
def _initialize_client(self):
|
|
261
|
+
def _initialize_client(self) -> None:
|
|
262
262
|
if self._client is None:
|
|
263
263
|
client_params = self._prepare_client_params()
|
|
264
|
+
# This step adds the api-key and User-Agent to metadata
|
|
264
265
|
self._client = qdrant_client.QdrantClient(**client_params)
|
|
265
266
|
# Make sure the collection is properly set up
|
|
266
267
|
self._set_up_collection(
|
|
@@ -274,7 +275,7 @@ class QdrantDocumentStore:
|
|
|
274
275
|
self.payload_fields_to_index,
|
|
275
276
|
)
|
|
276
277
|
|
|
277
|
-
async def _initialize_async_client(self):
|
|
278
|
+
async def _initialize_async_client(self) -> None:
|
|
278
279
|
"""
|
|
279
280
|
Returns the asynchronous Qdrant client, initializing it if necessary.
|
|
280
281
|
"""
|
|
@@ -628,8 +629,6 @@ class QdrantDocumentStore:
|
|
|
628
629
|
|
|
629
630
|
:param ids:
|
|
630
631
|
A list of document IDs to retrieve.
|
|
631
|
-
:param index:
|
|
632
|
-
The name of the index to retrieve documents from.
|
|
633
632
|
:returns:
|
|
634
633
|
A list of documents.
|
|
635
634
|
"""
|
|
@@ -661,8 +660,6 @@ class QdrantDocumentStore:
|
|
|
661
660
|
|
|
662
661
|
:param ids:
|
|
663
662
|
A list of document IDs to retrieve.
|
|
664
|
-
:param index:
|
|
665
|
-
The name of the index to retrieve documents from.
|
|
666
663
|
:returns:
|
|
667
664
|
A list of documents.
|
|
668
665
|
"""
|
|
@@ -1210,7 +1207,7 @@ class QdrantDocumentStore:
|
|
|
1210
1207
|
)
|
|
1211
1208
|
raise QdrantStoreError(msg) from ke
|
|
1212
1209
|
|
|
1213
|
-
def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None):
|
|
1210
|
+
def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None) -> None:
|
|
1214
1211
|
"""
|
|
1215
1212
|
Create payload index for the collection if payload_fields_to_index is provided
|
|
1216
1213
|
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
@@ -1229,7 +1226,7 @@ class QdrantDocumentStore:
|
|
|
1229
1226
|
|
|
1230
1227
|
async def _create_payload_index_async(
|
|
1231
1228
|
self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None
|
|
1232
|
-
):
|
|
1229
|
+
) -> None:
|
|
1233
1230
|
"""
|
|
1234
1231
|
Asynchronously create payload index for the collection if payload_fields_to_index is provided
|
|
1235
1232
|
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
@@ -1257,7 +1254,7 @@ class QdrantDocumentStore:
|
|
|
1257
1254
|
sparse_idf: bool,
|
|
1258
1255
|
on_disk: bool = False,
|
|
1259
1256
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
1260
|
-
):
|
|
1257
|
+
) -> None:
|
|
1261
1258
|
"""
|
|
1262
1259
|
Sets up the Qdrant collection with the specified parameters.
|
|
1263
1260
|
:param collection_name:
|
|
@@ -1313,7 +1310,7 @@ class QdrantDocumentStore:
|
|
|
1313
1310
|
sparse_idf: bool,
|
|
1314
1311
|
on_disk: bool = False,
|
|
1315
1312
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
1316
|
-
):
|
|
1313
|
+
) -> None:
|
|
1317
1314
|
"""
|
|
1318
1315
|
Asynchronously sets up the Qdrant collection with the specified parameters.
|
|
1319
1316
|
:param collection_name:
|
|
@@ -1367,7 +1364,7 @@ class QdrantDocumentStore:
|
|
|
1367
1364
|
on_disk: Optional[bool] = None,
|
|
1368
1365
|
use_sparse_embeddings: Optional[bool] = None,
|
|
1369
1366
|
sparse_idf: bool = False,
|
|
1370
|
-
):
|
|
1367
|
+
) -> None:
|
|
1371
1368
|
"""
|
|
1372
1369
|
Recreates the Qdrant collection with the specified parameters.
|
|
1373
1370
|
|
|
@@ -1410,7 +1407,7 @@ class QdrantDocumentStore:
|
|
|
1410
1407
|
on_disk: Optional[bool] = None,
|
|
1411
1408
|
use_sparse_embeddings: Optional[bool] = None,
|
|
1412
1409
|
sparse_idf: bool = False,
|
|
1413
|
-
):
|
|
1410
|
+
) -> None:
|
|
1414
1411
|
"""
|
|
1415
1412
|
Asynchronously recreates the Qdrant collection with the specified parameters.
|
|
1416
1413
|
|
|
@@ -1449,7 +1446,7 @@ class QdrantDocumentStore:
|
|
|
1449
1446
|
self,
|
|
1450
1447
|
documents: List[Document],
|
|
1451
1448
|
policy: DuplicatePolicy = None,
|
|
1452
|
-
):
|
|
1449
|
+
) -> List[Document]:
|
|
1453
1450
|
"""
|
|
1454
1451
|
Checks whether any of the passed documents is already existing in the chosen index and returns a list of
|
|
1455
1452
|
documents that are not in the index yet.
|
|
@@ -1476,7 +1473,7 @@ class QdrantDocumentStore:
|
|
|
1476
1473
|
self,
|
|
1477
1474
|
documents: List[Document],
|
|
1478
1475
|
policy: DuplicatePolicy = None,
|
|
1479
|
-
):
|
|
1476
|
+
) -> List[Document]:
|
|
1480
1477
|
"""
|
|
1481
1478
|
Asynchronously checks whether any of the passed documents is already existing
|
|
1482
1479
|
in the chosen index and returns a list of
|
|
@@ -1511,9 +1508,9 @@ class QdrantDocumentStore:
|
|
|
1511
1508
|
for document in documents:
|
|
1512
1509
|
if document.id in _hash_ids:
|
|
1513
1510
|
logger.info(
|
|
1514
|
-
"Duplicate Documents: Document with id '%s' already exists in index '%s'",
|
|
1515
|
-
document.id,
|
|
1516
|
-
self.index,
|
|
1511
|
+
"Duplicate Documents: Document with id '{document_id}' already exists in index '{index}'",
|
|
1512
|
+
document_id=document.id,
|
|
1513
|
+
index=self.index,
|
|
1517
1514
|
)
|
|
1518
1515
|
continue
|
|
1519
1516
|
_documents.append(document)
|
|
@@ -1521,7 +1518,7 @@ class QdrantDocumentStore:
|
|
|
1521
1518
|
|
|
1522
1519
|
return _documents
|
|
1523
1520
|
|
|
1524
|
-
def _prepare_collection_params(self):
|
|
1521
|
+
def _prepare_collection_params(self) -> Dict[str, Any]:
|
|
1525
1522
|
"""
|
|
1526
1523
|
Prepares the common parameters for collection creation.
|
|
1527
1524
|
"""
|
|
@@ -1537,7 +1534,7 @@ class QdrantDocumentStore:
|
|
|
1537
1534
|
"init_from": self.init_from,
|
|
1538
1535
|
}
|
|
1539
1536
|
|
|
1540
|
-
def _prepare_client_params(self):
|
|
1537
|
+
def _prepare_client_params(self) -> Dict[str, Any]:
|
|
1541
1538
|
"""
|
|
1542
1539
|
Prepares the common parameters for client initialization.
|
|
1543
1540
|
|
|
@@ -1554,7 +1551,10 @@ class QdrantDocumentStore:
|
|
|
1554
1551
|
"timeout": self.timeout,
|
|
1555
1552
|
"host": self.host,
|
|
1556
1553
|
"path": self.path,
|
|
1557
|
-
"metadata": self.metadata,
|
|
1554
|
+
# NOTE: We purposefully expand the fields of self.metadata to avoid modifying the original self.metadata
|
|
1555
|
+
# class attribute. For example, the resolved api key is added to metadata by the QdrantClient class
|
|
1556
|
+
# when using a hosted Qdrant service, which means running to_dict() exposes the api key.
|
|
1557
|
+
"metadata": {**self.metadata},
|
|
1558
1558
|
"force_disable_check_same_thread": self.force_disable_check_same_thread,
|
|
1559
1559
|
}
|
|
1560
1560
|
|
|
@@ -1565,7 +1565,7 @@ class QdrantDocumentStore:
|
|
|
1565
1565
|
on_disk: Optional[bool] = None,
|
|
1566
1566
|
use_sparse_embeddings: Optional[bool] = None,
|
|
1567
1567
|
sparse_idf: bool = False,
|
|
1568
|
-
):
|
|
1568
|
+
) -> Tuple[Dict[str, rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
|
|
1569
1569
|
"""
|
|
1570
1570
|
Prepares the configuration for creating or recreating a Qdrant collection.
|
|
1571
1571
|
|
|
@@ -1595,9 +1595,12 @@ class QdrantDocumentStore:
|
|
|
1595
1595
|
|
|
1596
1596
|
return vectors_config, sparse_vectors_config
|
|
1597
1597
|
|
|
1598
|
-
def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]] = None):
|
|
1598
|
+
def _validate_filters(self, filters: Optional[Union[Dict[str, Any], rest.Filter]] = None) -> None:
|
|
1599
1599
|
"""
|
|
1600
1600
|
Validates the filters provided for querying.
|
|
1601
|
+
|
|
1602
|
+
:param filters: Filters to validate. Can be a dictionary or an instance of `qdrant_client.http.models.Filter`.
|
|
1603
|
+
:raises ValueError: If the filters are not in the correct format or syntax.
|
|
1601
1604
|
"""
|
|
1602
1605
|
if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
|
|
1603
1606
|
msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
|
|
@@ -1607,7 +1610,7 @@ class QdrantDocumentStore:
|
|
|
1607
1610
|
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
|
|
1608
1611
|
raise ValueError(msg)
|
|
1609
1612
|
|
|
1610
|
-
def _process_query_point_results(self, results, scale_score: bool = False):
|
|
1613
|
+
def _process_query_point_results(self, results: List[QdrantPoint], scale_score: bool = False) -> List[Document]:
|
|
1611
1614
|
"""
|
|
1612
1615
|
Processes query results from Qdrant.
|
|
1613
1616
|
"""
|
|
@@ -1627,7 +1630,7 @@ class QdrantDocumentStore:
|
|
|
1627
1630
|
|
|
1628
1631
|
return documents
|
|
1629
1632
|
|
|
1630
|
-
def _process_group_results(self, groups):
|
|
1633
|
+
def _process_group_results(self, groups: List[rest.PointGroup]) -> List[Document]:
|
|
1631
1634
|
"""
|
|
1632
1635
|
Processes grouped query results from Qdrant.
|
|
1633
1636
|
|
|
@@ -1647,7 +1650,7 @@ class QdrantDocumentStore:
|
|
|
1647
1650
|
collection_info,
|
|
1648
1651
|
distance,
|
|
1649
1652
|
embedding_dim: int,
|
|
1650
|
-
):
|
|
1653
|
+
) -> None:
|
|
1651
1654
|
"""
|
|
1652
1655
|
Validates that an existing collection is compatible with the current configuration.
|
|
1653
1656
|
"""
|
|
@@ -138,10 +138,10 @@ def convert_filters_to_qdrant(
|
|
|
138
138
|
|
|
139
139
|
|
|
140
140
|
def build_filters_for_repeated_operators(
|
|
141
|
-
must_clauses,
|
|
142
|
-
should_clauses,
|
|
143
|
-
must_not_clauses,
|
|
144
|
-
qdrant_filter,
|
|
141
|
+
must_clauses: List,
|
|
142
|
+
should_clauses: List,
|
|
143
|
+
must_not_clauses: List,
|
|
144
|
+
qdrant_filter: List[models.Filter],
|
|
145
145
|
) -> List[models.Filter]:
|
|
146
146
|
"""
|
|
147
147
|
Flattens the nested lists of clauses by creating separate Filters for each clause of a logical operator.
|
|
@@ -11,7 +11,7 @@ logger.addHandler(python_logging.StreamHandler())
|
|
|
11
11
|
logger.setLevel(python_logging.INFO)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str):
|
|
14
|
+
def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str) -> None:
|
|
15
15
|
"""
|
|
16
16
|
Utility function to migrate an existing `QdrantDocumentStore` to a new one with support for sparse embeddings.
|
|
17
17
|
|
|
@@ -12,6 +12,7 @@ from haystack.testing.document_store import (
|
|
|
12
12
|
WriteDocumentsTest,
|
|
13
13
|
_random_embeddings,
|
|
14
14
|
)
|
|
15
|
+
from haystack.utils import Secret
|
|
15
16
|
from qdrant_client.http import models as rest
|
|
16
17
|
|
|
17
18
|
from haystack_integrations.document_stores.qdrant.document_store import (
|
|
@@ -38,6 +39,79 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
38
39
|
QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
39
40
|
mocked_qdrant.assert_not_called()
|
|
40
41
|
|
|
42
|
+
def test_prepare_client_params_no_mutability(self):
|
|
43
|
+
metadata = {"key": "value"}
|
|
44
|
+
doc_store = QdrantDocumentStore(
|
|
45
|
+
":memory:",
|
|
46
|
+
recreate_index=True,
|
|
47
|
+
return_embedding=True,
|
|
48
|
+
wait_result_from_api=True,
|
|
49
|
+
use_sparse_embeddings=False,
|
|
50
|
+
metadata=metadata,
|
|
51
|
+
)
|
|
52
|
+
client_params = doc_store._prepare_client_params()
|
|
53
|
+
# Mutate value of metadata in client_params
|
|
54
|
+
client_params["metadata"] = client_params["metadata"].update({"new_key": "new_value"})
|
|
55
|
+
|
|
56
|
+
# Assert that the original metadata in the document store is unchanged
|
|
57
|
+
assert metadata == {"key": "value"}
|
|
58
|
+
|
|
59
|
+
def test_to_dict(self, monkeypatch):
|
|
60
|
+
monkeypatch.setenv("QDRANT_API_KEY", "test_api_key")
|
|
61
|
+
doc_store = QdrantDocumentStore(
|
|
62
|
+
":memory:",
|
|
63
|
+
recreate_index=True,
|
|
64
|
+
return_embedding=True,
|
|
65
|
+
wait_result_from_api=True,
|
|
66
|
+
use_sparse_embeddings=False,
|
|
67
|
+
api_key=Secret.from_env_var("QDRANT_API_KEY"),
|
|
68
|
+
)
|
|
69
|
+
expected_dict = {
|
|
70
|
+
"type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
|
|
71
|
+
"init_parameters": {
|
|
72
|
+
"location": ":memory:",
|
|
73
|
+
"url": None,
|
|
74
|
+
"port": 6333,
|
|
75
|
+
"grpc_port": 6334,
|
|
76
|
+
"prefer_grpc": False,
|
|
77
|
+
"https": None,
|
|
78
|
+
"api_key": {
|
|
79
|
+
"env_vars": ["QDRANT_API_KEY"],
|
|
80
|
+
"strict": True,
|
|
81
|
+
"type": "env_var",
|
|
82
|
+
},
|
|
83
|
+
"prefix": None,
|
|
84
|
+
"timeout": None,
|
|
85
|
+
"host": None,
|
|
86
|
+
"path": None,
|
|
87
|
+
"force_disable_check_same_thread": False,
|
|
88
|
+
"index": "Document",
|
|
89
|
+
"embedding_dim": 768,
|
|
90
|
+
"on_disk": False,
|
|
91
|
+
"use_sparse_embeddings": False,
|
|
92
|
+
"sparse_idf": False,
|
|
93
|
+
"similarity": "cosine",
|
|
94
|
+
"return_embedding": True,
|
|
95
|
+
"progress_bar": True,
|
|
96
|
+
"recreate_index": True,
|
|
97
|
+
"shard_number": None,
|
|
98
|
+
"replication_factor": None,
|
|
99
|
+
"write_consistency_factor": None,
|
|
100
|
+
"on_disk_payload": None,
|
|
101
|
+
"hnsw_config": None,
|
|
102
|
+
"optimizers_config": None,
|
|
103
|
+
"wal_config": None,
|
|
104
|
+
"quantization_config": None,
|
|
105
|
+
"init_from": None,
|
|
106
|
+
"wait_result_from_api": True,
|
|
107
|
+
"metadata": {},
|
|
108
|
+
"write_batch_size": 100,
|
|
109
|
+
"scroll_size": 10000,
|
|
110
|
+
"payload_fields_to_index": None,
|
|
111
|
+
},
|
|
112
|
+
}
|
|
113
|
+
assert doc_store.to_dict() == expected_dict
|
|
114
|
+
|
|
41
115
|
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
|
|
42
116
|
"""
|
|
43
117
|
Assert that two lists of Documents are equal.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|