qdrant-haystack 9.1.3__tar.gz → 9.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/CHANGELOG.md +6 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/PKG-INFO +2 -2
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/README.md +1 -1
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/pyproject.toml +33 -29
- qdrant_haystack-9.2.0/src/haystack_integrations/components/retrievers/py.typed +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +77 -6
- qdrant_haystack-9.2.0/src/haystack_integrations/document_stores/py.typed +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/document_stores/qdrant/converters.py +10 -8
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +66 -41
- qdrant_haystack-9.2.0/src/haystack_integrations/document_stores/qdrant/filters.py +234 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +11 -7
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_converters.py +0 -2
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_document_store.py +0 -7
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_document_store_async.py +0 -4
- qdrant_haystack-9.1.3/src/haystack_integrations/document_stores/qdrant/filters.py +0 -316
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/.gitignore +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/LICENSE.txt +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/__init__.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/conftest.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_embedding_retriever.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_hybrid_retriever.py +0 -0
- {qdrant_haystack-9.1.3 → qdrant_haystack-9.2.0}/tests/test_sparse_embedding_retriever.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 9.1.3
|
|
3
|
+
Version: 9.2.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -45,7 +45,7 @@ pip install qdrant-haystack
|
|
|
45
45
|
The test suites use Qdrant's in-memory instance. No additional steps required.
|
|
46
46
|
|
|
47
47
|
```console
|
|
48
|
-
hatch run test
|
|
48
|
+
hatch run test:all
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
## License
|
|
@@ -46,25 +46,37 @@ git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*
|
|
|
46
46
|
|
|
47
47
|
[tool.hatch.envs.default]
|
|
48
48
|
installer = "uv"
|
|
49
|
-
dependencies = ["
|
|
49
|
+
dependencies = ["haystack-pydoc-tools", "ruff"]
|
|
50
|
+
|
|
50
51
|
[tool.hatch.envs.default.scripts]
|
|
51
|
-
test = "pytest {args:tests}"
|
|
52
|
-
test-cov = "coverage run -m pytest {args:tests}"
|
|
53
|
-
test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
|
|
54
|
-
cov-report = ["- coverage combine", "coverage report"]
|
|
55
|
-
cov = ["test-cov", "cov-report"]
|
|
56
|
-
cov-retry = ["test-cov-retry", "cov-report"]
|
|
57
52
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
53
|
+
fmt = "ruff check --fix {args} && ruff format {args}"
|
|
54
|
+
fmt-check = "ruff check {args} && ruff format --check {args}"
|
|
55
|
+
|
|
56
|
+
[tool.hatch.envs.test]
|
|
57
|
+
dependencies = [
|
|
58
|
+
"pytest",
|
|
59
|
+
"pytest-asyncio",
|
|
60
|
+
"pytest-cov",
|
|
61
|
+
"pytest-rerunfailures",
|
|
62
|
+
"mypy",
|
|
63
|
+
"pip"
|
|
64
|
+
]
|
|
58
65
|
|
|
59
|
-
[tool.hatch.envs.
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
66
|
+
[tool.hatch.envs.test.scripts]
|
|
67
|
+
unit = 'pytest -m "not integration" {args:tests}'
|
|
68
|
+
integration = 'pytest -m "integration" {args:tests}'
|
|
69
|
+
all = 'pytest {args:tests}'
|
|
70
|
+
cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
|
|
71
|
+
|
|
72
|
+
types = """mypy -p haystack_integrations.document_stores.qdrant \
|
|
73
|
+
-p haystack_integrations.components.retrievers.qdrant {args}"""
|
|
74
|
+
|
|
75
|
+
[tool.mypy]
|
|
76
|
+
install_types = true
|
|
77
|
+
non_interactive = true
|
|
78
|
+
check_untyped_defs = true
|
|
79
|
+
disallow_incomplete_defs = true
|
|
68
80
|
|
|
69
81
|
[tool.black]
|
|
70
82
|
target-version = ["py38"]
|
|
@@ -146,18 +158,10 @@ parallel = false
|
|
|
146
158
|
|
|
147
159
|
|
|
148
160
|
[tool.coverage.report]
|
|
149
|
-
omit = [
|
|
161
|
+
omit = [
|
|
162
|
+
"*/tests/*",
|
|
163
|
+
"*/__init__.py",
|
|
164
|
+
"src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py",
|
|
165
|
+
]
|
|
150
166
|
show_missing = true
|
|
151
167
|
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
[[tool.mypy.overrides]]
|
|
155
|
-
module = [
|
|
156
|
-
"haystack.*",
|
|
157
|
-
"haystack_integrations.*",
|
|
158
|
-
"pytest.*",
|
|
159
|
-
"qdrant_client.*",
|
|
160
|
-
"numpy",
|
|
161
|
-
"grpc",
|
|
162
|
-
]
|
|
163
|
-
ignore_missing_imports = true
|
|
File without changes
|
|
@@ -8,6 +8,11 @@ from qdrant_client.http import models
|
|
|
8
8
|
|
|
9
9
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
10
10
|
|
|
11
|
+
FILTER_POLICY_MERGE_ERROR_MESSAGE = (
|
|
12
|
+
"Native Qdrant filters cannot be used with filter_policy set to MERGE. "
|
|
13
|
+
"Set filter_policy to REPLACE or use Haystack filters instead."
|
|
14
|
+
)
|
|
15
|
+
|
|
11
16
|
|
|
12
17
|
@component
|
|
13
18
|
class QdrantEmbeddingRetriever:
|
|
@@ -153,8 +158,19 @@ class QdrantEmbeddingRetriever:
|
|
|
153
158
|
:returns:
|
|
154
159
|
The retrieved documents.
|
|
155
160
|
|
|
161
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
156
162
|
"""
|
|
157
|
-
|
|
163
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
164
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
165
|
+
):
|
|
166
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
167
|
+
|
|
168
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
169
|
+
filters = apply_filter_policy(
|
|
170
|
+
filter_policy=self._filter_policy,
|
|
171
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
172
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
173
|
+
)
|
|
158
174
|
|
|
159
175
|
docs = self._document_store._query_by_embedding(
|
|
160
176
|
query_embedding=query_embedding,
|
|
@@ -197,8 +213,19 @@ class QdrantEmbeddingRetriever:
|
|
|
197
213
|
:returns:
|
|
198
214
|
The retrieved documents.
|
|
199
215
|
|
|
216
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
200
217
|
"""
|
|
201
|
-
|
|
218
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
219
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
220
|
+
):
|
|
221
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
222
|
+
|
|
223
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
224
|
+
filters = apply_filter_policy(
|
|
225
|
+
filter_policy=self._filter_policy,
|
|
226
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
227
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
228
|
+
)
|
|
202
229
|
|
|
203
230
|
docs = await self._document_store._query_by_embedding_async(
|
|
204
231
|
query_embedding=query_embedding,
|
|
@@ -364,8 +391,19 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
364
391
|
:returns:
|
|
365
392
|
The retrieved documents.
|
|
366
393
|
|
|
394
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
367
395
|
"""
|
|
368
|
-
|
|
396
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
397
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
398
|
+
):
|
|
399
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
400
|
+
|
|
401
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
402
|
+
filters = apply_filter_policy(
|
|
403
|
+
filter_policy=self._filter_policy,
|
|
404
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
405
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
406
|
+
)
|
|
369
407
|
|
|
370
408
|
docs = self._document_store._query_by_sparse(
|
|
371
409
|
query_sparse_embedding=query_sparse_embedding,
|
|
@@ -413,8 +451,19 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
413
451
|
:returns:
|
|
414
452
|
The retrieved documents.
|
|
415
453
|
|
|
454
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
416
455
|
"""
|
|
417
|
-
|
|
456
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
457
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
458
|
+
):
|
|
459
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
460
|
+
|
|
461
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
462
|
+
filters = apply_filter_policy(
|
|
463
|
+
filter_policy=self._filter_policy,
|
|
464
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
465
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
466
|
+
)
|
|
418
467
|
|
|
419
468
|
docs = await self._document_store._query_by_sparse_async(
|
|
420
469
|
query_sparse_embedding=query_sparse_embedding,
|
|
@@ -579,8 +628,19 @@ class QdrantHybridRetriever:
|
|
|
579
628
|
:returns:
|
|
580
629
|
The retrieved documents.
|
|
581
630
|
|
|
631
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
582
632
|
"""
|
|
583
|
-
|
|
633
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
634
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
635
|
+
):
|
|
636
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
637
|
+
|
|
638
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
639
|
+
filters = apply_filter_policy(
|
|
640
|
+
filter_policy=self._filter_policy,
|
|
641
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
642
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
643
|
+
)
|
|
584
644
|
|
|
585
645
|
docs = self._document_store._query_hybrid(
|
|
586
646
|
query_embedding=query_embedding,
|
|
@@ -628,8 +688,19 @@ class QdrantHybridRetriever:
|
|
|
628
688
|
:returns:
|
|
629
689
|
The retrieved documents.
|
|
630
690
|
|
|
691
|
+
:raises ValueError: If 'filter_policy' is set to 'MERGE' and 'filters' is a native Qdrant filter.
|
|
631
692
|
"""
|
|
632
|
-
|
|
693
|
+
if self._filter_policy == FilterPolicy.MERGE and (
|
|
694
|
+
isinstance(self._filters, models.Filter) or isinstance(filters, models.Filter)
|
|
695
|
+
):
|
|
696
|
+
raise ValueError(FILTER_POLICY_MERGE_ERROR_MESSAGE)
|
|
697
|
+
|
|
698
|
+
# Replacing filters works with native Qdrant filters even if the type is wrong
|
|
699
|
+
filters = apply_filter_policy(
|
|
700
|
+
filter_policy=self._filter_policy,
|
|
701
|
+
init_filters=self._filters, # type: ignore[arg-type]
|
|
702
|
+
runtime_filters=filters, # type: ignore[arg-type]
|
|
703
|
+
)
|
|
633
704
|
|
|
634
705
|
docs = await self._document_store._query_hybrid_async(
|
|
635
706
|
query_embedding=query_embedding,
|
|
File without changes
|
|
@@ -37,7 +37,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
37
37
|
|
|
38
38
|
else:
|
|
39
39
|
vector = payload.pop("embedding") or {}
|
|
40
|
-
_id = convert_id(
|
|
40
|
+
_id = convert_id(document.id)
|
|
41
41
|
|
|
42
42
|
point = rest.PointStruct(
|
|
43
43
|
payload=payload,
|
|
@@ -62,19 +62,21 @@ QdrantPoint = Union[rest.ScoredPoint, rest.Record]
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_embeddings: bool) -> Document:
|
|
65
|
-
payload =
|
|
65
|
+
payload = point.payload or {}
|
|
66
66
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
67
67
|
|
|
68
68
|
if not use_sparse_embeddings:
|
|
69
69
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
70
|
-
elif hasattr(point, "vector") and point.vector is not None:
|
|
70
|
+
elif hasattr(point, "vector") and point.vector is not None and isinstance(point.vector, dict):
|
|
71
71
|
payload["embedding"] = point.vector.get(DENSE_VECTORS_NAME)
|
|
72
72
|
|
|
73
73
|
if SPARSE_VECTORS_NAME in point.vector:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
74
|
+
sparse_vector = point.vector[SPARSE_VECTORS_NAME]
|
|
75
|
+
if isinstance(sparse_vector, rest.SparseVector):
|
|
76
|
+
sparse_vector_dict = {
|
|
77
|
+
"indices": sparse_vector.indices,
|
|
78
|
+
"values": sparse_vector.values,
|
|
79
|
+
}
|
|
80
|
+
payload["sparse_embedding"] = sparse_vector_dict
|
|
79
81
|
|
|
80
82
|
return Document.from_dict(payload)
|
|
@@ -2,7 +2,6 @@ import inspect
|
|
|
2
2
|
from itertools import islice
|
|
3
3
|
from typing import Any, AsyncGenerator, ClassVar, Dict, Generator, List, Optional, Set, Tuple, Union
|
|
4
4
|
|
|
5
|
-
import numpy as np
|
|
6
5
|
import qdrant_client
|
|
7
6
|
from haystack import default_from_dict, default_to_dict, logging
|
|
8
7
|
from haystack.dataclasses import Document
|
|
@@ -10,6 +9,7 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
|
10
9
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
11
10
|
from haystack.document_stores.types import DuplicatePolicy
|
|
12
11
|
from haystack.utils import Secret, deserialize_secrets_inplace
|
|
12
|
+
from numpy import exp
|
|
13
13
|
from qdrant_client import grpc
|
|
14
14
|
from qdrant_client.http import models as rest
|
|
15
15
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
@@ -18,7 +18,6 @@ from tqdm import tqdm
|
|
|
18
18
|
from .converters import (
|
|
19
19
|
DENSE_VECTORS_NAME,
|
|
20
20
|
SPARSE_VECTORS_NAME,
|
|
21
|
-
QdrantPoint,
|
|
22
21
|
convert_haystack_documents_to_qdrant_points,
|
|
23
22
|
convert_id,
|
|
24
23
|
convert_qdrant_point_to_haystack_document,
|
|
@@ -27,6 +26,12 @@ from .filters import convert_filters_to_qdrant
|
|
|
27
26
|
|
|
28
27
|
logger = logging.getLogger(__name__)
|
|
29
28
|
|
|
29
|
+
# Default group size to apply when using group_by
|
|
30
|
+
# - Our methods use None as the default for optional group_size parameter.
|
|
31
|
+
# - Qdrant expects an integer and internally defaults to 3 when performing grouped queries.
|
|
32
|
+
# - When group_by is specified but group_size is None, we use this value instead of passing None.
|
|
33
|
+
DEFAULT_GROUP_SIZE = 3
|
|
34
|
+
|
|
30
35
|
|
|
31
36
|
class QdrantStoreError(DocumentStoreError):
|
|
32
37
|
pass
|
|
@@ -85,7 +90,7 @@ class QdrantDocumentStore:
|
|
|
85
90
|
```
|
|
86
91
|
"""
|
|
87
92
|
|
|
88
|
-
SIMILARITY: ClassVar[Dict[str,
|
|
93
|
+
SIMILARITY: ClassVar[Dict[str, rest.Distance]] = {
|
|
89
94
|
"cosine": rest.Distance.COSINE,
|
|
90
95
|
"dot_product": rest.Distance.DOT,
|
|
91
96
|
"l2": rest.Distance.EUCLID,
|
|
@@ -216,8 +221,8 @@ class QdrantDocumentStore:
|
|
|
216
221
|
List of payload fields to index.
|
|
217
222
|
"""
|
|
218
223
|
|
|
219
|
-
self._client = None
|
|
220
|
-
self._async_client = None
|
|
224
|
+
self._client: Optional[qdrant_client.QdrantClient] = None
|
|
225
|
+
self._async_client: Optional[qdrant_client.AsyncQdrantClient] = None
|
|
221
226
|
|
|
222
227
|
# Store the Qdrant client specific attributes
|
|
223
228
|
self.location = location
|
|
@@ -575,8 +580,8 @@ class QdrantDocumentStore:
|
|
|
575
580
|
with_vectors=True,
|
|
576
581
|
)
|
|
577
582
|
stop_scrolling = next_offset is None or (
|
|
578
|
-
isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
|
|
579
|
-
)
|
|
583
|
+
isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == "" # type: ignore[union-attr]
|
|
584
|
+
) # grpc.PointId always has num and uuid
|
|
580
585
|
|
|
581
586
|
for record in records:
|
|
582
587
|
yield convert_qdrant_point_to_haystack_document(
|
|
@@ -612,8 +617,8 @@ class QdrantDocumentStore:
|
|
|
612
617
|
with_vectors=True,
|
|
613
618
|
)
|
|
614
619
|
stop_scrolling = next_offset is None or (
|
|
615
|
-
isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == ""
|
|
616
|
-
)
|
|
620
|
+
isinstance(next_offset, grpc.PointId) and next_offset.num == 0 and next_offset.uuid == "" # type: ignore[union-attr]
|
|
621
|
+
) # grpc.PointId always has num and uuid
|
|
617
622
|
|
|
618
623
|
for record in records:
|
|
619
624
|
yield convert_qdrant_point_to_haystack_document(
|
|
@@ -739,7 +744,7 @@ class QdrantDocumentStore:
|
|
|
739
744
|
query_filter=qdrant_filters,
|
|
740
745
|
limit=top_k,
|
|
741
746
|
group_by=group_by,
|
|
742
|
-
group_size=group_size,
|
|
747
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
743
748
|
with_vectors=return_embedding,
|
|
744
749
|
score_threshold=score_threshold,
|
|
745
750
|
).groups
|
|
@@ -801,7 +806,7 @@ class QdrantDocumentStore:
|
|
|
801
806
|
query_filter=qdrant_filters,
|
|
802
807
|
limit=top_k,
|
|
803
808
|
group_by=group_by,
|
|
804
|
-
group_size=group_size,
|
|
809
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
805
810
|
with_vectors=return_embedding,
|
|
806
811
|
score_threshold=score_threshold,
|
|
807
812
|
).groups
|
|
@@ -893,7 +898,7 @@ class QdrantDocumentStore:
|
|
|
893
898
|
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
894
899
|
limit=top_k,
|
|
895
900
|
group_by=group_by,
|
|
896
|
-
group_size=group_size,
|
|
901
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
897
902
|
score_threshold=score_threshold,
|
|
898
903
|
with_payload=True,
|
|
899
904
|
with_vectors=return_embedding,
|
|
@@ -990,14 +995,14 @@ class QdrantDocumentStore:
|
|
|
990
995
|
query_filter=qdrant_filters,
|
|
991
996
|
limit=top_k,
|
|
992
997
|
group_by=group_by,
|
|
993
|
-
group_size=group_size,
|
|
998
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
994
999
|
with_vectors=return_embedding,
|
|
995
1000
|
score_threshold=score_threshold,
|
|
996
1001
|
)
|
|
997
1002
|
groups = response.groups
|
|
998
1003
|
return self._process_group_results(groups)
|
|
999
1004
|
else:
|
|
1000
|
-
|
|
1005
|
+
query_response = await self._async_client.query_points(
|
|
1001
1006
|
collection_name=self.index,
|
|
1002
1007
|
query=rest.SparseVector(
|
|
1003
1008
|
indices=query_indices,
|
|
@@ -1009,7 +1014,7 @@ class QdrantDocumentStore:
|
|
|
1009
1014
|
with_vectors=return_embedding,
|
|
1010
1015
|
score_threshold=score_threshold,
|
|
1011
1016
|
)
|
|
1012
|
-
points =
|
|
1017
|
+
points = query_response.points
|
|
1013
1018
|
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1014
1019
|
|
|
1015
1020
|
async def _query_by_embedding_async(
|
|
@@ -1054,14 +1059,14 @@ class QdrantDocumentStore:
|
|
|
1054
1059
|
query_filter=qdrant_filters,
|
|
1055
1060
|
limit=top_k,
|
|
1056
1061
|
group_by=group_by,
|
|
1057
|
-
group_size=group_size,
|
|
1062
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1058
1063
|
with_vectors=return_embedding,
|
|
1059
1064
|
score_threshold=score_threshold,
|
|
1060
1065
|
)
|
|
1061
1066
|
groups = response.groups
|
|
1062
1067
|
return self._process_group_results(groups)
|
|
1063
1068
|
else:
|
|
1064
|
-
|
|
1069
|
+
query_response = await self._async_client.query_points(
|
|
1065
1070
|
collection_name=self.index,
|
|
1066
1071
|
query=query_embedding,
|
|
1067
1072
|
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
@@ -1070,7 +1075,7 @@ class QdrantDocumentStore:
|
|
|
1070
1075
|
with_vectors=return_embedding,
|
|
1071
1076
|
score_threshold=score_threshold,
|
|
1072
1077
|
)
|
|
1073
|
-
points =
|
|
1078
|
+
points = query_response.points
|
|
1074
1079
|
return self._process_query_point_results(points, scale_score=scale_score)
|
|
1075
1080
|
|
|
1076
1081
|
async def _query_hybrid_async(
|
|
@@ -1145,14 +1150,14 @@ class QdrantDocumentStore:
|
|
|
1145
1150
|
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
1146
1151
|
limit=top_k,
|
|
1147
1152
|
group_by=group_by,
|
|
1148
|
-
group_size=group_size,
|
|
1153
|
+
group_size=group_size or DEFAULT_GROUP_SIZE,
|
|
1149
1154
|
score_threshold=score_threshold,
|
|
1150
1155
|
with_payload=True,
|
|
1151
1156
|
with_vectors=return_embedding,
|
|
1152
1157
|
)
|
|
1153
1158
|
groups = response.groups
|
|
1154
1159
|
else:
|
|
1155
|
-
|
|
1160
|
+
query_response = await self._async_client.query_points(
|
|
1156
1161
|
collection_name=self.index,
|
|
1157
1162
|
prefetch=[
|
|
1158
1163
|
rest.Prefetch(
|
|
@@ -1175,7 +1180,7 @@ class QdrantDocumentStore:
|
|
|
1175
1180
|
with_payload=True,
|
|
1176
1181
|
with_vectors=return_embedding,
|
|
1177
1182
|
)
|
|
1178
|
-
points =
|
|
1183
|
+
points = query_response.points
|
|
1179
1184
|
|
|
1180
1185
|
except Exception as e:
|
|
1181
1186
|
msg = "Error during hybrid search"
|
|
@@ -1233,7 +1238,6 @@ class QdrantDocumentStore:
|
|
|
1233
1238
|
"""
|
|
1234
1239
|
if payload_fields_to_index is not None:
|
|
1235
1240
|
for payload_index in payload_fields_to_index:
|
|
1236
|
-
|
|
1237
1241
|
# self._async_client is initialized at this point
|
|
1238
1242
|
# since _initialize_async_client() is called before this method is executed
|
|
1239
1243
|
assert self._async_client is not None
|
|
@@ -1359,7 +1363,7 @@ class QdrantDocumentStore:
|
|
|
1359
1363
|
def recreate_collection(
|
|
1360
1364
|
self,
|
|
1361
1365
|
collection_name: str,
|
|
1362
|
-
distance,
|
|
1366
|
+
distance: rest.Distance,
|
|
1363
1367
|
embedding_dim: int,
|
|
1364
1368
|
on_disk: Optional[bool] = None,
|
|
1365
1369
|
use_sparse_embeddings: Optional[bool] = None,
|
|
@@ -1402,7 +1406,7 @@ class QdrantDocumentStore:
|
|
|
1402
1406
|
async def recreate_collection_async(
|
|
1403
1407
|
self,
|
|
1404
1408
|
collection_name: str,
|
|
1405
|
-
distance,
|
|
1409
|
+
distance: rest.Distance,
|
|
1406
1410
|
embedding_dim: int,
|
|
1407
1411
|
on_disk: Optional[bool] = None,
|
|
1408
1412
|
use_sparse_embeddings: Optional[bool] = None,
|
|
@@ -1445,7 +1449,7 @@ class QdrantDocumentStore:
|
|
|
1445
1449
|
def _handle_duplicate_documents(
|
|
1446
1450
|
self,
|
|
1447
1451
|
documents: List[Document],
|
|
1448
|
-
policy: DuplicatePolicy = None,
|
|
1452
|
+
policy: Optional[DuplicatePolicy] = None,
|
|
1449
1453
|
) -> List[Document]:
|
|
1450
1454
|
"""
|
|
1451
1455
|
Checks whether any of the passed documents is already existing in the chosen index and returns a list of
|
|
@@ -1472,7 +1476,7 @@ class QdrantDocumentStore:
|
|
|
1472
1476
|
async def _handle_duplicate_documents_async(
|
|
1473
1477
|
self,
|
|
1474
1478
|
documents: List[Document],
|
|
1475
|
-
policy: DuplicatePolicy = None,
|
|
1479
|
+
policy: Optional[DuplicatePolicy] = None,
|
|
1476
1480
|
) -> List[Document]:
|
|
1477
1481
|
"""
|
|
1478
1482
|
Asynchronously checks whether any of the passed documents is already existing
|
|
@@ -1561,11 +1565,11 @@ class QdrantDocumentStore:
|
|
|
1561
1565
|
def _prepare_collection_config(
|
|
1562
1566
|
self,
|
|
1563
1567
|
embedding_dim: int,
|
|
1564
|
-
distance,
|
|
1568
|
+
distance: rest.Distance,
|
|
1565
1569
|
on_disk: Optional[bool] = None,
|
|
1566
1570
|
use_sparse_embeddings: Optional[bool] = None,
|
|
1567
1571
|
sparse_idf: bool = False,
|
|
1568
|
-
) -> Tuple[Dict[str, rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
|
|
1572
|
+
) -> Tuple[Union[Dict[str, rest.VectorParams], rest.VectorParams], Optional[Dict[str, rest.SparseVectorParams]]]:
|
|
1569
1573
|
"""
|
|
1570
1574
|
Prepares the configuration for creating or recreating a Qdrant collection.
|
|
1571
1575
|
|
|
@@ -1577,12 +1581,14 @@ class QdrantDocumentStore:
|
|
|
1577
1581
|
use_sparse_embeddings = self.use_sparse_embeddings
|
|
1578
1582
|
|
|
1579
1583
|
# dense vectors configuration
|
|
1580
|
-
|
|
1581
|
-
|
|
1584
|
+
base_vectors_config = rest.VectorParams(size=embedding_dim, on_disk=on_disk, distance=distance)
|
|
1585
|
+
vectors_config: Union[rest.VectorParams, Dict[str, rest.VectorParams]] = base_vectors_config
|
|
1586
|
+
|
|
1587
|
+
sparse_vectors_config: Optional[Dict[str, rest.SparseVectorParams]] = None
|
|
1582
1588
|
|
|
1583
1589
|
if use_sparse_embeddings:
|
|
1584
1590
|
# in this case, we need to define named vectors
|
|
1585
|
-
vectors_config = {DENSE_VECTORS_NAME:
|
|
1591
|
+
vectors_config = {DENSE_VECTORS_NAME: base_vectors_config}
|
|
1586
1592
|
|
|
1587
1593
|
sparse_vectors_config = {
|
|
1588
1594
|
SPARSE_VECTORS_NAME: rest.SparseVectorParams(
|
|
@@ -1610,7 +1616,9 @@ class QdrantDocumentStore:
|
|
|
1610
1616
|
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
|
|
1611
1617
|
raise ValueError(msg)
|
|
1612
1618
|
|
|
1613
|
-
def _process_query_point_results(
|
|
1619
|
+
def _process_query_point_results(
|
|
1620
|
+
self, results: List[rest.ScoredPoint], scale_score: bool = False
|
|
1621
|
+
) -> List[Document]:
|
|
1614
1622
|
"""
|
|
1615
1623
|
Processes query results from Qdrant.
|
|
1616
1624
|
"""
|
|
@@ -1622,10 +1630,12 @@ class QdrantDocumentStore:
|
|
|
1622
1630
|
if scale_score:
|
|
1623
1631
|
for document in documents:
|
|
1624
1632
|
score = document.score
|
|
1633
|
+
if score is None:
|
|
1634
|
+
continue
|
|
1625
1635
|
if self.similarity == "cosine":
|
|
1626
1636
|
score = (score + 1) / 2
|
|
1627
1637
|
else:
|
|
1628
|
-
score = float(1 / (1 + np.exp(-score / 100)))
|
|
1638
|
+
score = float(1 / (1 + exp(-score / 100)))
|
|
1629
1639
|
document.score = score
|
|
1630
1640
|
|
|
1631
1641
|
return documents
|
|
@@ -1647,16 +1657,22 @@ class QdrantDocumentStore:
|
|
|
1647
1657
|
def _validate_collection_compatibility(
|
|
1648
1658
|
self,
|
|
1649
1659
|
collection_name: str,
|
|
1650
|
-
collection_info,
|
|
1651
|
-
distance,
|
|
1660
|
+
collection_info: rest.CollectionInfo,
|
|
1661
|
+
distance: rest.Distance,
|
|
1652
1662
|
embedding_dim: int,
|
|
1653
1663
|
) -> None:
|
|
1654
1664
|
"""
|
|
1655
1665
|
Validates that an existing collection is compatible with the current configuration.
|
|
1656
1666
|
"""
|
|
1657
|
-
|
|
1667
|
+
vectors_config = collection_info.config.params.vectors
|
|
1658
1668
|
|
|
1659
|
-
if
|
|
1669
|
+
if vectors_config is None:
|
|
1670
|
+
msg = f"Collection '{collection_name}' has no vector configuration."
|
|
1671
|
+
raise QdrantStoreError(msg)
|
|
1672
|
+
|
|
1673
|
+
has_named_vectors = isinstance(vectors_config, dict)
|
|
1674
|
+
|
|
1675
|
+
if has_named_vectors and DENSE_VECTORS_NAME not in vectors_config:
|
|
1660
1676
|
msg = (
|
|
1661
1677
|
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
1662
1678
|
f"but it has been originally created outside of Haystack and is not supported. "
|
|
@@ -1688,11 +1704,20 @@ class QdrantDocumentStore:
|
|
|
1688
1704
|
|
|
1689
1705
|
# Get current distance and vector size based on collection configuration
|
|
1690
1706
|
if self.use_sparse_embeddings:
|
|
1691
|
-
|
|
1692
|
-
|
|
1707
|
+
if not isinstance(vectors_config, dict):
|
|
1708
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for sparse embeddings."
|
|
1709
|
+
raise QdrantStoreError(msg)
|
|
1710
|
+
|
|
1711
|
+
dense_vector_config = vectors_config[DENSE_VECTORS_NAME]
|
|
1712
|
+
current_distance = dense_vector_config.distance
|
|
1713
|
+
current_vector_size = dense_vector_config.size
|
|
1693
1714
|
else:
|
|
1694
|
-
|
|
1695
|
-
|
|
1715
|
+
if isinstance(vectors_config, dict):
|
|
1716
|
+
msg = f"Collection '{collection_name}' has invalid vector configuration for dense embeddings only."
|
|
1717
|
+
raise QdrantStoreError(msg)
|
|
1718
|
+
|
|
1719
|
+
current_distance = vectors_config.distance
|
|
1720
|
+
current_vector_size = vectors_config.size
|
|
1696
1721
|
|
|
1697
1722
|
# Validate distance metric
|
|
1698
1723
|
if current_distance != distance:
|