qdrant-haystack 3.8.1__tar.gz → 4.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/.gitignore +9 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/CHANGELOG.md +18 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/PKG-INFO +2 -2
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/pyproject.toml +9 -12
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +38 -2
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/converters.py +2 -3
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +41 -28
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_dict_converters.py +3 -12
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_document_store.py +23 -1
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_retriever.py +39 -14
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/LICENSE.txt +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/README.md +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/conftest.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_converters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_legacy_filters.py +0 -0
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v4.0.0] - 2024-07-02
|
|
4
|
+
|
|
5
|
+
### 🚜 Refactor
|
|
6
|
+
|
|
7
|
+
- [**breaking**] Qdrant - remove unused init parameters: `content_field`, `name_field`, `embedding_field`, and `duplicate_documents` (#861)
|
|
8
|
+
- [**breaking**] Qdrant - set `scale_score` default value to `False` (#862)
|
|
9
|
+
|
|
10
|
+
### ⚙️ Miscellaneous Tasks
|
|
11
|
+
|
|
12
|
+
- Retry tests to reduce flakiness (#836)
|
|
13
|
+
- Update ruff invocation to include check parameter (#853)
|
|
14
|
+
|
|
15
|
+
## [integrations/qdrant-v3.8.1] - 2024-06-20
|
|
16
|
+
|
|
17
|
+
### 📚 Documentation
|
|
18
|
+
|
|
19
|
+
- Added docstrings for QdrantDocumentStore (#808)
|
|
20
|
+
|
|
3
21
|
## [integrations/qdrant-v3.8.0] - 2024-06-06
|
|
4
22
|
|
|
5
23
|
### 🚀 Features
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.8.1
|
|
3
|
+
Version: 4.1.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
|
19
19
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
20
|
Requires-Python: >=3.8
|
|
21
21
|
Requires-Dist: haystack-ai>=2.0.1
|
|
22
|
-
Requires-Dist: qdrant-client
|
|
22
|
+
Requires-Dist: qdrant-client>=1.10.0
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
25
|
# qdrant-haystack
|
|
@@ -25,7 +25,7 @@ classifiers = [
|
|
|
25
25
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
26
26
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
27
27
|
]
|
|
28
|
-
dependencies = ["haystack-ai>=2.0.1", "qdrant-client"]
|
|
28
|
+
dependencies = ["haystack-ai>=2.0.1", "qdrant-client>=1.10.0"]
|
|
29
29
|
|
|
30
30
|
[project.urls]
|
|
31
31
|
Source = "https://github.com/deepset-ai/haystack-core-integrations"
|
|
@@ -44,10 +44,10 @@ root = "../.."
|
|
|
44
44
|
git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
|
|
45
45
|
|
|
46
46
|
[tool.hatch.envs.default]
|
|
47
|
-
dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
|
|
47
|
+
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
|
|
48
48
|
[tool.hatch.envs.default.scripts]
|
|
49
|
-
test = "pytest {args:tests}"
|
|
50
|
-
test-cov = "coverage run -m pytest {args:tests}"
|
|
49
|
+
test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
50
|
+
test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
51
51
|
cov-report = ["- coverage combine", "coverage report"]
|
|
52
52
|
cov = ["test-cov", "cov-report"]
|
|
53
53
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
@@ -60,7 +60,7 @@ detached = true
|
|
|
60
60
|
dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
61
61
|
[tool.hatch.envs.lint.scripts]
|
|
62
62
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
63
|
-
style = ["ruff {args:.}", "black --check --diff {args:.}"]
|
|
63
|
+
style = ["ruff check {args:.}", "black --check --diff {args:.}"]
|
|
64
64
|
fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
|
|
65
65
|
all = ["style", "typing"]
|
|
66
66
|
|
|
@@ -105,7 +105,8 @@ ignore = [
|
|
|
105
105
|
# Allow boolean positional values in function calls, like `dict.get(... True)`
|
|
106
106
|
"FBT003",
|
|
107
107
|
# Allow boolean arguments in function definition
|
|
108
|
-
"FBT001",
|
|
108
|
+
"FBT001",
|
|
109
|
+
"FBT002",
|
|
109
110
|
# Ignore checks for possible passwords
|
|
110
111
|
"S105",
|
|
111
112
|
"S106",
|
|
@@ -140,12 +141,8 @@ parallel = false
|
|
|
140
141
|
|
|
141
142
|
[tool.coverage.report]
|
|
142
143
|
omit = ["*/tests/*", "*/__init__.py"]
|
|
143
|
-
show_missing=true
|
|
144
|
-
exclude_lines = [
|
|
145
|
-
"no cov",
|
|
146
|
-
"if __name__ == .__main__.:",
|
|
147
|
-
"if TYPE_CHECKING:",
|
|
148
|
-
]
|
|
144
|
+
show_missing = true
|
|
145
|
+
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
|
|
149
146
|
|
|
150
147
|
|
|
151
148
|
[[tool.mypy.overrides]]
|
|
@@ -37,8 +37,9 @@ class QdrantEmbeddingRetriever:
|
|
|
37
37
|
document_store: QdrantDocumentStore,
|
|
38
38
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
39
39
|
top_k: int = 10,
|
|
40
|
-
scale_score: bool = True,
|
|
40
|
+
scale_score: bool = False,
|
|
41
41
|
return_embedding: bool = False,
|
|
42
|
+
score_threshold: Optional[float] = None,
|
|
42
43
|
):
|
|
43
44
|
"""
|
|
44
45
|
Create a QdrantEmbeddingRetriever component.
|
|
@@ -48,6 +49,10 @@ class QdrantEmbeddingRetriever:
|
|
|
48
49
|
:param top_k: The maximum number of documents to retrieve.
|
|
49
50
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
50
51
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
52
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
53
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
54
|
+
depending on the `similarity` function specified in the Document Store.
|
|
55
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
51
56
|
|
|
52
57
|
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
53
58
|
"""
|
|
@@ -61,6 +66,7 @@ class QdrantEmbeddingRetriever:
|
|
|
61
66
|
self._top_k = top_k
|
|
62
67
|
self._scale_score = scale_score
|
|
63
68
|
self._return_embedding = return_embedding
|
|
69
|
+
self._score_threshold = score_threshold
|
|
64
70
|
|
|
65
71
|
def to_dict(self) -> Dict[str, Any]:
|
|
66
72
|
"""
|
|
@@ -76,6 +82,7 @@ class QdrantEmbeddingRetriever:
|
|
|
76
82
|
top_k=self._top_k,
|
|
77
83
|
scale_score=self._scale_score,
|
|
78
84
|
return_embedding=self._return_embedding,
|
|
85
|
+
score_threshold=self._score_threshold,
|
|
79
86
|
)
|
|
80
87
|
d["init_parameters"]["document_store"] = self._document_store.to_dict()
|
|
81
88
|
|
|
@@ -103,6 +110,7 @@ class QdrantEmbeddingRetriever:
|
|
|
103
110
|
top_k: Optional[int] = None,
|
|
104
111
|
scale_score: Optional[bool] = None,
|
|
105
112
|
return_embedding: Optional[bool] = None,
|
|
113
|
+
score_threshold: Optional[float] = None,
|
|
106
114
|
):
|
|
107
115
|
"""
|
|
108
116
|
Run the Embedding Retriever on the given input data.
|
|
@@ -112,6 +120,7 @@ class QdrantEmbeddingRetriever:
|
|
|
112
120
|
:param top_k: The maximum number of documents to return.
|
|
113
121
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
114
122
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
123
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
115
124
|
:returns:
|
|
116
125
|
The retrieved documents.
|
|
117
126
|
|
|
@@ -122,6 +131,7 @@ class QdrantEmbeddingRetriever:
|
|
|
122
131
|
top_k=top_k or self._top_k,
|
|
123
132
|
scale_score=scale_score or self._scale_score,
|
|
124
133
|
return_embedding=return_embedding or self._return_embedding,
|
|
134
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
125
135
|
)
|
|
126
136
|
|
|
127
137
|
return {"documents": docs}
|
|
@@ -159,8 +169,9 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
159
169
|
document_store: QdrantDocumentStore,
|
|
160
170
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
161
171
|
top_k: int = 10,
|
|
162
|
-
scale_score: bool = True,
|
|
172
|
+
scale_score: bool = False,
|
|
163
173
|
return_embedding: bool = False,
|
|
174
|
+
score_threshold: Optional[float] = None,
|
|
164
175
|
):
|
|
165
176
|
"""
|
|
166
177
|
Create a QdrantSparseEmbeddingRetriever component.
|
|
@@ -170,6 +181,10 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
170
181
|
:param top_k: The maximum number of documents to retrieve.
|
|
171
182
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
172
183
|
:param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
|
|
184
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
185
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
186
|
+
depending on the Distance function used.
|
|
187
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
173
188
|
|
|
174
189
|
:raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
|
|
175
190
|
"""
|
|
@@ -183,6 +198,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
183
198
|
self._top_k = top_k
|
|
184
199
|
self._scale_score = scale_score
|
|
185
200
|
self._return_embedding = return_embedding
|
|
201
|
+
self._score_threshold = score_threshold
|
|
186
202
|
|
|
187
203
|
def to_dict(self) -> Dict[str, Any]:
|
|
188
204
|
"""
|
|
@@ -198,6 +214,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
198
214
|
top_k=self._top_k,
|
|
199
215
|
scale_score=self._scale_score,
|
|
200
216
|
return_embedding=self._return_embedding,
|
|
217
|
+
score_threshold=self._score_threshold,
|
|
201
218
|
)
|
|
202
219
|
d["init_parameters"]["document_store"] = self._document_store.to_dict()
|
|
203
220
|
|
|
@@ -225,6 +242,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
225
242
|
top_k: Optional[int] = None,
|
|
226
243
|
scale_score: Optional[bool] = None,
|
|
227
244
|
return_embedding: Optional[bool] = None,
|
|
245
|
+
score_threshold: Optional[float] = None,
|
|
228
246
|
):
|
|
229
247
|
"""
|
|
230
248
|
Run the Sparse Embedding Retriever on the given input data.
|
|
@@ -234,6 +252,10 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
234
252
|
:param top_k: The maximum number of documents to return.
|
|
235
253
|
:param scale_score: Whether to scale the scores of the retrieved documents or not.
|
|
236
254
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
255
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
256
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
257
|
+
depending on the Distance function used.
|
|
258
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
237
259
|
:returns:
|
|
238
260
|
The retrieved documents.
|
|
239
261
|
|
|
@@ -244,6 +266,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
244
266
|
top_k=top_k or self._top_k,
|
|
245
267
|
scale_score=scale_score or self._scale_score,
|
|
246
268
|
return_embedding=return_embedding or self._return_embedding,
|
|
269
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
247
270
|
)
|
|
248
271
|
|
|
249
272
|
return {"documents": docs}
|
|
@@ -288,6 +311,7 @@ class QdrantHybridRetriever:
|
|
|
288
311
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
289
312
|
top_k: int = 10,
|
|
290
313
|
return_embedding: bool = False,
|
|
314
|
+
score_threshold: Optional[float] = None,
|
|
291
315
|
):
|
|
292
316
|
"""
|
|
293
317
|
Create a QdrantHybridRetriever component.
|
|
@@ -296,6 +320,10 @@ class QdrantHybridRetriever:
|
|
|
296
320
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
297
321
|
:param top_k: The maximum number of documents to retrieve.
|
|
298
322
|
:param return_embedding: Whether to return the embeddings of the retrieved Documents.
|
|
323
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
324
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
325
|
+
depending on the Distance function used.
|
|
326
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
299
327
|
|
|
300
328
|
:raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
|
|
301
329
|
"""
|
|
@@ -308,6 +336,7 @@ class QdrantHybridRetriever:
|
|
|
308
336
|
self._filters = filters
|
|
309
337
|
self._top_k = top_k
|
|
310
338
|
self._return_embedding = return_embedding
|
|
339
|
+
self._score_threshold = score_threshold
|
|
311
340
|
|
|
312
341
|
def to_dict(self) -> Dict[str, Any]:
|
|
313
342
|
"""
|
|
@@ -322,6 +351,7 @@ class QdrantHybridRetriever:
|
|
|
322
351
|
filters=self._filters,
|
|
323
352
|
top_k=self._top_k,
|
|
324
353
|
return_embedding=self._return_embedding,
|
|
354
|
+
score_threshold=self._score_threshold,
|
|
325
355
|
)
|
|
326
356
|
|
|
327
357
|
@classmethod
|
|
@@ -346,6 +376,7 @@ class QdrantHybridRetriever:
|
|
|
346
376
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
347
377
|
top_k: Optional[int] = None,
|
|
348
378
|
return_embedding: Optional[bool] = None,
|
|
379
|
+
score_threshold: Optional[float] = None,
|
|
349
380
|
):
|
|
350
381
|
"""
|
|
351
382
|
Run the Sparse Embedding Retriever on the given input data.
|
|
@@ -355,6 +386,10 @@ class QdrantHybridRetriever:
|
|
|
355
386
|
:param filters: A dictionary with filters to narrow down the search space.
|
|
356
387
|
:param top_k: The maximum number of documents to return.
|
|
357
388
|
:param return_embedding: Whether to return the embedding of the retrieved Documents.
|
|
389
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
390
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
391
|
+
depending on the Distance function used.
|
|
392
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
358
393
|
:returns:
|
|
359
394
|
The retrieved documents.
|
|
360
395
|
|
|
@@ -365,6 +400,7 @@ class QdrantHybridRetriever:
|
|
|
365
400
|
filters=filters or self._filters,
|
|
366
401
|
top_k=top_k or self._top_k,
|
|
367
402
|
return_embedding=return_embedding or self._return_embedding,
|
|
403
|
+
score_threshold=score_threshold or self._score_threshold,
|
|
368
404
|
)
|
|
369
405
|
|
|
370
406
|
return {"documents": docs}
|
|
@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
|
|
|
17
17
|
def convert_haystack_documents_to_qdrant_points(
|
|
18
18
|
documents: List[Document],
|
|
19
19
|
*,
|
|
20
|
-
embedding_field: str,
|
|
21
20
|
use_sparse_embeddings: bool,
|
|
22
21
|
) -> List[rest.PointStruct]:
|
|
23
22
|
points = []
|
|
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
26
25
|
if use_sparse_embeddings:
|
|
27
26
|
vector = {}
|
|
28
27
|
|
|
29
|
-
dense_vector = payload.pop(embedding_field, None)
|
|
28
|
+
dense_vector = payload.pop("embedding", None)
|
|
30
29
|
if dense_vector is not None:
|
|
31
30
|
vector[DENSE_VECTORS_NAME] = dense_vector
|
|
32
31
|
|
|
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
36
35
|
vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
|
|
37
36
|
|
|
38
37
|
else:
|
|
39
|
-
vector = payload.pop(embedding_field) or {}
|
|
38
|
+
vector = payload.pop("embedding") or {}
|
|
40
39
|
_id = convert_id(payload.get("id"))
|
|
41
40
|
|
|
42
41
|
point = rest.PointStruct(
|
|
@@ -110,14 +110,11 @@ class QdrantDocumentStore:
|
|
|
110
110
|
index: str = "Document",
|
|
111
111
|
embedding_dim: int = 768,
|
|
112
112
|
on_disk: bool = False,
|
|
113
|
-
content_field: str = "content",
|
|
114
|
-
name_field: str = "name",
|
|
115
|
-
embedding_field: str = "embedding",
|
|
116
113
|
use_sparse_embeddings: bool = False,
|
|
114
|
+
sparse_idf: bool = False,
|
|
117
115
|
similarity: str = "cosine",
|
|
118
116
|
return_embedding: bool = False,
|
|
119
117
|
progress_bar: bool = True,
|
|
120
|
-
duplicate_documents: str = "overwrite",
|
|
121
118
|
recreate_index: bool = False,
|
|
122
119
|
shard_number: Optional[int] = None,
|
|
123
120
|
replication_factor: Optional[int] = None,
|
|
@@ -170,22 +167,17 @@ class QdrantDocumentStore:
|
|
|
170
167
|
Dimension of the embeddings.
|
|
171
168
|
:param on_disk:
|
|
172
169
|
Whether to store the collection on disk.
|
|
173
|
-
:param content_field:
|
|
174
|
-
The field for the document content.
|
|
175
|
-
:param name_field:
|
|
176
|
-
The field for the document name.
|
|
177
|
-
:param embedding_field:
|
|
178
|
-
The field for the document embeddings.
|
|
179
170
|
:param use_sparse_embedding:
|
|
180
171
|
If set to `True`, enables support for sparse embeddings.
|
|
172
|
+
:param sparse_idf:
|
|
173
|
+
If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
|
|
174
|
+
It is required to use techniques like BM42. It is ignored if `use_sparse_embeddings` is `False`.
|
|
181
175
|
:param similarity:
|
|
182
176
|
The similarity metric to use.
|
|
183
177
|
:param return_embedding:
|
|
184
178
|
Whether to return embeddings in the search results.
|
|
185
179
|
:param progress_bar:
|
|
186
180
|
Whether to show a progress bar or not.
|
|
187
|
-
:param duplicate_documents:
|
|
188
|
-
The parameter is not used and will be removed in future release.
|
|
189
181
|
:param recreate_index:
|
|
190
182
|
Whether to recreate the index.
|
|
191
183
|
:param shard_number:
|
|
@@ -258,16 +250,13 @@ class QdrantDocumentStore:
|
|
|
258
250
|
self.recreate_index = recreate_index
|
|
259
251
|
self.payload_fields_to_index = payload_fields_to_index
|
|
260
252
|
self.use_sparse_embeddings = use_sparse_embeddings
|
|
253
|
+
self.sparse_idf = use_sparse_embeddings and sparse_idf
|
|
261
254
|
self.embedding_dim = embedding_dim
|
|
262
255
|
self.on_disk = on_disk
|
|
263
|
-
self.content_field = content_field
|
|
264
|
-
self.name_field = name_field
|
|
265
|
-
self.embedding_field = embedding_field
|
|
266
256
|
self.similarity = similarity
|
|
267
257
|
self.index = index
|
|
268
258
|
self.return_embedding = return_embedding
|
|
269
259
|
self.progress_bar = progress_bar
|
|
270
|
-
self.duplicate_documents = duplicate_documents
|
|
271
260
|
self.write_batch_size = write_batch_size
|
|
272
261
|
self.scroll_size = scroll_size
|
|
273
262
|
|
|
@@ -296,6 +285,7 @@ class QdrantDocumentStore:
|
|
|
296
285
|
self.recreate_index,
|
|
297
286
|
self.similarity,
|
|
298
287
|
self.use_sparse_embeddings,
|
|
288
|
+
self.sparse_idf,
|
|
299
289
|
self.on_disk,
|
|
300
290
|
self.payload_fields_to_index,
|
|
301
291
|
)
|
|
@@ -363,7 +353,9 @@ class QdrantDocumentStore:
|
|
|
363
353
|
if not isinstance(doc, Document):
|
|
364
354
|
msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
|
|
365
355
|
raise ValueError(msg)
|
|
366
|
-
self._set_up_collection(self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings)
|
|
356
|
+
self._set_up_collection(
|
|
357
|
+
self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings, self.sparse_idf
|
|
358
|
+
)
|
|
367
359
|
|
|
368
360
|
if len(documents) == 0:
|
|
369
361
|
logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
|
|
@@ -380,7 +372,6 @@ class QdrantDocumentStore:
|
|
|
380
372
|
for document_batch in batched_documents:
|
|
381
373
|
batch = convert_haystack_documents_to_qdrant_points(
|
|
382
374
|
document_batch,
|
|
383
|
-
embedding_field=self.embedding_field,
|
|
384
375
|
use_sparse_embeddings=self.use_sparse_embeddings,
|
|
385
376
|
)
|
|
386
377
|
|
|
@@ -513,8 +504,9 @@ class QdrantDocumentStore:
|
|
|
513
504
|
query_sparse_embedding: SparseEmbedding,
|
|
514
505
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
515
506
|
top_k: int = 10,
|
|
516
|
-
scale_score: bool = True,
|
|
507
|
+
scale_score: bool = False,
|
|
517
508
|
return_embedding: bool = False,
|
|
509
|
+
score_threshold: Optional[float] = None,
|
|
518
510
|
) -> List[Document]:
|
|
519
511
|
"""
|
|
520
512
|
Queries Qdrant using a sparse embedding and returns the most relevant documents.
|
|
@@ -524,6 +516,10 @@ class QdrantDocumentStore:
|
|
|
524
516
|
:param top_k: Maximum number of documents to return.
|
|
525
517
|
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
526
518
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
519
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
520
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
521
|
+
depending on the Distance function used.
|
|
522
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
527
523
|
|
|
528
524
|
:returns: List of documents that are most similar to `query_sparse_embedding`.
|
|
529
525
|
|
|
@@ -553,6 +549,7 @@ class QdrantDocumentStore:
|
|
|
553
549
|
query_filter=qdrant_filters,
|
|
554
550
|
limit=top_k,
|
|
555
551
|
with_vectors=return_embedding,
|
|
552
|
+
score_threshold=score_threshold,
|
|
556
553
|
)
|
|
557
554
|
results = [
|
|
558
555
|
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
@@ -570,8 +567,9 @@ class QdrantDocumentStore:
|
|
|
570
567
|
query_embedding: List[float],
|
|
571
568
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
572
569
|
top_k: int = 10,
|
|
573
|
-
scale_score: bool = True,
|
|
570
|
+
scale_score: bool = False,
|
|
574
571
|
return_embedding: bool = False,
|
|
572
|
+
score_threshold: Optional[float] = None,
|
|
575
573
|
) -> List[Document]:
|
|
576
574
|
"""
|
|
577
575
|
Queries Qdrant using a dense embedding and returns the most relevant documents.
|
|
@@ -581,6 +579,10 @@ class QdrantDocumentStore:
|
|
|
581
579
|
:param top_k: Maximum number of documents to return.
|
|
582
580
|
:param scale_score: Whether to scale the scores of the retrieved documents.
|
|
583
581
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
582
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
583
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
584
|
+
depending on the Distance function used.
|
|
585
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
584
586
|
|
|
585
587
|
:returns: List of documents that are most similar to `query_embedding`.
|
|
586
588
|
"""
|
|
@@ -595,6 +597,7 @@ class QdrantDocumentStore:
|
|
|
595
597
|
query_filter=qdrant_filters,
|
|
596
598
|
limit=top_k,
|
|
597
599
|
with_vectors=return_embedding,
|
|
600
|
+
score_threshold=score_threshold,
|
|
598
601
|
)
|
|
599
602
|
results = [
|
|
600
603
|
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
@@ -617,6 +620,7 @@ class QdrantDocumentStore:
|
|
|
617
620
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
618
621
|
top_k: int = 10,
|
|
619
622
|
return_embedding: bool = False,
|
|
623
|
+
score_threshold: Optional[float] = None,
|
|
620
624
|
) -> List[Document]:
|
|
621
625
|
"""
|
|
622
626
|
Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
|
|
@@ -629,6 +633,10 @@ class QdrantDocumentStore:
|
|
|
629
633
|
:param filters: Filters applied to the retrieved documents.
|
|
630
634
|
:param top_k: Maximum number of documents to return.
|
|
631
635
|
:param return_embedding: Whether to return the embeddings of the retrieved documents.
|
|
636
|
+
:param score_threshold: A minimal score threshold for the result.
|
|
637
|
+
Score of the returned result might be higher or smaller than the threshold
|
|
638
|
+
depending on the Distance function used.
|
|
639
|
+
E.g. for cosine similarity only higher scores will be returned.
|
|
632
640
|
|
|
633
641
|
:returns: List of Document that are most similar to `query_embedding` and `query_sparse_embedding`.
|
|
634
642
|
|
|
@@ -659,6 +667,7 @@ class QdrantDocumentStore:
|
|
|
659
667
|
limit=top_k,
|
|
660
668
|
with_payload=True,
|
|
661
669
|
with_vector=return_embedding,
|
|
670
|
+
score_threshold=score_threshold,
|
|
662
671
|
)
|
|
663
672
|
|
|
664
673
|
dense_request = rest.SearchRequest(
|
|
@@ -731,6 +740,7 @@ class QdrantDocumentStore:
|
|
|
731
740
|
recreate_collection: bool,
|
|
732
741
|
similarity: str,
|
|
733
742
|
use_sparse_embeddings: bool,
|
|
743
|
+
sparse_idf: bool,
|
|
734
744
|
on_disk: bool = False,
|
|
735
745
|
payload_fields_to_index: Optional[List[dict]] = None,
|
|
736
746
|
):
|
|
@@ -746,6 +756,8 @@ class QdrantDocumentStore:
|
|
|
746
756
|
The similarity measure to use.
|
|
747
757
|
:param use_sparse_embeddings:
|
|
748
758
|
Whether to use sparse embeddings.
|
|
759
|
+
:param sparse_idf:
|
|
760
|
+
Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
|
|
749
761
|
:param on_disk:
|
|
750
762
|
Whether to store the collection on disk.
|
|
751
763
|
:param payload_fields_to_index:
|
|
@@ -762,7 +774,9 @@ class QdrantDocumentStore:
|
|
|
762
774
|
if recreate_collection or not self.client.collection_exists(collection_name):
|
|
763
775
|
# There is no need to verify the current configuration of that
|
|
764
776
|
# collection. It might be just recreated again or does not exist yet.
|
|
765
|
-
self.recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
|
|
777
|
+
self.recreate_collection(
|
|
778
|
+
collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings, sparse_idf
|
|
779
|
+
)
|
|
766
780
|
# Create Payload index if payload_fields_to_index is provided
|
|
767
781
|
self._create_payload_index(collection_name, payload_fields_to_index)
|
|
768
782
|
return
|
|
@@ -825,6 +839,7 @@ class QdrantDocumentStore:
|
|
|
825
839
|
embedding_dim: int,
|
|
826
840
|
on_disk: Optional[bool] = None,
|
|
827
841
|
use_sparse_embeddings: Optional[bool] = None,
|
|
842
|
+
sparse_idf: bool = False,
|
|
828
843
|
):
|
|
829
844
|
"""
|
|
830
845
|
Recreates the Qdrant collection with the specified parameters.
|
|
@@ -839,6 +854,8 @@ class QdrantDocumentStore:
|
|
|
839
854
|
Whether to store the collection on disk.
|
|
840
855
|
:param use_sparse_embeddings:
|
|
841
856
|
Whether to use sparse embeddings.
|
|
857
|
+
:param sparse_idf:
|
|
858
|
+
Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
|
|
842
859
|
"""
|
|
843
860
|
if on_disk is None:
|
|
844
861
|
on_disk = self.on_disk
|
|
@@ -857,7 +874,8 @@ class QdrantDocumentStore:
|
|
|
857
874
|
SPARSE_VECTORS_NAME: rest.SparseVectorParams(
|
|
858
875
|
index=rest.SparseIndexParams(
|
|
859
876
|
on_disk=on_disk,
|
|
860
|
-
)
|
|
877
|
+
),
|
|
878
|
+
modifier=rest.Modifier.IDF if sparse_idf else None,
|
|
861
879
|
),
|
|
862
880
|
}
|
|
863
881
|
|
|
@@ -891,12 +909,7 @@ class QdrantDocumentStore:
|
|
|
891
909
|
|
|
892
910
|
:param documents: A list of Haystack Document objects.
|
|
893
911
|
:param index: name of the index
|
|
894
|
-
:param
|
|
895
|
-
Parameter options : ( 'skip','overwrite','fail')
|
|
896
|
-
skip (default option): Ignore the duplicates documents.
|
|
897
|
-
overwrite: Update any existing documents with the same ID when adding documents.
|
|
898
|
-
fail: An error is raised if the document ID of the document being added already
|
|
899
|
-
exists.
|
|
912
|
+
:param policy: The duplicate policy to use when writing documents.
|
|
900
913
|
:returns: A list of Haystack Document objects.
|
|
901
914
|
"""
|
|
902
915
|
|
|
@@ -22,15 +22,12 @@ def test_to_dict():
|
|
|
22
22
|
"index": "test",
|
|
23
23
|
"embedding_dim": 768,
|
|
24
24
|
"on_disk": False,
|
|
25
|
-
"content_field": "content",
|
|
26
|
-
"name_field": "name",
|
|
27
|
-
"embedding_field": "embedding",
|
|
28
25
|
"force_disable_check_same_thread": False,
|
|
29
26
|
"use_sparse_embeddings": False,
|
|
27
|
+
"sparse_idf": False,
|
|
30
28
|
"similarity": "cosine",
|
|
31
29
|
"return_embedding": False,
|
|
32
30
|
"progress_bar": True,
|
|
33
|
-
"duplicate_documents": "overwrite",
|
|
34
31
|
"recreate_index": False,
|
|
35
32
|
"shard_number": None,
|
|
36
33
|
"replication_factor": None,
|
|
@@ -62,15 +59,12 @@ def test_from_dict():
|
|
|
62
59
|
"index": "test",
|
|
63
60
|
"embedding_dim": 768,
|
|
64
61
|
"on_disk": False,
|
|
65
|
-
"content_field": "content",
|
|
66
|
-
"name_field": "name",
|
|
67
|
-
"embedding_field": "embedding",
|
|
68
62
|
"force_disable_check_same_thread": False,
|
|
69
63
|
"use_sparse_embeddings": True,
|
|
64
|
+
"sparse_idf": True,
|
|
70
65
|
"similarity": "cosine",
|
|
71
66
|
"return_embedding": False,
|
|
72
67
|
"progress_bar": True,
|
|
73
|
-
"duplicate_documents": "overwrite",
|
|
74
68
|
"recreate_index": True,
|
|
75
69
|
"shard_number": None,
|
|
76
70
|
"quantization_config": None,
|
|
@@ -87,16 +81,13 @@ def test_from_dict():
|
|
|
87
81
|
assert all(
|
|
88
82
|
[
|
|
89
83
|
document_store.index == "test",
|
|
90
|
-
document_store.content_field == "content",
|
|
91
|
-
document_store.name_field == "name",
|
|
92
|
-
document_store.embedding_field == "embedding",
|
|
93
84
|
document_store.force_disable_check_same_thread is False,
|
|
94
85
|
document_store.use_sparse_embeddings is True,
|
|
86
|
+
document_store.sparse_idf is True,
|
|
95
87
|
document_store.on_disk is False,
|
|
96
88
|
document_store.similarity == "cosine",
|
|
97
89
|
document_store.return_embedding is False,
|
|
98
90
|
document_store.progress_bar,
|
|
99
|
-
document_store.duplicate_documents == "overwrite",
|
|
100
91
|
document_store.recreate_index is True,
|
|
101
92
|
document_store.shard_number is None,
|
|
102
93
|
document_store.replication_factor is None,
|
|
@@ -12,7 +12,12 @@ from haystack.testing.document_store import (
|
|
|
12
12
|
WriteDocumentsTest,
|
|
13
13
|
_random_embeddings,
|
|
14
14
|
)
|
|
15
|
-
from haystack_integrations.document_stores.qdrant.document_store import
|
|
15
|
+
from haystack_integrations.document_stores.qdrant.document_store import (
|
|
16
|
+
SPARSE_VECTORS_NAME,
|
|
17
|
+
QdrantDocumentStore,
|
|
18
|
+
QdrantStoreError,
|
|
19
|
+
)
|
|
20
|
+
from qdrant_client.http import models as rest
|
|
16
21
|
|
|
17
22
|
|
|
18
23
|
class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
|
|
@@ -49,6 +54,23 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
49
54
|
with pytest.raises(DuplicateDocumentError):
|
|
50
55
|
document_store.write_documents(docs, DuplicatePolicy.FAIL)
|
|
51
56
|
|
|
57
|
+
def test_sparse_configuration(self):
|
|
58
|
+
document_store = QdrantDocumentStore(
|
|
59
|
+
":memory:",
|
|
60
|
+
recreate_index=True,
|
|
61
|
+
use_sparse_embeddings=True,
|
|
62
|
+
sparse_idf=True,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
client = document_store.client
|
|
66
|
+
sparse_config = client.get_collection("Document").config.params.sparse_vectors
|
|
67
|
+
|
|
68
|
+
assert SPARSE_VECTORS_NAME in sparse_config
|
|
69
|
+
|
|
70
|
+
# check that the `sparse_idf` parameter takes effect
|
|
71
|
+
assert hasattr(sparse_config[SPARSE_VECTORS_NAME], "modifier")
|
|
72
|
+
assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF
|
|
73
|
+
|
|
52
74
|
def test_query_hybrid(self, generate_sparse_embedding):
|
|
53
75
|
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
54
76
|
|
|
@@ -22,6 +22,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
22
22
|
assert retriever._filters is None
|
|
23
23
|
assert retriever._top_k == 10
|
|
24
24
|
assert retriever._return_embedding is False
|
|
25
|
+
assert retriever._score_threshold is None
|
|
25
26
|
|
|
26
27
|
def test_to_dict(self):
|
|
27
28
|
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
|
|
@@ -47,15 +48,12 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
47
48
|
"index": "test",
|
|
48
49
|
"embedding_dim": 768,
|
|
49
50
|
"on_disk": False,
|
|
50
|
-
"content_field": "content",
|
|
51
|
-
"name_field": "name",
|
|
52
51
|
"force_disable_check_same_thread": False,
|
|
53
|
-
"embedding_field": "embedding",
|
|
54
52
|
"use_sparse_embeddings": False,
|
|
53
|
+
"sparse_idf": False,
|
|
55
54
|
"similarity": "cosine",
|
|
56
55
|
"return_embedding": False,
|
|
57
56
|
"progress_bar": True,
|
|
58
|
-
"duplicate_documents": "overwrite",
|
|
59
57
|
"recreate_index": False,
|
|
60
58
|
"shard_number": None,
|
|
61
59
|
"replication_factor": None,
|
|
@@ -75,8 +73,9 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
75
73
|
},
|
|
76
74
|
"filters": None,
|
|
77
75
|
"top_k": 10,
|
|
78
|
-
"scale_score":
|
|
76
|
+
"scale_score": False,
|
|
79
77
|
"return_embedding": False,
|
|
78
|
+
"score_threshold": None,
|
|
80
79
|
},
|
|
81
80
|
}
|
|
82
81
|
|
|
@@ -92,6 +91,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
92
91
|
"top_k": 5,
|
|
93
92
|
"scale_score": False,
|
|
94
93
|
"return_embedding": True,
|
|
94
|
+
"score_threshold": None,
|
|
95
95
|
},
|
|
96
96
|
}
|
|
97
97
|
retriever = QdrantEmbeddingRetriever.from_dict(data)
|
|
@@ -101,6 +101,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
101
101
|
assert retriever._top_k == 5
|
|
102
102
|
assert retriever._scale_score is False
|
|
103
103
|
assert retriever._return_embedding is True
|
|
104
|
+
assert retriever._score_threshold is None
|
|
104
105
|
|
|
105
106
|
def test_run(self, filterable_docs: List[Document]):
|
|
106
107
|
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
|
|
@@ -118,6 +119,28 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
118
119
|
for document in results:
|
|
119
120
|
assert document.embedding is None
|
|
120
121
|
|
|
122
|
+
def test_run_with_score_threshold(self):
|
|
123
|
+
document_store = QdrantDocumentStore(
|
|
124
|
+
embedding_dim=4, location=":memory:", similarity="cosine", index="Boi", use_sparse_embeddings=False
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
document_store.write_documents(
|
|
128
|
+
[
|
|
129
|
+
Document(
|
|
130
|
+
content="Yet another document",
|
|
131
|
+
embedding=[-0.1, -0.9, -10.0, -0.2],
|
|
132
|
+
),
|
|
133
|
+
Document(content="The document", embedding=[1.0, 1.0, 1.0, 1.0]),
|
|
134
|
+
Document(content="Another document", embedding=[0.8, 0.8, 0.5, 1.0]),
|
|
135
|
+
]
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
retriever = QdrantEmbeddingRetriever(document_store=document_store)
|
|
139
|
+
results = retriever.run(
|
|
140
|
+
query_embedding=[0.9, 0.9, 0.9, 0.9], top_k=5, return_embedding=False, score_threshold=0.5
|
|
141
|
+
)["documents"]
|
|
142
|
+
assert len(results) == 2
|
|
143
|
+
|
|
121
144
|
def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
|
|
122
145
|
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
|
|
123
146
|
|
|
@@ -145,6 +168,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
145
168
|
assert retriever._filters is None
|
|
146
169
|
assert retriever._top_k == 10
|
|
147
170
|
assert retriever._return_embedding is False
|
|
171
|
+
assert retriever._score_threshold is None
|
|
148
172
|
|
|
149
173
|
def test_to_dict(self):
|
|
150
174
|
document_store = QdrantDocumentStore(location=":memory:", index="test")
|
|
@@ -170,15 +194,12 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
170
194
|
"index": "test",
|
|
171
195
|
"embedding_dim": 768,
|
|
172
196
|
"on_disk": False,
|
|
173
|
-
"content_field": "content",
|
|
174
|
-
"name_field": "name",
|
|
175
|
-
"embedding_field": "embedding",
|
|
176
197
|
"force_disable_check_same_thread": False,
|
|
177
198
|
"use_sparse_embeddings": False,
|
|
199
|
+
"sparse_idf": False,
|
|
178
200
|
"similarity": "cosine",
|
|
179
201
|
"return_embedding": False,
|
|
180
202
|
"progress_bar": True,
|
|
181
|
-
"duplicate_documents": "overwrite",
|
|
182
203
|
"recreate_index": False,
|
|
183
204
|
"shard_number": None,
|
|
184
205
|
"replication_factor": None,
|
|
@@ -198,8 +219,9 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
198
219
|
},
|
|
199
220
|
"filters": None,
|
|
200
221
|
"top_k": 10,
|
|
201
|
-
"scale_score":
|
|
222
|
+
"scale_score": False,
|
|
202
223
|
"return_embedding": False,
|
|
224
|
+
"score_threshold": None,
|
|
203
225
|
},
|
|
204
226
|
}
|
|
205
227
|
|
|
@@ -215,6 +237,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
215
237
|
"top_k": 5,
|
|
216
238
|
"scale_score": False,
|
|
217
239
|
"return_embedding": True,
|
|
240
|
+
"score_threshold": None,
|
|
218
241
|
},
|
|
219
242
|
}
|
|
220
243
|
retriever = QdrantSparseEmbeddingRetriever.from_dict(data)
|
|
@@ -224,6 +247,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
224
247
|
assert retriever._top_k == 5
|
|
225
248
|
assert retriever._scale_score is False
|
|
226
249
|
assert retriever._return_embedding is True
|
|
250
|
+
assert retriever._score_threshold is None
|
|
227
251
|
|
|
228
252
|
def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
|
|
229
253
|
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
|
|
@@ -255,6 +279,7 @@ class TestQdrantHybridRetriever:
|
|
|
255
279
|
assert retriever._filters is None
|
|
256
280
|
assert retriever._top_k == 10
|
|
257
281
|
assert retriever._return_embedding is False
|
|
282
|
+
assert retriever._score_threshold is None
|
|
258
283
|
|
|
259
284
|
def test_to_dict(self):
|
|
260
285
|
document_store = QdrantDocumentStore(location=":memory:", index="test")
|
|
@@ -280,15 +305,12 @@ class TestQdrantHybridRetriever:
|
|
|
280
305
|
"index": "test",
|
|
281
306
|
"embedding_dim": 768,
|
|
282
307
|
"on_disk": False,
|
|
283
|
-
"content_field": "content",
|
|
284
|
-
"name_field": "name",
|
|
285
|
-
"embedding_field": "embedding",
|
|
286
308
|
"force_disable_check_same_thread": False,
|
|
287
309
|
"use_sparse_embeddings": False,
|
|
310
|
+
"sparse_idf": False,
|
|
288
311
|
"similarity": "cosine",
|
|
289
312
|
"return_embedding": False,
|
|
290
313
|
"progress_bar": True,
|
|
291
|
-
"duplicate_documents": "overwrite",
|
|
292
314
|
"recreate_index": False,
|
|
293
315
|
"shard_number": None,
|
|
294
316
|
"replication_factor": None,
|
|
@@ -309,6 +331,7 @@ class TestQdrantHybridRetriever:
|
|
|
309
331
|
"filters": None,
|
|
310
332
|
"top_k": 5,
|
|
311
333
|
"return_embedding": True,
|
|
334
|
+
"score_threshold": None,
|
|
312
335
|
},
|
|
313
336
|
}
|
|
314
337
|
|
|
@@ -323,6 +346,7 @@ class TestQdrantHybridRetriever:
|
|
|
323
346
|
"filters": None,
|
|
324
347
|
"top_k": 5,
|
|
325
348
|
"return_embedding": True,
|
|
349
|
+
"score_threshold": None,
|
|
326
350
|
},
|
|
327
351
|
}
|
|
328
352
|
retriever = QdrantHybridRetriever.from_dict(data)
|
|
@@ -331,6 +355,7 @@ class TestQdrantHybridRetriever:
|
|
|
331
355
|
assert retriever._filters is None
|
|
332
356
|
assert retriever._top_k == 5
|
|
333
357
|
assert retriever._return_embedding
|
|
358
|
+
assert retriever._score_threshold is None
|
|
334
359
|
|
|
335
360
|
def test_run(self):
|
|
336
361
|
mock_store = Mock(spec=QdrantDocumentStore)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|