qdrant-haystack 3.8.1__tar.gz → 4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/.gitignore +9 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/CHANGELOG.md +6 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/PKG-INFO +1 -1
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/pyproject.toml +8 -11
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +2 -2
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +2 -3
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +3 -25
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_dict_converters.py +0 -12
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_retriever.py +2 -14
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/LICENSE.txt +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/README.md +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/__init__.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/conftest.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_converters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_document_store.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_legacy_filters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.8.1
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -44,10 +44,10 @@ root = "../.."
|
|
|
44
44
|
git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
|
|
45
45
|
|
|
46
46
|
[tool.hatch.envs.default]
|
|
47
|
-
dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
|
|
47
|
+
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
|
|
48
48
|
[tool.hatch.envs.default.scripts]
|
|
49
|
-
test = "pytest {args:tests}"
|
|
50
|
-
test-cov = "coverage run -m pytest {args:tests}"
|
|
49
|
+
test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
50
|
+
test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
51
51
|
cov-report = ["- coverage combine", "coverage report"]
|
|
52
52
|
cov = ["test-cov", "cov-report"]
|
|
53
53
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
@@ -60,7 +60,7 @@ detached = true
|
|
|
60
60
|
dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
61
61
|
[tool.hatch.envs.lint.scripts]
|
|
62
62
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
63
|
-
style = ["ruff {args:.}", "black --check --diff {args:.}"]
|
|
63
|
+
style = ["ruff check {args:.}", "black --check --diff {args:.}"]
|
|
64
64
|
fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
|
|
65
65
|
all = ["style", "typing"]
|
|
66
66
|
|
|
@@ -105,7 +105,8 @@ ignore = [
|
|
|
105
105
|
# Allow boolean positional values in function calls, like `dict.get(... True)`
|
|
106
106
|
"FBT003",
|
|
107
107
|
# Allow boolean arguments in function definition
|
|
108
|
-
"FBT001",
|
|
108
|
+
"FBT001",
|
|
109
|
+
"FBT002",
|
|
109
110
|
# Ignore checks for possible passwords
|
|
110
111
|
"S105",
|
|
111
112
|
"S106",
|
|
@@ -140,12 +141,8 @@ parallel = false
|
|
|
140
141
|
|
|
141
142
|
[tool.coverage.report]
|
|
142
143
|
omit = ["*/tests/*", "*/__init__.py"]
|
|
143
|
-
show_missing=true
|
|
144
|
-
exclude_lines = [
|
|
145
|
-
"no cov",
|
|
146
|
-
"if __name__ == .__main__.:",
|
|
147
|
-
"if TYPE_CHECKING:",
|
|
148
|
-
]
|
|
144
|
+
show_missing = true
|
|
145
|
+
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
|
|
149
146
|
|
|
150
147
|
|
|
151
148
|
[[tool.mypy.overrides]]
|
|
@@ -37,7 +37,7 @@ class QdrantEmbeddingRetriever:
|
|
|
37
37
|
document_store: QdrantDocumentStore,
|
|
38
38
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
39
39
|
top_k: int = 10,
|
|
40
|
-
scale_score: bool = True,
|
|
40
|
+
scale_score: bool = False,
|
|
41
41
|
return_embedding: bool = False,
|
|
42
42
|
):
|
|
43
43
|
"""
|
|
@@ -159,7 +159,7 @@ class QdrantSparseEmbeddingRetriever:
|
|
|
159
159
|
document_store: QdrantDocumentStore,
|
|
160
160
|
filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
|
|
161
161
|
top_k: int = 10,
|
|
162
|
-
scale_score: bool = True,
|
|
162
|
+
scale_score: bool = False,
|
|
163
163
|
return_embedding: bool = False,
|
|
164
164
|
):
|
|
165
165
|
"""
|
|
@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
|
|
|
17
17
|
def convert_haystack_documents_to_qdrant_points(
|
|
18
18
|
documents: List[Document],
|
|
19
19
|
*,
|
|
20
|
-
embedding_field: str,
|
|
21
20
|
use_sparse_embeddings: bool,
|
|
22
21
|
) -> List[rest.PointStruct]:
|
|
23
22
|
points = []
|
|
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
26
25
|
if use_sparse_embeddings:
|
|
27
26
|
vector = {}
|
|
28
27
|
|
|
29
|
-
dense_vector = payload.pop(embedding_field, None)
|
|
28
|
+
dense_vector = payload.pop("embedding", None)
|
|
30
29
|
if dense_vector is not None:
|
|
31
30
|
vector[DENSE_VECTORS_NAME] = dense_vector
|
|
32
31
|
|
|
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
36
35
|
vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
|
|
37
36
|
|
|
38
37
|
else:
|
|
39
|
-
vector = payload.pop(embedding_field) or {}
|
|
38
|
+
vector = payload.pop("embedding") or {}
|
|
40
39
|
_id = convert_id(payload.get("id"))
|
|
41
40
|
|
|
42
41
|
point = rest.PointStruct(
|
|
@@ -110,14 +110,10 @@ class QdrantDocumentStore:
|
|
|
110
110
|
index: str = "Document",
|
|
111
111
|
embedding_dim: int = 768,
|
|
112
112
|
on_disk: bool = False,
|
|
113
|
-
content_field: str = "content",
|
|
114
|
-
name_field: str = "name",
|
|
115
|
-
embedding_field: str = "embedding",
|
|
116
113
|
use_sparse_embeddings: bool = False,
|
|
117
114
|
similarity: str = "cosine",
|
|
118
115
|
return_embedding: bool = False,
|
|
119
116
|
progress_bar: bool = True,
|
|
120
|
-
duplicate_documents: str = "overwrite",
|
|
121
117
|
recreate_index: bool = False,
|
|
122
118
|
shard_number: Optional[int] = None,
|
|
123
119
|
replication_factor: Optional[int] = None,
|
|
@@ -170,12 +166,6 @@ class QdrantDocumentStore:
|
|
|
170
166
|
Dimension of the embeddings.
|
|
171
167
|
:param on_disk:
|
|
172
168
|
Whether to store the collection on disk.
|
|
173
|
-
:param content_field:
|
|
174
|
-
The field for the document content.
|
|
175
|
-
:param name_field:
|
|
176
|
-
The field for the document name.
|
|
177
|
-
:param embedding_field:
|
|
178
|
-
The field for the document embeddings.
|
|
179
169
|
:param use_sparse_embedding:
|
|
180
170
|
If set to `True`, enables support for sparse embeddings.
|
|
181
171
|
:param similarity:
|
|
@@ -184,8 +174,6 @@ class QdrantDocumentStore:
|
|
|
184
174
|
Whether to return embeddings in the search results.
|
|
185
175
|
:param progress_bar:
|
|
186
176
|
Whether to show a progress bar or not.
|
|
187
|
-
:param duplicate_documents:
|
|
188
|
-
The parameter is not used and will be removed in future release.
|
|
189
177
|
:param recreate_index:
|
|
190
178
|
Whether to recreate the index.
|
|
191
179
|
:param shard_number:
|
|
@@ -260,14 +248,10 @@ class QdrantDocumentStore:
|
|
|
260
248
|
self.use_sparse_embeddings = use_sparse_embeddings
|
|
261
249
|
self.embedding_dim = embedding_dim
|
|
262
250
|
self.on_disk = on_disk
|
|
263
|
-
self.content_field = content_field
|
|
264
|
-
self.name_field = name_field
|
|
265
|
-
self.embedding_field = embedding_field
|
|
266
251
|
self.similarity = similarity
|
|
267
252
|
self.index = index
|
|
268
253
|
self.return_embedding = return_embedding
|
|
269
254
|
self.progress_bar = progress_bar
|
|
270
|
-
self.duplicate_documents = duplicate_documents
|
|
271
255
|
self.write_batch_size = write_batch_size
|
|
272
256
|
self.scroll_size = scroll_size
|
|
273
257
|
|
|
@@ -380,7 +364,6 @@ class QdrantDocumentStore:
|
|
|
380
364
|
for document_batch in batched_documents:
|
|
381
365
|
batch = convert_haystack_documents_to_qdrant_points(
|
|
382
366
|
document_batch,
|
|
383
|
-
embedding_field=self.embedding_field,
|
|
384
367
|
use_sparse_embeddings=self.use_sparse_embeddings,
|
|
385
368
|
)
|
|
386
369
|
|
|
@@ -513,7 +496,7 @@ class QdrantDocumentStore:
|
|
|
513
496
|
query_sparse_embedding: SparseEmbedding,
|
|
514
497
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
515
498
|
top_k: int = 10,
|
|
516
|
-
scale_score: bool = True,
|
|
499
|
+
scale_score: bool = False,
|
|
517
500
|
return_embedding: bool = False,
|
|
518
501
|
) -> List[Document]:
|
|
519
502
|
"""
|
|
@@ -570,7 +553,7 @@ class QdrantDocumentStore:
|
|
|
570
553
|
query_embedding: List[float],
|
|
571
554
|
filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
|
|
572
555
|
top_k: int = 10,
|
|
573
|
-
scale_score: bool = True,
|
|
556
|
+
scale_score: bool = False,
|
|
574
557
|
return_embedding: bool = False,
|
|
575
558
|
) -> List[Document]:
|
|
576
559
|
"""
|
|
@@ -891,12 +874,7 @@ class QdrantDocumentStore:
|
|
|
891
874
|
|
|
892
875
|
:param documents: A list of Haystack Document objects.
|
|
893
876
|
:param index: name of the index
|
|
894
|
-
:param duplicate_documents: Handle duplicate documents based on parameter options.
|
|
895
|
-
Parameter options : ( 'skip','overwrite','fail')
|
|
896
|
-
skip (default option): Ignore the duplicates documents.
|
|
897
|
-
overwrite: Update any existing documents with the same ID when adding documents.
|
|
898
|
-
fail: An error is raised if the document ID of the document being added already
|
|
899
|
-
exists.
|
|
877
|
+
:param policy: The duplicate policy to use when writing documents.
|
|
900
878
|
:returns: A list of Haystack Document objects.
|
|
901
879
|
"""
|
|
902
880
|
|
|
@@ -22,15 +22,11 @@ def test_to_dict():
|
|
|
22
22
|
"index": "test",
|
|
23
23
|
"embedding_dim": 768,
|
|
24
24
|
"on_disk": False,
|
|
25
|
-
"content_field": "content",
|
|
26
|
-
"name_field": "name",
|
|
27
|
-
"embedding_field": "embedding",
|
|
28
25
|
"force_disable_check_same_thread": False,
|
|
29
26
|
"use_sparse_embeddings": False,
|
|
30
27
|
"similarity": "cosine",
|
|
31
28
|
"return_embedding": False,
|
|
32
29
|
"progress_bar": True,
|
|
33
|
-
"duplicate_documents": "overwrite",
|
|
34
30
|
"recreate_index": False,
|
|
35
31
|
"shard_number": None,
|
|
36
32
|
"replication_factor": None,
|
|
@@ -62,15 +58,11 @@ def test_from_dict():
|
|
|
62
58
|
"index": "test",
|
|
63
59
|
"embedding_dim": 768,
|
|
64
60
|
"on_disk": False,
|
|
65
|
-
"content_field": "content",
|
|
66
|
-
"name_field": "name",
|
|
67
|
-
"embedding_field": "embedding",
|
|
68
61
|
"force_disable_check_same_thread": False,
|
|
69
62
|
"use_sparse_embeddings": True,
|
|
70
63
|
"similarity": "cosine",
|
|
71
64
|
"return_embedding": False,
|
|
72
65
|
"progress_bar": True,
|
|
73
|
-
"duplicate_documents": "overwrite",
|
|
74
66
|
"recreate_index": True,
|
|
75
67
|
"shard_number": None,
|
|
76
68
|
"quantization_config": None,
|
|
@@ -87,16 +79,12 @@ def test_from_dict():
|
|
|
87
79
|
assert all(
|
|
88
80
|
[
|
|
89
81
|
document_store.index == "test",
|
|
90
|
-
document_store.content_field == "content",
|
|
91
|
-
document_store.name_field == "name",
|
|
92
|
-
document_store.embedding_field == "embedding",
|
|
93
82
|
document_store.force_disable_check_same_thread is False,
|
|
94
83
|
document_store.use_sparse_embeddings is True,
|
|
95
84
|
document_store.on_disk is False,
|
|
96
85
|
document_store.similarity == "cosine",
|
|
97
86
|
document_store.return_embedding is False,
|
|
98
87
|
document_store.progress_bar,
|
|
99
|
-
document_store.duplicate_documents == "overwrite",
|
|
100
88
|
document_store.recreate_index is True,
|
|
101
89
|
document_store.shard_number is None,
|
|
102
90
|
document_store.replication_factor is None,
|
|
@@ -47,15 +47,11 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
47
47
|
"index": "test",
|
|
48
48
|
"embedding_dim": 768,
|
|
49
49
|
"on_disk": False,
|
|
50
|
-
"content_field": "content",
|
|
51
|
-
"name_field": "name",
|
|
52
50
|
"force_disable_check_same_thread": False,
|
|
53
|
-
"embedding_field": "embedding",
|
|
54
51
|
"use_sparse_embeddings": False,
|
|
55
52
|
"similarity": "cosine",
|
|
56
53
|
"return_embedding": False,
|
|
57
54
|
"progress_bar": True,
|
|
58
|
-
"duplicate_documents": "overwrite",
|
|
59
55
|
"recreate_index": False,
|
|
60
56
|
"shard_number": None,
|
|
61
57
|
"replication_factor": None,
|
|
@@ -75,7 +71,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
75
71
|
},
|
|
76
72
|
"filters": None,
|
|
77
73
|
"top_k": 10,
|
|
78
|
-
"scale_score": True,
|
|
74
|
+
"scale_score": False,
|
|
79
75
|
"return_embedding": False,
|
|
80
76
|
},
|
|
81
77
|
}
|
|
@@ -170,15 +166,11 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
170
166
|
"index": "test",
|
|
171
167
|
"embedding_dim": 768,
|
|
172
168
|
"on_disk": False,
|
|
173
|
-
"content_field": "content",
|
|
174
|
-
"name_field": "name",
|
|
175
|
-
"embedding_field": "embedding",
|
|
176
169
|
"force_disable_check_same_thread": False,
|
|
177
170
|
"use_sparse_embeddings": False,
|
|
178
171
|
"similarity": "cosine",
|
|
179
172
|
"return_embedding": False,
|
|
180
173
|
"progress_bar": True,
|
|
181
|
-
"duplicate_documents": "overwrite",
|
|
182
174
|
"recreate_index": False,
|
|
183
175
|
"shard_number": None,
|
|
184
176
|
"replication_factor": None,
|
|
@@ -198,7 +190,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
|
198
190
|
},
|
|
199
191
|
"filters": None,
|
|
200
192
|
"top_k": 10,
|
|
201
|
-
"scale_score": True,
|
|
193
|
+
"scale_score": False,
|
|
202
194
|
"return_embedding": False,
|
|
203
195
|
},
|
|
204
196
|
}
|
|
@@ -280,15 +272,11 @@ class TestQdrantHybridRetriever:
|
|
|
280
272
|
"index": "test",
|
|
281
273
|
"embedding_dim": 768,
|
|
282
274
|
"on_disk": False,
|
|
283
|
-
"content_field": "content",
|
|
284
|
-
"name_field": "name",
|
|
285
|
-
"embedding_field": "embedding",
|
|
286
275
|
"force_disable_check_same_thread": False,
|
|
287
276
|
"use_sparse_embeddings": False,
|
|
288
277
|
"similarity": "cosine",
|
|
289
278
|
"return_embedding": False,
|
|
290
279
|
"progress_bar": True,
|
|
291
|
-
"duplicate_documents": "overwrite",
|
|
292
280
|
"recreate_index": False,
|
|
293
281
|
"shard_number": None,
|
|
294
282
|
"replication_factor": None,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|