qdrant-haystack 7.0.0__tar.gz → 8.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/CHANGELOG.md +81 -18
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/PKG-INFO +4 -4
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/pyproject.toml +3 -3
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/document_stores/qdrant/converters.py +17 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +15 -7
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/test_converters.py +44 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/test_document_store.py +78 -1
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/test_filters.py +0 -6
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/.gitignore +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/LICENSE.txt +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/README.md +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/__init__.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/conftest.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-7.0.0 → qdrant_haystack-8.1.0}/tests/test_retriever.py +0 -0
|
@@ -1,15 +1,47 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v8.0.0] - 2025-02-19
|
|
4
|
+
|
|
5
|
+
### 🧹 Chores
|
|
6
|
+
|
|
7
|
+
- Fix linting/isort (#1215)
|
|
8
|
+
- Inherit from `FilterDocumentsTestWithDataframe` in Document Stores (#1290)
|
|
9
|
+
- [**breaking**] Qdrant - remove dataframe support (#1403)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## [integrations/qdrant-v7.0.0] - 2024-10-29
|
|
13
|
+
|
|
14
|
+
### ⚙️ CI
|
|
15
|
+
|
|
16
|
+
- Adopt uv as installer (#1142)
|
|
17
|
+
|
|
18
|
+
### 🧹 Chores
|
|
19
|
+
|
|
20
|
+
- Update ruff linting scripts and settings (#1105)
|
|
21
|
+
|
|
22
|
+
### 🌀 Miscellaneous
|
|
23
|
+
|
|
24
|
+
- Refactor!: Qdrant - remove `index` parameter from methods (#1160)
|
|
25
|
+
|
|
3
26
|
## [integrations/qdrant-v6.0.0] - 2024-09-13
|
|
4
27
|
|
|
28
|
+
### 🌀 Miscellaneous
|
|
29
|
+
|
|
30
|
+
- Remove support for deprecated legacy filters in Qdrant (#1084)
|
|
31
|
+
|
|
5
32
|
## [integrations/qdrant-v5.1.0] - 2024-09-12
|
|
6
33
|
|
|
7
34
|
### 🚀 Features
|
|
8
35
|
|
|
9
36
|
- Qdrant - Add group_by and group_size optional parameters to Retrievers (#1054)
|
|
10
37
|
|
|
38
|
+
|
|
11
39
|
## [integrations/qdrant-v5.0.0] - 2024-09-02
|
|
12
40
|
|
|
41
|
+
### 🌀 Miscellaneous
|
|
42
|
+
|
|
43
|
+
- Fix!: fix type errors in `QdrantDocumentStore`; rename `ids` (parameter of `delete_documents`) to `document_ids` (#1041)
|
|
44
|
+
|
|
13
45
|
## [integrations/qdrant-v4.2.0] - 2024-08-27
|
|
14
46
|
|
|
15
47
|
### 🚜 Refactor
|
|
@@ -20,12 +52,18 @@
|
|
|
20
52
|
|
|
21
53
|
- Do not retry tests in `hatch run test` command (#954)
|
|
22
54
|
|
|
55
|
+
### 🌀 Miscellaneous
|
|
56
|
+
|
|
57
|
+
- Chore: Update Qdrant tests for the new `apply_filter_policy` usage (#969)
|
|
58
|
+
- Chore: qdrant - ruff update, don't ruff tests (#989)
|
|
59
|
+
|
|
23
60
|
## [integrations/qdrant-v4.1.2] - 2024-07-15
|
|
24
61
|
|
|
25
62
|
### 🐛 Bug Fixes
|
|
26
63
|
|
|
27
64
|
- `qdrant` - Fallback to default filter policy when deserializing retrievers without the init parameter (#902)
|
|
28
65
|
|
|
66
|
+
|
|
29
67
|
## [integrations/qdrant-v4.1.1] - 2024-07-10
|
|
30
68
|
|
|
31
69
|
### 🚀 Features
|
|
@@ -36,6 +74,10 @@
|
|
|
36
74
|
|
|
37
75
|
- Errors in convert_filters_to_qdrant (#870)
|
|
38
76
|
|
|
77
|
+
### 🌀 Miscellaneous
|
|
78
|
+
|
|
79
|
+
- Chore: Minor retriever pydoc fix (#884)
|
|
80
|
+
|
|
39
81
|
## [integrations/qdrant-v4.1.0] - 2024-07-03
|
|
40
82
|
|
|
41
83
|
### 🚀 Features
|
|
@@ -43,6 +85,7 @@
|
|
|
43
85
|
- Add `score_threshold` to Qdrant Retrievers (#860)
|
|
44
86
|
- Qdrant - add support for BM42 (#864)
|
|
45
87
|
|
|
88
|
+
|
|
46
89
|
## [integrations/qdrant-v4.0.0] - 2024-07-02
|
|
47
90
|
|
|
48
91
|
### 🚜 Refactor
|
|
@@ -50,17 +93,25 @@
|
|
|
50
93
|
- [**breaking**] Qdrant - remove unused init parameters: `content_field`, `name_field`, `embedding_field`, and `duplicate_documents` (#861)
|
|
51
94
|
- [**breaking**] Qdrant - set `scale_score` default value to `False` (#862)
|
|
52
95
|
|
|
53
|
-
### ⚙️
|
|
96
|
+
### ⚙️ CI
|
|
54
97
|
|
|
55
98
|
- Retry tests to reduce flakyness (#836)
|
|
99
|
+
|
|
100
|
+
### 🧹 Chores
|
|
101
|
+
|
|
56
102
|
- Update ruff invocation to include check parameter (#853)
|
|
57
103
|
|
|
104
|
+
### 🌀 Miscellaneous
|
|
105
|
+
|
|
106
|
+
- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
|
|
107
|
+
|
|
58
108
|
## [integrations/qdrant-v3.8.1] - 2024-06-20
|
|
59
109
|
|
|
60
110
|
### 📚 Documentation
|
|
61
111
|
|
|
62
112
|
- Added docstrings for QdrantDocumentStore (#808)
|
|
63
113
|
|
|
114
|
+
|
|
64
115
|
## [integrations/qdrant-v3.8.0] - 2024-06-06
|
|
65
116
|
|
|
66
117
|
### 🚀 Features
|
|
@@ -79,17 +130,29 @@
|
|
|
79
130
|
|
|
80
131
|
- Defer database connection to the first usage (#748)
|
|
81
132
|
|
|
133
|
+
### 🌀 Miscellaneous
|
|
134
|
+
|
|
135
|
+
- Qdrant - improve docstrings for retrievers (#687)
|
|
136
|
+
- Chore: change the pydoc renderer class (#718)
|
|
137
|
+
- Allow vanilla qdrant filters (#692)
|
|
138
|
+
|
|
82
139
|
## [integrations/qdrant-v3.5.0] - 2024-04-24
|
|
83
140
|
|
|
141
|
+
### 🌀 Miscellaneous
|
|
142
|
+
|
|
143
|
+
- Chore: add license classifiers (#680)
|
|
144
|
+
- Qdrant - add hybrid retriever (#675)
|
|
145
|
+
|
|
84
146
|
## [integrations/qdrant-v3.4.0] - 2024-04-23
|
|
85
147
|
|
|
86
|
-
###
|
|
148
|
+
### 🌀 Miscellaneous
|
|
87
149
|
|
|
88
150
|
- Add embedding retrieval example (#666)
|
|
151
|
+
- Rename `QdrantSparseRetriever` to `QdrantSparseEmbeddingRetriever` (#681)
|
|
89
152
|
|
|
90
153
|
## [integrations/qdrant-v3.3.1] - 2024-04-12
|
|
91
154
|
|
|
92
|
-
###
|
|
155
|
+
### 🌀 Miscellaneous
|
|
93
156
|
|
|
94
157
|
- Add migration utility function for Sparse Embedding support (#659)
|
|
95
158
|
|
|
@@ -103,9 +166,7 @@
|
|
|
103
166
|
|
|
104
167
|
### 🐛 Bug Fixes
|
|
105
168
|
|
|
106
|
-
- Fix haystack-ai
|
|
107
|
-
|
|
108
|
-
|
|
169
|
+
- Fix `haystack-ai` pins (#649)
|
|
109
170
|
|
|
110
171
|
## [integrations/qdrant-v3.2.0] - 2024-03-27
|
|
111
172
|
|
|
@@ -116,15 +177,9 @@
|
|
|
116
177
|
|
|
117
178
|
### 🐛 Bug Fixes
|
|
118
179
|
|
|
119
|
-
- Fix linter errors (#282)
|
|
120
|
-
|
|
121
|
-
|
|
180
|
+
- Fix: fix linter errors (#282)
|
|
122
181
|
- Fix order of API docs (#447)
|
|
123
|
-
|
|
124
|
-
This PR will also push the docs to Readme
|
|
125
|
-
- Fixes (#518)
|
|
126
|
-
|
|
127
|
-
|
|
182
|
+
- Doc: fixing docstrings for qdrant (#518)
|
|
128
183
|
|
|
129
184
|
### 🚜 Refactor
|
|
130
185
|
|
|
@@ -136,13 +191,19 @@ This PR will also push the docs to Readme
|
|
|
136
191
|
- Small consistency improvements (#536)
|
|
137
192
|
- Disable-class-def (#556)
|
|
138
193
|
|
|
139
|
-
### ⚙️
|
|
194
|
+
### ⚙️ CI
|
|
140
195
|
|
|
141
196
|
- Generate API docs for Qdrant (#361)
|
|
142
197
|
|
|
198
|
+
### 🌀 Miscellaneous
|
|
199
|
+
|
|
200
|
+
- Make tests show coverage (#566)
|
|
201
|
+
- Remove references to Python 3.7 (#601)
|
|
202
|
+
- Make Document Stores initially skip `SparseEmbedding` (#606)
|
|
203
|
+
|
|
143
204
|
## [integrations/qdrant-v3.0.0] - 2024-01-22
|
|
144
205
|
|
|
145
|
-
###
|
|
206
|
+
### 🌀 Miscellaneous
|
|
146
207
|
|
|
147
208
|
- [**breaking**] Change import paths (#255)
|
|
148
209
|
|
|
@@ -156,10 +217,12 @@ This PR will also push the docs to Readme
|
|
|
156
217
|
|
|
157
218
|
- Fix import paths for beta5 (#237)
|
|
158
219
|
|
|
159
|
-
|
|
160
|
-
|
|
161
220
|
### 🚜 Refactor
|
|
162
221
|
|
|
163
222
|
- Use `hatch_vcs` to manage integrations versioning (#103)
|
|
164
223
|
|
|
224
|
+
### 🌀 Miscellaneous
|
|
225
|
+
|
|
226
|
+
- Renamed QdrntRetriever to QdrntEmbeddingRetriever (#174)
|
|
227
|
+
|
|
165
228
|
<!-- generated by git-cliff -->
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 7.0.0
|
|
3
|
+
Version: 8.1.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -11,13 +11,13 @@ License-File: LICENSE.txt
|
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
19
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
|
-
Requires-Python: >=3.8
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
21
|
Requires-Dist: haystack-ai
|
|
22
22
|
Requires-Dist: qdrant-client>=1.10.0
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
@@ -7,7 +7,7 @@ name = "qdrant-haystack"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = 'An integration of Qdrant ANN vector database backend with Haystack'
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.8"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
keywords = []
|
|
13
13
|
authors = [
|
|
@@ -18,10 +18,10 @@ classifiers = [
|
|
|
18
18
|
"License :: OSI Approved :: Apache Software License",
|
|
19
19
|
"Development Status :: 4 - Beta",
|
|
20
20
|
"Programming Language :: Python",
|
|
21
|
-
"Programming Language :: Python :: 3.8",
|
|
22
21
|
"Programming Language :: Python :: 3.9",
|
|
23
22
|
"Programming Language :: Python :: 3.10",
|
|
24
23
|
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
25
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
26
26
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
27
27
|
]
|
|
@@ -56,7 +56,7 @@ cov-retry = ["test-cov-retry", "cov-report"]
|
|
|
56
56
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
57
57
|
|
|
58
58
|
[[tool.hatch.envs.all.matrix]]
|
|
59
|
-
python = ["3.8", "3.9", "3.10", "3.11"]
|
|
59
|
+
python = [ "3.9", "3.10", "3.11"]
|
|
60
60
|
|
|
61
61
|
[tool.hatch.envs.lint]
|
|
62
62
|
installer = "uv"
|
|
@@ -4,4 +4,4 @@
|
|
|
4
4
|
|
|
5
5
|
from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -22,6 +22,15 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
22
22
|
points = []
|
|
23
23
|
for document in documents:
|
|
24
24
|
payload = document.to_dict(flatten=False)
|
|
25
|
+
|
|
26
|
+
if payload.pop("dataframe", None):
|
|
27
|
+
logger.warning(
|
|
28
|
+
"Document %s has the `dataframe` field set,"
|
|
29
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
30
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
31
|
+
document.id,
|
|
32
|
+
)
|
|
33
|
+
|
|
25
34
|
if use_sparse_embeddings:
|
|
26
35
|
vector = {}
|
|
27
36
|
|
|
@@ -64,6 +73,14 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
|
|
|
64
73
|
payload = {**point.payload}
|
|
65
74
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
66
75
|
|
|
76
|
+
if payload.pop("dataframe", None):
|
|
77
|
+
logger.warning(
|
|
78
|
+
"Document %s has the `dataframe` field set,"
|
|
79
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
80
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
81
|
+
payload["id"],
|
|
82
|
+
)
|
|
83
|
+
|
|
67
84
|
if not use_sparse_embeddings:
|
|
68
85
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
69
86
|
elif hasattr(point, "vector") and point.vector is not None:
|
|
@@ -48,8 +48,8 @@ def get_batches_from_generator(iterable, n):
|
|
|
48
48
|
|
|
49
49
|
class QdrantDocumentStore:
|
|
50
50
|
"""
|
|
51
|
-
QdrantDocumentStore
|
|
52
|
-
|
|
51
|
+
A QdrantDocumentStore implementation that you
|
|
52
|
+
can use with any Qdrant instance: in-memory, disk-persisted, Docker-based,
|
|
53
53
|
and Qdrant Cloud Cluster deployments.
|
|
54
54
|
|
|
55
55
|
Usage example by creating an in-memory instance:
|
|
@@ -866,10 +866,18 @@ class QdrantDocumentStore:
|
|
|
866
866
|
|
|
867
867
|
collection_info = self.client.get_collection(collection_name)
|
|
868
868
|
|
|
869
|
-
has_named_vectors = (
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
869
|
+
has_named_vectors = isinstance(collection_info.config.params.vectors, dict)
|
|
870
|
+
|
|
871
|
+
if has_named_vectors and DENSE_VECTORS_NAME not in collection_info.config.params.vectors:
|
|
872
|
+
msg = (
|
|
873
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
874
|
+
f"but it has been originally created outside of Haystack and is not supported. "
|
|
875
|
+
f"If possible, you should create a new Document Store with Haystack. "
|
|
876
|
+
f"In case you want to migrate the existing collection, see an example script in "
|
|
877
|
+
f"https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/src/"
|
|
878
|
+
f"haystack_integrations/document_stores/qdrant/migrate_to_sparse.py."
|
|
879
|
+
)
|
|
880
|
+
raise QdrantStoreError(msg)
|
|
873
881
|
|
|
874
882
|
if self.use_sparse_embeddings and not has_named_vectors:
|
|
875
883
|
msg = (
|
|
@@ -882,7 +890,7 @@ class QdrantDocumentStore:
|
|
|
882
890
|
)
|
|
883
891
|
raise QdrantStoreError(msg)
|
|
884
892
|
|
|
885
|
-
|
|
893
|
+
if not self.use_sparse_embeddings and has_named_vectors:
|
|
886
894
|
msg = (
|
|
887
895
|
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
888
896
|
f"but it has been originally created with sparse embedding vectors."
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
from haystack import Document
|
|
3
|
+
from pandas import DataFrame
|
|
2
4
|
from qdrant_client.http import models as rest
|
|
3
5
|
|
|
4
6
|
from haystack_integrations.document_stores.qdrant.converters import (
|
|
7
|
+
convert_haystack_documents_to_qdrant_points,
|
|
5
8
|
convert_id,
|
|
6
9
|
convert_qdrant_point_to_haystack_document,
|
|
7
10
|
)
|
|
@@ -62,3 +65,44 @@ def test_point_to_document_reverts_proper_structure_from_record_without_sparse()
|
|
|
62
65
|
assert document.sparse_embedding is None
|
|
63
66
|
assert {"test_field": 1} == document.meta
|
|
64
67
|
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_point_to_document_skips_dataframe():
|
|
71
|
+
|
|
72
|
+
point = rest.Record(
|
|
73
|
+
id="c7c62e8e-02b9-4ec6-9f88-46bd97b628b7",
|
|
74
|
+
payload={
|
|
75
|
+
"id": "my-id",
|
|
76
|
+
"content": "Lorem ipsum",
|
|
77
|
+
"content_type": "text",
|
|
78
|
+
"meta": {
|
|
79
|
+
"test_field": 1,
|
|
80
|
+
},
|
|
81
|
+
"dataframe": {"a": [1, 2, 3]},
|
|
82
|
+
},
|
|
83
|
+
vector=[1.0, 0.0, 0.0, 0.0],
|
|
84
|
+
)
|
|
85
|
+
document = convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=False)
|
|
86
|
+
assert "my-id" == document.id
|
|
87
|
+
assert "Lorem ipsum" == document.content
|
|
88
|
+
assert "text" == document.content_type
|
|
89
|
+
assert {"test_field": 1} == document.meta
|
|
90
|
+
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
91
|
+
assert not hasattr(document, "dataframe") or document.dataframe is None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_documents_to_points_skips_dataframe():
|
|
95
|
+
doc = Document(
|
|
96
|
+
id="my-id",
|
|
97
|
+
content="Lorem ipsum",
|
|
98
|
+
embedding=[1.0, 0.0, 0.0, 0.0],
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
doc.dataframe = DataFrame([[1, 2], [3, 4]])
|
|
102
|
+
|
|
103
|
+
points = convert_haystack_documents_to_qdrant_points([doc], use_sparse_embeddings=False)
|
|
104
|
+
assert len(points) == 1
|
|
105
|
+
|
|
106
|
+
assert points[0].payload["content"] == "Lorem ipsum"
|
|
107
|
+
assert points[0].vector == [1.0, 0.0, 0.0, 0.0]
|
|
108
|
+
assert "dataframe" not in points[0].payload
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
|
-
from unittest.mock import patch
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
from haystack import Document
|
|
@@ -15,6 +15,7 @@ from haystack.testing.document_store import (
|
|
|
15
15
|
from qdrant_client.http import models as rest
|
|
16
16
|
|
|
17
17
|
from haystack_integrations.document_stores.qdrant.document_store import (
|
|
18
|
+
DENSE_VECTORS_NAME,
|
|
18
19
|
SPARSE_VECTORS_NAME,
|
|
19
20
|
QdrantDocumentStore,
|
|
20
21
|
QdrantStoreError,
|
|
@@ -151,3 +152,79 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
151
152
|
|
|
152
153
|
with pytest.raises(QdrantStoreError):
|
|
153
154
|
document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
|
|
155
|
+
|
|
156
|
+
def test_set_up_collection_with_existing_incompatible_collection(self):
|
|
157
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
158
|
+
|
|
159
|
+
# Mock collection info with named vectors but missing DENSE_VECTORS_NAME
|
|
160
|
+
mock_collection_info = MagicMock()
|
|
161
|
+
mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()}
|
|
162
|
+
|
|
163
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
164
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
165
|
+
):
|
|
166
|
+
|
|
167
|
+
with pytest.raises(QdrantStoreError, match="created outside of Haystack"):
|
|
168
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
|
|
169
|
+
|
|
170
|
+
def test_set_up_collection_use_sparse_embeddings_true_without_named_vectors(self):
|
|
171
|
+
"""Test that an error is raised when use_sparse_embeddings is True but collection doesn't have named vectors"""
|
|
172
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
173
|
+
|
|
174
|
+
# Mock collection info without named vectors
|
|
175
|
+
mock_collection_info = MagicMock()
|
|
176
|
+
mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig)
|
|
177
|
+
|
|
178
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
179
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
180
|
+
):
|
|
181
|
+
|
|
182
|
+
with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"):
|
|
183
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
|
|
184
|
+
|
|
185
|
+
def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors(self):
|
|
186
|
+
"""Test that an error is raised when use_sparse_embeddings is False but collection has named vectors"""
|
|
187
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False)
|
|
188
|
+
|
|
189
|
+
# Mock collection info with named vectors
|
|
190
|
+
mock_collection_info = MagicMock()
|
|
191
|
+
mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()}
|
|
192
|
+
|
|
193
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
194
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
195
|
+
):
|
|
196
|
+
|
|
197
|
+
with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"):
|
|
198
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
199
|
+
|
|
200
|
+
def test_set_up_collection_with_distance_mismatch(self):
|
|
201
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine")
|
|
202
|
+
|
|
203
|
+
# Mock collection info with different distance
|
|
204
|
+
mock_collection_info = MagicMock()
|
|
205
|
+
mock_collection_info.config.params.vectors = MagicMock()
|
|
206
|
+
mock_collection_info.config.params.vectors.distance = rest.Distance.DOT
|
|
207
|
+
mock_collection_info.config.params.vectors.size = 768
|
|
208
|
+
|
|
209
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
210
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
211
|
+
):
|
|
212
|
+
|
|
213
|
+
with pytest.raises(ValueError, match="different similarity"):
|
|
214
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
215
|
+
|
|
216
|
+
def test_set_up_collection_with_dimension_mismatch(self):
|
|
217
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine")
|
|
218
|
+
|
|
219
|
+
# Mock collection info with different vector size
|
|
220
|
+
mock_collection_info = MagicMock()
|
|
221
|
+
mock_collection_info.config.params.vectors = MagicMock()
|
|
222
|
+
mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE
|
|
223
|
+
mock_collection_info.config.params.vectors.size = 512
|
|
224
|
+
|
|
225
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
226
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
227
|
+
):
|
|
228
|
+
|
|
229
|
+
with pytest.raises(ValueError, match="different vector size"):
|
|
230
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
@@ -208,11 +208,5 @@ class TestQdrantStoreBaseTests(FilterDocumentsTest):
|
|
|
208
208
|
|
|
209
209
|
# ======== ========================== ========
|
|
210
210
|
|
|
211
|
-
@pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
|
|
212
|
-
def test_comparison_equal_with_dataframe(self, document_store, filterable_docs): ...
|
|
213
|
-
|
|
214
|
-
@pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
|
|
215
|
-
def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs): ...
|
|
216
|
-
|
|
217
211
|
@pytest.mark.skip(reason="Cannot distinguish errors yet")
|
|
218
212
|
def test_missing_top_level_operator_key(self, document_store, filterable_docs): ...
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|