qdrant-haystack 8.0.0__tar.gz → 9.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/CHANGELOG.md +87 -12
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/PKG-INFO +4 -4
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/pyproject.toml +4 -4
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +1 -17
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +16 -9
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +4 -3
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/test_converters.py +0 -44
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/test_document_store.py +78 -1
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/.gitignore +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/LICENSE.txt +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/README.md +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/__init__.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/conftest.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-8.0.0 → qdrant_haystack-9.0.0}/tests/test_retriever.py +0 -0
|
@@ -1,22 +1,58 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v8.1.0] - 2025-03-07
|
|
4
|
+
|
|
5
|
+
### 🧹 Chores
|
|
6
|
+
|
|
7
|
+
- Remove Python 3.8 support (#1421)
|
|
8
|
+
|
|
9
|
+
### 🌀 Miscellaneous
|
|
10
|
+
|
|
11
|
+
- Docs: Update document store descriptions for deepset Pipeline Builder (#1447)
|
|
12
|
+
- Refactor: Qdrant - raise error if existing collection is not compatible with Haystack (#1481)
|
|
13
|
+
|
|
14
|
+
## [integrations/qdrant-v8.0.0] - 2025-02-19
|
|
15
|
+
|
|
16
|
+
### 🧹 Chores
|
|
17
|
+
|
|
18
|
+
- Fix linting/isort (#1215)
|
|
19
|
+
- Inherit from `FilterDocumentsTestWithDataframe` in Document Stores (#1290)
|
|
20
|
+
- [**breaking**] Qdrant - remove dataframe support (#1403)
|
|
21
|
+
|
|
22
|
+
|
|
3
23
|
## [integrations/qdrant-v7.0.0] - 2024-10-29
|
|
4
24
|
|
|
5
|
-
### ⚙️
|
|
25
|
+
### ⚙️ CI
|
|
6
26
|
|
|
7
|
-
- Update ruff linting scripts and settings (#1105)
|
|
8
27
|
- Adopt uv as installer (#1142)
|
|
9
28
|
|
|
29
|
+
### 🧹 Chores
|
|
30
|
+
|
|
31
|
+
- Update ruff linting scripts and settings (#1105)
|
|
32
|
+
|
|
33
|
+
### 🌀 Miscellaneous
|
|
34
|
+
|
|
35
|
+
- Refactor!: Qdrant - remove `index` parameter from methods (#1160)
|
|
36
|
+
|
|
10
37
|
## [integrations/qdrant-v6.0.0] - 2024-09-13
|
|
11
38
|
|
|
39
|
+
### 🌀 Miscellaneous
|
|
40
|
+
|
|
41
|
+
- Remove support for deprecated legacy filters in Qdrant (#1084)
|
|
42
|
+
|
|
12
43
|
## [integrations/qdrant-v5.1.0] - 2024-09-12
|
|
13
44
|
|
|
14
45
|
### 🚀 Features
|
|
15
46
|
|
|
16
47
|
- Qdrant - Add group_by and group_size optional parameters to Retrievers (#1054)
|
|
17
48
|
|
|
49
|
+
|
|
18
50
|
## [integrations/qdrant-v5.0.0] - 2024-09-02
|
|
19
51
|
|
|
52
|
+
### 🌀 Miscellaneous
|
|
53
|
+
|
|
54
|
+
- Fix!: fix type errors in `QdrantDocumentStore`; rename `ids` (parameter of `delete_documents`) to `document_ids` (#1041)
|
|
55
|
+
|
|
20
56
|
## [integrations/qdrant-v4.2.0] - 2024-08-27
|
|
21
57
|
|
|
22
58
|
### 🚜 Refactor
|
|
@@ -27,12 +63,18 @@
|
|
|
27
63
|
|
|
28
64
|
- Do not retry tests in `hatch run test` command (#954)
|
|
29
65
|
|
|
66
|
+
### 🌀 Miscellaneous
|
|
67
|
+
|
|
68
|
+
- Chore: Update Qdrant tests for the new `apply_filter_policy` usage (#969)
|
|
69
|
+
- Chore: qdrant - ruff update, don't ruff tests (#989)
|
|
70
|
+
|
|
30
71
|
## [integrations/qdrant-v4.1.2] - 2024-07-15
|
|
31
72
|
|
|
32
73
|
### 🐛 Bug Fixes
|
|
33
74
|
|
|
34
75
|
- `qdrant` - Fallback to default filter policy when deserializing retrievers without the init parameter (#902)
|
|
35
76
|
|
|
77
|
+
|
|
36
78
|
## [integrations/qdrant-v4.1.1] - 2024-07-10
|
|
37
79
|
|
|
38
80
|
### 🚀 Features
|
|
@@ -43,6 +85,10 @@
|
|
|
43
85
|
|
|
44
86
|
- Errors in convert_filters_to_qdrant (#870)
|
|
45
87
|
|
|
88
|
+
### 🌀 Miscellaneous
|
|
89
|
+
|
|
90
|
+
- Chore: Minor retriever pydoc fix (#884)
|
|
91
|
+
|
|
46
92
|
## [integrations/qdrant-v4.1.0] - 2024-07-03
|
|
47
93
|
|
|
48
94
|
### 🚀 Features
|
|
@@ -50,6 +96,7 @@
|
|
|
50
96
|
- Add `score_threshold` to Qdrant Retrievers (#860)
|
|
51
97
|
- Qdrant - add support for BM42 (#864)
|
|
52
98
|
|
|
99
|
+
|
|
53
100
|
## [integrations/qdrant-v4.0.0] - 2024-07-02
|
|
54
101
|
|
|
55
102
|
### 🚜 Refactor
|
|
@@ -57,17 +104,25 @@
|
|
|
57
104
|
- [**breaking**] Qdrant - remove unused init parameters: `content_field`, `name_field`, `embedding_field`, and `duplicate_documents` (#861)
|
|
58
105
|
- [**breaking**] Qdrant - set `scale_score` default value to `False` (#862)
|
|
59
106
|
|
|
60
|
-
### ⚙️
|
|
107
|
+
### ⚙️ CI
|
|
61
108
|
|
|
62
109
|
- Retry tests to reduce flakiness (#836)
|
|
110
|
+
|
|
111
|
+
### 🧹 Chores
|
|
112
|
+
|
|
63
113
|
- Update ruff invocation to include check parameter (#853)
|
|
64
114
|
|
|
115
|
+
### 🌀 Miscellaneous
|
|
116
|
+
|
|
117
|
+
- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
|
|
118
|
+
|
|
65
119
|
## [integrations/qdrant-v3.8.1] - 2024-06-20
|
|
66
120
|
|
|
67
121
|
### 📚 Documentation
|
|
68
122
|
|
|
69
123
|
- Added docstrings for QdrantDocumentStore (#808)
|
|
70
124
|
|
|
125
|
+
|
|
71
126
|
## [integrations/qdrant-v3.8.0] - 2024-06-06
|
|
72
127
|
|
|
73
128
|
### 🚀 Features
|
|
@@ -86,17 +141,29 @@
|
|
|
86
141
|
|
|
87
142
|
- Defer database connection to the first usage (#748)
|
|
88
143
|
|
|
144
|
+
### 🌀 Miscellaneous
|
|
145
|
+
|
|
146
|
+
- Qdrant - improve docstrings for retrievers (#687)
|
|
147
|
+
- Chore: change the pydoc renderer class (#718)
|
|
148
|
+
- Allow vanilla qdrant filters (#692)
|
|
149
|
+
|
|
89
150
|
## [integrations/qdrant-v3.5.0] - 2024-04-24
|
|
90
151
|
|
|
152
|
+
### 🌀 Miscellaneous
|
|
153
|
+
|
|
154
|
+
- Chore: add license classifiers (#680)
|
|
155
|
+
- Qdrant - add hybrid retriever (#675)
|
|
156
|
+
|
|
91
157
|
## [integrations/qdrant-v3.4.0] - 2024-04-23
|
|
92
158
|
|
|
93
|
-
###
|
|
159
|
+
### 🌀 Miscellaneous
|
|
94
160
|
|
|
95
161
|
- Add embedding retrieval example (#666)
|
|
162
|
+
- Rename `QdrantSparseRetriever` to `QdrantSparseEmbeddingRetriever` (#681)
|
|
96
163
|
|
|
97
164
|
## [integrations/qdrant-v3.3.1] - 2024-04-12
|
|
98
165
|
|
|
99
|
-
###
|
|
166
|
+
### 🌀 Miscellaneous
|
|
100
167
|
|
|
101
168
|
- Add migration utility function for Sparse Embedding support (#659)
|
|
102
169
|
|
|
@@ -110,7 +177,7 @@
|
|
|
110
177
|
|
|
111
178
|
### 🐛 Bug Fixes
|
|
112
179
|
|
|
113
|
-
- Fix haystack-ai
|
|
180
|
+
- Fix `haystack-ai` pins (#649)
|
|
114
181
|
|
|
115
182
|
## [integrations/qdrant-v3.2.0] - 2024-03-27
|
|
116
183
|
|
|
@@ -121,11 +188,9 @@
|
|
|
121
188
|
|
|
122
189
|
### 🐛 Bug Fixes
|
|
123
190
|
|
|
124
|
-
- Fix linter errors (#282)
|
|
191
|
+
- Fix: fix linter errors (#282)
|
|
125
192
|
- Fix order of API docs (#447)
|
|
126
|
-
|
|
127
|
-
This PR will also push the docs to Readme
|
|
128
|
-
- Fixes (#518)
|
|
193
|
+
- Doc: fixing docstrings for qdrant (#518)
|
|
129
194
|
|
|
130
195
|
### 🚜 Refactor
|
|
131
196
|
|
|
@@ -137,13 +202,19 @@ This PR will also push the docs to Readme
|
|
|
137
202
|
- Small consistency improvements (#536)
|
|
138
203
|
- Disable-class-def (#556)
|
|
139
204
|
|
|
140
|
-
### ⚙️
|
|
205
|
+
### ⚙️ CI
|
|
141
206
|
|
|
142
207
|
- Generate API docs for Qdrant (#361)
|
|
143
208
|
|
|
209
|
+
### 🌀 Miscellaneous
|
|
210
|
+
|
|
211
|
+
- Make tests show coverage (#566)
|
|
212
|
+
- Remove references to Python 3.7 (#601)
|
|
213
|
+
- Make Document Stores initially skip `SparseEmbedding` (#606)
|
|
214
|
+
|
|
144
215
|
## [integrations/qdrant-v3.0.0] - 2024-01-22
|
|
145
216
|
|
|
146
|
-
###
|
|
217
|
+
### 🌀 Miscellaneous
|
|
147
218
|
|
|
148
219
|
- [**breaking**] Change import paths (#255)
|
|
149
220
|
|
|
@@ -161,4 +232,8 @@ This PR will also push the docs to Readme
|
|
|
161
232
|
|
|
162
233
|
- Use `hatch_vcs` to manage integrations versioning (#103)
|
|
163
234
|
|
|
235
|
+
### 🌀 Miscellaneous
|
|
236
|
+
|
|
237
|
+
- Renamed QdrantRetriever to QdrantEmbeddingRetriever (#174)
|
|
238
|
+
|
|
164
239
|
<!-- generated by git-cliff -->
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 8.0.0
|
|
3
|
+
Version: 9.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -11,14 +11,14 @@ License-File: LICENSE.txt
|
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
19
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
|
-
Requires-Python: >=3.8
|
|
21
|
-
Requires-Dist: haystack-ai
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Requires-Dist: haystack-ai>=2.11.0
|
|
22
22
|
Requires-Dist: qdrant-client>=1.10.0
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
@@ -7,7 +7,7 @@ name = "qdrant-haystack"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = 'An integration of Qdrant ANN vector database backend with Haystack'
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.8"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
keywords = []
|
|
13
13
|
authors = [
|
|
@@ -18,14 +18,14 @@ classifiers = [
|
|
|
18
18
|
"License :: OSI Approved :: Apache Software License",
|
|
19
19
|
"Development Status :: 4 - Beta",
|
|
20
20
|
"Programming Language :: Python",
|
|
21
|
-
"Programming Language :: Python :: 3.8",
|
|
22
21
|
"Programming Language :: Python :: 3.9",
|
|
23
22
|
"Programming Language :: Python :: 3.10",
|
|
24
23
|
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
25
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
26
26
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
27
27
|
]
|
|
28
|
-
dependencies = ["haystack-ai", "qdrant-client>=1.10.0"]
|
|
28
|
+
dependencies = ["haystack-ai>=2.11.0", "qdrant-client>=1.10.0"]
|
|
29
29
|
|
|
30
30
|
[project.urls]
|
|
31
31
|
Source = "https://github.com/deepset-ai/haystack-core-integrations"
|
|
@@ -56,7 +56,7 @@ cov-retry = ["test-cov-retry", "cov-report"]
|
|
|
56
56
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
57
57
|
|
|
58
58
|
[[tool.hatch.envs.all.matrix]]
|
|
59
|
-
python = ["3.8", "3.9", "3.10", "3.11"]
|
|
59
|
+
python = [ "3.9", "3.10", "3.11"]
|
|
60
60
|
|
|
61
61
|
[tool.hatch.envs.lint]
|
|
62
62
|
installer = "uv"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
import uuid
|
|
3
2
|
from typing import List, Union
|
|
4
3
|
|
|
4
|
+
from haystack import logging
|
|
5
5
|
from haystack.dataclasses import Document
|
|
6
6
|
from qdrant_client.http import models as rest
|
|
7
7
|
|
|
@@ -23,14 +23,6 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
23
23
|
for document in documents:
|
|
24
24
|
payload = document.to_dict(flatten=False)
|
|
25
25
|
|
|
26
|
-
if payload.pop("dataframe", None):
|
|
27
|
-
logger.warning(
|
|
28
|
-
"Document %s has the `dataframe` field set,"
|
|
29
|
-
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
30
|
-
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
31
|
-
document.id,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
26
|
if use_sparse_embeddings:
|
|
35
27
|
vector = {}
|
|
36
28
|
|
|
@@ -73,14 +65,6 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
|
|
|
73
65
|
payload = {**point.payload}
|
|
74
66
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
75
67
|
|
|
76
|
-
if payload.pop("dataframe", None):
|
|
77
|
-
logger.warning(
|
|
78
|
-
"Document %s has the `dataframe` field set,"
|
|
79
|
-
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
80
|
-
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
81
|
-
payload["id"],
|
|
82
|
-
)
|
|
83
|
-
|
|
84
68
|
if not use_sparse_embeddings:
|
|
85
69
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
86
70
|
elif hasattr(point, "vector") and point.vector is not None:
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
import logging
|
|
3
2
|
from itertools import islice
|
|
4
3
|
from typing import Any, ClassVar, Dict, Generator, List, Optional, Set, Union
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import qdrant_client
|
|
8
|
-
from haystack import default_from_dict, default_to_dict
|
|
7
|
+
from haystack import default_from_dict, default_to_dict, logging
|
|
9
8
|
from haystack.dataclasses import Document
|
|
10
9
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
11
10
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
@@ -48,8 +47,8 @@ def get_batches_from_generator(iterable, n):
|
|
|
48
47
|
|
|
49
48
|
class QdrantDocumentStore:
|
|
50
49
|
"""
|
|
51
|
-
QdrantDocumentStore
|
|
52
|
-
|
|
50
|
+
A QdrantDocumentStore implementation that you
|
|
51
|
+
can use with any Qdrant instance: in-memory, disk-persisted, Docker-based,
|
|
53
52
|
and Qdrant Cloud Cluster deployments.
|
|
54
53
|
|
|
55
54
|
Usage example by creating an in-memory instance:
|
|
@@ -866,10 +865,18 @@ class QdrantDocumentStore:
|
|
|
866
865
|
|
|
867
866
|
collection_info = self.client.get_collection(collection_name)
|
|
868
867
|
|
|
869
|
-
has_named_vectors = (
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
868
|
+
has_named_vectors = isinstance(collection_info.config.params.vectors, dict)
|
|
869
|
+
|
|
870
|
+
if has_named_vectors and DENSE_VECTORS_NAME not in collection_info.config.params.vectors:
|
|
871
|
+
msg = (
|
|
872
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
873
|
+
f"but it has been originally created outside of Haystack and is not supported. "
|
|
874
|
+
f"If possible, you should create a new Document Store with Haystack. "
|
|
875
|
+
f"In case you want to migrate the existing collection, see an example script in "
|
|
876
|
+
f"https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/src/"
|
|
877
|
+
f"haystack_integrations/document_stores/qdrant/migrate_to_sparse.py."
|
|
878
|
+
)
|
|
879
|
+
raise QdrantStoreError(msg)
|
|
873
880
|
|
|
874
881
|
if self.use_sparse_embeddings and not has_named_vectors:
|
|
875
882
|
msg = (
|
|
@@ -882,7 +889,7 @@ class QdrantDocumentStore:
|
|
|
882
889
|
)
|
|
883
890
|
raise QdrantStoreError(msg)
|
|
884
891
|
|
|
885
|
-
|
|
892
|
+
if not self.use_sparse_embeddings and has_named_vectors:
|
|
886
893
|
msg = (
|
|
887
894
|
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
888
895
|
f"but it has been originally created with sparse embedding vectors."
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
import logging
|
|
1
|
+
import logging as python_logging
|
|
2
2
|
import time
|
|
3
3
|
|
|
4
|
+
from haystack import logging
|
|
4
5
|
from qdrant_client.http import models
|
|
5
6
|
|
|
6
7
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
7
8
|
|
|
8
9
|
logger = logging.getLogger(__name__)
|
|
9
|
-
logger.addHandler(logging.StreamHandler())
|
|
10
|
-
logger.setLevel(logging.INFO)
|
|
10
|
+
logger.addHandler(python_logging.StreamHandler())
|
|
11
|
+
logger.setLevel(python_logging.INFO)
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str):
|
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
-
from haystack import Document
|
|
3
|
-
from pandas import DataFrame
|
|
4
2
|
from qdrant_client.http import models as rest
|
|
5
3
|
|
|
6
4
|
from haystack_integrations.document_stores.qdrant.converters import (
|
|
7
|
-
convert_haystack_documents_to_qdrant_points,
|
|
8
5
|
convert_id,
|
|
9
6
|
convert_qdrant_point_to_haystack_document,
|
|
10
7
|
)
|
|
@@ -65,44 +62,3 @@ def test_point_to_document_reverts_proper_structure_from_record_without_sparse()
|
|
|
65
62
|
assert document.sparse_embedding is None
|
|
66
63
|
assert {"test_field": 1} == document.meta
|
|
67
64
|
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def test_point_to_document_skips_dataframe():
|
|
71
|
-
|
|
72
|
-
point = rest.Record(
|
|
73
|
-
id="c7c62e8e-02b9-4ec6-9f88-46bd97b628b7",
|
|
74
|
-
payload={
|
|
75
|
-
"id": "my-id",
|
|
76
|
-
"content": "Lorem ipsum",
|
|
77
|
-
"content_type": "text",
|
|
78
|
-
"meta": {
|
|
79
|
-
"test_field": 1,
|
|
80
|
-
},
|
|
81
|
-
"dataframe": {"a": [1, 2, 3]},
|
|
82
|
-
},
|
|
83
|
-
vector=[1.0, 0.0, 0.0, 0.0],
|
|
84
|
-
)
|
|
85
|
-
document = convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=False)
|
|
86
|
-
assert "my-id" == document.id
|
|
87
|
-
assert "Lorem ipsum" == document.content
|
|
88
|
-
assert "text" == document.content_type
|
|
89
|
-
assert {"test_field": 1} == document.meta
|
|
90
|
-
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
91
|
-
assert not hasattr(document, "dataframe") or document.dataframe is None
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def test_documents_to_points_skips_dataframe():
|
|
95
|
-
doc = Document(
|
|
96
|
-
id="my-id",
|
|
97
|
-
content="Lorem ipsum",
|
|
98
|
-
embedding=[1.0, 0.0, 0.0, 0.0],
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
doc.dataframe = DataFrame([[1, 2], [3, 4]])
|
|
102
|
-
|
|
103
|
-
points = convert_haystack_documents_to_qdrant_points([doc], use_sparse_embeddings=False)
|
|
104
|
-
assert len(points) == 1
|
|
105
|
-
|
|
106
|
-
assert points[0].payload["content"] == "Lorem ipsum"
|
|
107
|
-
assert points[0].vector == [1.0, 0.0, 0.0, 0.0]
|
|
108
|
-
assert "dataframe" not in points[0].payload
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import List
|
|
2
|
-
from unittest.mock import patch
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
from haystack import Document
|
|
@@ -15,6 +15,7 @@ from haystack.testing.document_store import (
|
|
|
15
15
|
from qdrant_client.http import models as rest
|
|
16
16
|
|
|
17
17
|
from haystack_integrations.document_stores.qdrant.document_store import (
|
|
18
|
+
DENSE_VECTORS_NAME,
|
|
18
19
|
SPARSE_VECTORS_NAME,
|
|
19
20
|
QdrantDocumentStore,
|
|
20
21
|
QdrantStoreError,
|
|
@@ -151,3 +152,79 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
151
152
|
|
|
152
153
|
with pytest.raises(QdrantStoreError):
|
|
153
154
|
document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
|
|
155
|
+
|
|
156
|
+
def test_set_up_collection_with_existing_incompatible_collection(self):
|
|
157
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
158
|
+
|
|
159
|
+
# Mock collection info with named vectors but missing DENSE_VECTORS_NAME
|
|
160
|
+
mock_collection_info = MagicMock()
|
|
161
|
+
mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()}
|
|
162
|
+
|
|
163
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
164
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
165
|
+
):
|
|
166
|
+
|
|
167
|
+
with pytest.raises(QdrantStoreError, match="created outside of Haystack"):
|
|
168
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
|
|
169
|
+
|
|
170
|
+
def test_set_up_collection_use_sparse_embeddings_true_without_named_vectors(self):
|
|
171
|
+
"""Test that an error is raised when use_sparse_embeddings is True but collection doesn't have named vectors"""
|
|
172
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
173
|
+
|
|
174
|
+
# Mock collection info without named vectors
|
|
175
|
+
mock_collection_info = MagicMock()
|
|
176
|
+
mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig)
|
|
177
|
+
|
|
178
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
179
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
180
|
+
):
|
|
181
|
+
|
|
182
|
+
with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"):
|
|
183
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", True, False)
|
|
184
|
+
|
|
185
|
+
def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors(self):
|
|
186
|
+
"""Test that an error is raised when use_sparse_embeddings is False but collection has named vectors"""
|
|
187
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False)
|
|
188
|
+
|
|
189
|
+
# Mock collection info with named vectors
|
|
190
|
+
mock_collection_info = MagicMock()
|
|
191
|
+
mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()}
|
|
192
|
+
|
|
193
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
194
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
195
|
+
):
|
|
196
|
+
|
|
197
|
+
with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"):
|
|
198
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
199
|
+
|
|
200
|
+
def test_set_up_collection_with_distance_mismatch(self):
|
|
201
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine")
|
|
202
|
+
|
|
203
|
+
# Mock collection info with different distance
|
|
204
|
+
mock_collection_info = MagicMock()
|
|
205
|
+
mock_collection_info.config.params.vectors = MagicMock()
|
|
206
|
+
mock_collection_info.config.params.vectors.distance = rest.Distance.DOT
|
|
207
|
+
mock_collection_info.config.params.vectors.size = 768
|
|
208
|
+
|
|
209
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
210
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
211
|
+
):
|
|
212
|
+
|
|
213
|
+
with pytest.raises(ValueError, match="different similarity"):
|
|
214
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
215
|
+
|
|
216
|
+
def test_set_up_collection_with_dimension_mismatch(self):
|
|
217
|
+
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine")
|
|
218
|
+
|
|
219
|
+
# Mock collection info with different vector size
|
|
220
|
+
mock_collection_info = MagicMock()
|
|
221
|
+
mock_collection_info.config.params.vectors = MagicMock()
|
|
222
|
+
mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE
|
|
223
|
+
mock_collection_info.config.params.vectors.size = 512
|
|
224
|
+
|
|
225
|
+
with patch.object(document_store.client, "collection_exists", return_value=True), patch.object(
|
|
226
|
+
document_store.client, "get_collection", return_value=mock_collection_info
|
|
227
|
+
):
|
|
228
|
+
|
|
229
|
+
with pytest.raises(ValueError, match="different vector size"):
|
|
230
|
+
document_store._set_up_collection("test_collection", 768, False, "cosine", False, False)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|