qdrant-haystack 3.1.0__py3-none-any.whl → 3.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/document_stores/qdrant/converters.py +14 -0
- haystack_integrations/document_stores/qdrant/document_store.py +28 -4
- haystack_integrations/document_stores/qdrant/filters.py +48 -32
- {qdrant_haystack-3.1.0.dist-info → qdrant_haystack-3.2.1.dist-info}/METADATA +4 -5
- {qdrant_haystack-3.1.0.dist-info → qdrant_haystack-3.2.1.dist-info}/RECORD +7 -7
- {qdrant_haystack-3.1.0.dist-info → qdrant_haystack-3.2.1.dist-info}/WHEEL +1 -1
- {qdrant_haystack-3.1.0.dist-info → qdrant_haystack-3.2.1.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import uuid
|
|
2
3
|
from typing import List, Union
|
|
3
4
|
|
|
4
5
|
from haystack.dataclasses import Document
|
|
5
6
|
from qdrant_client.http import models as rest
|
|
6
7
|
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
class HaystackToQdrant:
|
|
9
12
|
"""A converter from Haystack to Qdrant types."""
|
|
@@ -22,6 +25,17 @@ class HaystackToQdrant:
|
|
|
22
25
|
vector = payload.pop(embedding_field) or {}
|
|
23
26
|
_id = self.convert_id(payload.get("id"))
|
|
24
27
|
|
|
28
|
+
# TODO: remove as soon as we introduce the support for sparse embeddings in Qdrant
|
|
29
|
+
if "sparse_embedding" in payload:
|
|
30
|
+
sparse_embedding = payload.pop("sparse_embedding", None)
|
|
31
|
+
if sparse_embedding:
|
|
32
|
+
logger.warning(
|
|
33
|
+
"Document %s has the `sparse_embedding` field set,"
|
|
34
|
+
"but storing sparse embeddings in Qdrant is not currently supported."
|
|
35
|
+
"The `sparse_embedding` field will be ignored.",
|
|
36
|
+
payload["id"],
|
|
37
|
+
)
|
|
38
|
+
|
|
25
39
|
point = rest.PointStruct(
|
|
26
40
|
payload=payload,
|
|
27
41
|
vector=vector,
|
|
@@ -63,6 +63,7 @@ class QdrantDocumentStore:
|
|
|
63
63
|
path: Optional[str] = None,
|
|
64
64
|
index: str = "Document",
|
|
65
65
|
embedding_dim: int = 768,
|
|
66
|
+
on_disk: bool = False, # noqa: FBT001, FBT002
|
|
66
67
|
content_field: str = "content",
|
|
67
68
|
name_field: str = "name",
|
|
68
69
|
embedding_field: str = "embedding",
|
|
@@ -84,6 +85,7 @@ class QdrantDocumentStore:
|
|
|
84
85
|
metadata: Optional[dict] = None,
|
|
85
86
|
write_batch_size: int = 100,
|
|
86
87
|
scroll_size: int = 10_000,
|
|
88
|
+
payload_fields_to_index: Optional[List[dict]] = None,
|
|
87
89
|
):
|
|
88
90
|
super().__init__()
|
|
89
91
|
|
|
@@ -130,11 +132,13 @@ class QdrantDocumentStore:
|
|
|
130
132
|
self.init_from = init_from
|
|
131
133
|
self.wait_result_from_api = wait_result_from_api
|
|
132
134
|
self.recreate_index = recreate_index
|
|
135
|
+
self.payload_fields_to_index = payload_fields_to_index
|
|
133
136
|
|
|
134
137
|
# Make sure the collection is properly set up
|
|
135
|
-
self._set_up_collection(index, embedding_dim, recreate_index, similarity)
|
|
138
|
+
self._set_up_collection(index, embedding_dim, recreate_index, similarity, on_disk, payload_fields_to_index)
|
|
136
139
|
|
|
137
140
|
self.embedding_dim = embedding_dim
|
|
141
|
+
self.on_disk = on_disk
|
|
138
142
|
self.content_field = content_field
|
|
139
143
|
self.name_field = name_field
|
|
140
144
|
self.embedding_field = embedding_field
|
|
@@ -334,19 +338,36 @@ class QdrantDocumentStore:
|
|
|
334
338
|
)
|
|
335
339
|
raise QdrantStoreError(msg) from ke
|
|
336
340
|
|
|
341
|
+
def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None):
|
|
342
|
+
"""
|
|
343
|
+
Create payload index for the collection if payload_fields_to_index is provided
|
|
344
|
+
See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
|
|
345
|
+
"""
|
|
346
|
+
if payload_fields_to_index is not None:
|
|
347
|
+
for payload_index in payload_fields_to_index:
|
|
348
|
+
self.client.create_payload_index(
|
|
349
|
+
collection_name=collection_name,
|
|
350
|
+
field_name=payload_index["field_name"],
|
|
351
|
+
field_schema=payload_index["field_schema"],
|
|
352
|
+
)
|
|
353
|
+
|
|
337
354
|
def _set_up_collection(
|
|
338
355
|
self,
|
|
339
356
|
collection_name: str,
|
|
340
357
|
embedding_dim: int,
|
|
341
358
|
recreate_collection: bool, # noqa: FBT001
|
|
342
359
|
similarity: str,
|
|
360
|
+
on_disk: bool = False, # noqa: FBT001, FBT002
|
|
361
|
+
payload_fields_to_index: Optional[List[dict]] = None,
|
|
343
362
|
):
|
|
344
363
|
distance = self._get_distance(similarity)
|
|
345
364
|
|
|
346
365
|
if recreate_collection:
|
|
347
366
|
# There is no need to verify the current configuration of that
|
|
348
367
|
# collection. It might be just recreated again.
|
|
349
|
-
self._recreate_collection(collection_name, distance, embedding_dim)
|
|
368
|
+
self._recreate_collection(collection_name, distance, embedding_dim, on_disk)
|
|
369
|
+
# Create Payload index if payload_fields_to_index is provided
|
|
370
|
+
self._create_payload_index(collection_name, payload_fields_to_index)
|
|
350
371
|
return
|
|
351
372
|
|
|
352
373
|
try:
|
|
@@ -360,7 +381,9 @@ class QdrantDocumentStore:
|
|
|
360
381
|
# Qdrant local raises ValueError if the collection is not found, but
|
|
361
382
|
# with the remote server UnexpectedResponse / RpcError is raised.
|
|
362
383
|
# Until that's unified, we need to catch both.
|
|
363
|
-
self._recreate_collection(collection_name, distance, embedding_dim)
|
|
384
|
+
self._recreate_collection(collection_name, distance, embedding_dim, on_disk)
|
|
385
|
+
# Create Payload index if payload_fields_to_index is provided
|
|
386
|
+
self._create_payload_index(collection_name, payload_fields_to_index)
|
|
364
387
|
return
|
|
365
388
|
|
|
366
389
|
current_distance = collection_info.config.params.vectors.distance
|
|
@@ -384,11 +407,12 @@ class QdrantDocumentStore:
|
|
|
384
407
|
)
|
|
385
408
|
raise ValueError(msg)
|
|
386
409
|
|
|
387
|
-
def _recreate_collection(self, collection_name: str, distance, embedding_dim: int):
|
|
410
|
+
def _recreate_collection(self, collection_name: str, distance, embedding_dim: int, on_disk: bool): # noqa: FBT001
|
|
388
411
|
self.client.recreate_collection(
|
|
389
412
|
collection_name=collection_name,
|
|
390
413
|
vectors_config=rest.VectorParams(
|
|
391
414
|
size=embedding_dim,
|
|
415
|
+
on_disk=on_disk,
|
|
392
416
|
distance=distance,
|
|
393
417
|
),
|
|
394
418
|
shard_number=self.shard_number,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any, List, Optional, Union
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import List, Optional, Union
|
|
3
3
|
|
|
4
4
|
from haystack.utils.filters import COMPARISON_OPERATORS, LOGICAL_OPERATORS, FilterError
|
|
5
5
|
from qdrant_client.http import models
|
|
@@ -10,15 +10,7 @@ COMPARISON_OPERATORS = COMPARISON_OPERATORS.keys()
|
|
|
10
10
|
LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class BaseFilterConverter(ABC):
|
|
14
|
-
"""Converts Haystack filters to a format accepted by an external tool."""
|
|
15
|
-
|
|
16
|
-
@abstractmethod
|
|
17
|
-
def convert(self, filter_term: Optional[Union[List[dict], dict]]) -> Optional[Any]:
|
|
18
|
-
raise NotImplementedError
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class QdrantFilterConverter(BaseFilterConverter):
|
|
13
|
+
class QdrantFilterConverter:
|
|
22
14
|
"""Converts Haystack filters to the format used by Qdrant."""
|
|
23
15
|
|
|
24
16
|
def __init__(self):
|
|
@@ -141,36 +133,52 @@ class QdrantFilterConverter(BaseFilterConverter):
|
|
|
141
133
|
must_not=[
|
|
142
134
|
(
|
|
143
135
|
models.FieldCondition(key=key, match=models.MatchText(text=item))
|
|
144
|
-
if isinstance(item, str) and " " not in item
|
|
136
|
+
if isinstance(item, str) and " " in item
|
|
145
137
|
else models.FieldCondition(key=key, match=models.MatchValue(value=item))
|
|
146
138
|
)
|
|
147
139
|
for item in value
|
|
148
140
|
]
|
|
149
141
|
)
|
|
150
142
|
|
|
151
|
-
def _build_lt_condition(self, key: str, value: float) -> models.Condition:
|
|
152
|
-
if
|
|
153
|
-
|
|
154
|
-
raise FilterError(msg)
|
|
155
|
-
return models.FieldCondition(key=key, range=models.Range(lt=value))
|
|
143
|
+
def _build_lt_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
|
|
144
|
+
if isinstance(value, str) and is_datetime_string(value):
|
|
145
|
+
return models.FieldCondition(key=key, range=models.DatetimeRange(lt=value))
|
|
156
146
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
msg = f"Value {value} is not an int or float"
|
|
160
|
-
raise FilterError(msg)
|
|
161
|
-
return models.FieldCondition(key=key, range=models.Range(lte=value))
|
|
147
|
+
if isinstance(value, (int, float)):
|
|
148
|
+
return models.FieldCondition(key=key, range=models.Range(lt=value))
|
|
162
149
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
msg = f"Value {value} is not an int or float"
|
|
166
|
-
raise FilterError(msg)
|
|
167
|
-
return models.FieldCondition(key=key, range=models.Range(gt=value))
|
|
150
|
+
msg = f"Value {value} is not an int or float or datetime string"
|
|
151
|
+
raise FilterError(msg)
|
|
168
152
|
|
|
169
|
-
def
|
|
170
|
-
if
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
153
|
+
def _build_lte_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
|
|
154
|
+
if isinstance(value, str) and is_datetime_string(value):
|
|
155
|
+
return models.FieldCondition(key=key, range=models.DatetimeRange(lte=value))
|
|
156
|
+
|
|
157
|
+
if isinstance(value, (int, float)):
|
|
158
|
+
return models.FieldCondition(key=key, range=models.Range(lte=value))
|
|
159
|
+
|
|
160
|
+
msg = f"Value {value} is not an int or float or datetime string"
|
|
161
|
+
raise FilterError(msg)
|
|
162
|
+
|
|
163
|
+
def _build_gt_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
|
|
164
|
+
if isinstance(value, str) and is_datetime_string(value):
|
|
165
|
+
return models.FieldCondition(key=key, range=models.DatetimeRange(gt=value))
|
|
166
|
+
|
|
167
|
+
if isinstance(value, (int, float)):
|
|
168
|
+
return models.FieldCondition(key=key, range=models.Range(gt=value))
|
|
169
|
+
|
|
170
|
+
msg = f"Value {value} is not an int or float or datetime string"
|
|
171
|
+
raise FilterError(msg)
|
|
172
|
+
|
|
173
|
+
def _build_gte_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
|
|
174
|
+
if isinstance(value, str) and is_datetime_string(value):
|
|
175
|
+
return models.FieldCondition(key=key, range=models.DatetimeRange(gte=value))
|
|
176
|
+
|
|
177
|
+
if isinstance(value, (int, float)):
|
|
178
|
+
return models.FieldCondition(key=key, range=models.Range(gte=value))
|
|
179
|
+
|
|
180
|
+
msg = f"Value {value} is not an int or float or datetime string"
|
|
181
|
+
raise FilterError(msg)
|
|
174
182
|
|
|
175
183
|
def _build_has_id_condition(self, id_values: List[models.ExtendedPointId]) -> models.HasIdCondition:
|
|
176
184
|
return models.HasIdCondition(
|
|
@@ -215,3 +223,11 @@ class QdrantFilterConverter(BaseFilterConverter):
|
|
|
215
223
|
return models.Filter(**{part_name: subfilter.must})
|
|
216
224
|
|
|
217
225
|
return payload_filter
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def is_datetime_string(value: str) -> bool:
|
|
229
|
+
try:
|
|
230
|
+
datetime.fromisoformat(value)
|
|
231
|
+
return True
|
|
232
|
+
except ValueError:
|
|
233
|
+
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.1.0
|
|
3
|
+
Version: 3.2.1
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -10,15 +10,14 @@ License-Expression: Apache-2.0
|
|
|
10
10
|
License-File: LICENSE.txt
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: Programming Language :: Python
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.8
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
18
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
|
-
Requires-Python: >=3.7
|
|
21
|
-
Requires-Dist: haystack-ai
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Requires-Dist: haystack-ai
|
|
22
21
|
Requires-Dist: qdrant-client
|
|
23
22
|
Description-Content-Type: text/markdown
|
|
24
23
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=uX6yULYL7RExXbKb9wD7TTz7fKupJf97lUi1YAHgJcY,200
|
|
2
2
|
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=Pu8qwKive3uvO3z4FnidgwXC0IUgxNRNuAeJrdE70rU,4423
|
|
3
3
|
haystack_integrations/document_stores/qdrant/__init__.py,sha256=PuGxUj29V00f6UiCpTHRkzGufL8bJUML2iNwJnX2KwM,195
|
|
4
|
-
haystack_integrations/document_stores/qdrant/converters.py,sha256=
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256
|
|
6
|
-
haystack_integrations/document_stores/qdrant/filters.py,sha256=
|
|
7
|
-
qdrant_haystack-3.1.
|
|
8
|
-
qdrant_haystack-3.1.
|
|
9
|
-
qdrant_haystack-3.1.
|
|
10
|
-
qdrant_haystack-3.1.
|
|
4
|
+
haystack_integrations/document_stores/qdrant/converters.py,sha256=q_S3ATfX2KF4z9c2Z6t3mqz8GnULXAuKXIiGSSKGBJ0,2442
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=-ahV-OuBjGn-299DbLDjq3a0CN_UdqK_-a7NIV2ngYw,18939
|
|
6
|
+
haystack_integrations/document_stores/qdrant/filters.py,sha256=26sgZOdiXEJesk2NdB6NbQoAxEInpLNxO5pLkLnELKE,9170
|
|
7
|
+
qdrant_haystack-3.2.1.dist-info/METADATA,sha256=f6i-52wn91Spzqrb80ZdXV4EhhFnOT16sUObZPLkNVE,1792
|
|
8
|
+
qdrant_haystack-3.2.1.dist-info/WHEEL,sha256=as-1oFTWSeWBgyzh0O_qF439xqBe6AbBgt4MfYe5zwY,87
|
|
9
|
+
qdrant_haystack-3.2.1.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
10
|
+
qdrant_haystack-3.2.1.dist-info/RECORD,,
|
|
File without changes
|