qdrant-haystack 3.1.0__py3-none-any.whl → 3.2.1__py3-none-any.whl

This diff shows the contents of publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -1,9 +1,12 @@
1
+ import logging
1
2
  import uuid
2
3
  from typing import List, Union
3
4
 
4
5
  from haystack.dataclasses import Document
5
6
  from qdrant_client.http import models as rest
6
7
 
8
+ logger = logging.getLogger(__name__)
9
+
7
10
 
8
11
  class HaystackToQdrant:
9
12
  """A converter from Haystack to Qdrant types."""
@@ -22,6 +25,17 @@ class HaystackToQdrant:
22
25
  vector = payload.pop(embedding_field) or {}
23
26
  _id = self.convert_id(payload.get("id"))
24
27
 
28
+ # TODO: remove as soon as we introduce the support for sparse embeddings in Qdrant
29
+ if "sparse_embedding" in payload:
30
+ sparse_embedding = payload.pop("sparse_embedding", None)
31
+ if sparse_embedding:
32
+ logger.warning(
33
+ "Document %s has the `sparse_embedding` field set,"
34
+ "but storing sparse embeddings in Qdrant is not currently supported."
35
+ "The `sparse_embedding` field will be ignored.",
36
+ payload["id"],
37
+ )
38
+
25
39
  point = rest.PointStruct(
26
40
  payload=payload,
27
41
  vector=vector,
@@ -63,6 +63,7 @@ class QdrantDocumentStore:
63
63
  path: Optional[str] = None,
64
64
  index: str = "Document",
65
65
  embedding_dim: int = 768,
66
+ on_disk: bool = False, # noqa: FBT001, FBT002
66
67
  content_field: str = "content",
67
68
  name_field: str = "name",
68
69
  embedding_field: str = "embedding",
@@ -84,6 +85,7 @@ class QdrantDocumentStore:
84
85
  metadata: Optional[dict] = None,
85
86
  write_batch_size: int = 100,
86
87
  scroll_size: int = 10_000,
88
+ payload_fields_to_index: Optional[List[dict]] = None,
87
89
  ):
88
90
  super().__init__()
89
91
 
@@ -130,11 +132,13 @@ class QdrantDocumentStore:
130
132
  self.init_from = init_from
131
133
  self.wait_result_from_api = wait_result_from_api
132
134
  self.recreate_index = recreate_index
135
+ self.payload_fields_to_index = payload_fields_to_index
133
136
 
134
137
  # Make sure the collection is properly set up
135
- self._set_up_collection(index, embedding_dim, recreate_index, similarity)
138
+ self._set_up_collection(index, embedding_dim, recreate_index, similarity, on_disk, payload_fields_to_index)
136
139
 
137
140
  self.embedding_dim = embedding_dim
141
+ self.on_disk = on_disk
138
142
  self.content_field = content_field
139
143
  self.name_field = name_field
140
144
  self.embedding_field = embedding_field
@@ -334,19 +338,36 @@ class QdrantDocumentStore:
334
338
  )
335
339
  raise QdrantStoreError(msg) from ke
336
340
 
341
+ def _create_payload_index(self, collection_name: str, payload_fields_to_index: Optional[List[dict]] = None):
342
+ """
343
+ Create payload index for the collection if payload_fields_to_index is provided
344
+ See: https://qdrant.tech/documentation/concepts/indexing/#payload-index
345
+ """
346
+ if payload_fields_to_index is not None:
347
+ for payload_index in payload_fields_to_index:
348
+ self.client.create_payload_index(
349
+ collection_name=collection_name,
350
+ field_name=payload_index["field_name"],
351
+ field_schema=payload_index["field_schema"],
352
+ )
353
+
337
354
  def _set_up_collection(
338
355
  self,
339
356
  collection_name: str,
340
357
  embedding_dim: int,
341
358
  recreate_collection: bool, # noqa: FBT001
342
359
  similarity: str,
360
+ on_disk: bool = False, # noqa: FBT001, FBT002
361
+ payload_fields_to_index: Optional[List[dict]] = None,
343
362
  ):
344
363
  distance = self._get_distance(similarity)
345
364
 
346
365
  if recreate_collection:
347
366
  # There is no need to verify the current configuration of that
348
367
  # collection. It might be just recreated again.
349
- self._recreate_collection(collection_name, distance, embedding_dim)
368
+ self._recreate_collection(collection_name, distance, embedding_dim, on_disk)
369
+ # Create Payload index if payload_fields_to_index is provided
370
+ self._create_payload_index(collection_name, payload_fields_to_index)
350
371
  return
351
372
 
352
373
  try:
@@ -360,7 +381,9 @@ class QdrantDocumentStore:
360
381
  # Qdrant local raises ValueError if the collection is not found, but
361
382
  # with the remote server UnexpectedResponse / RpcError is raised.
362
383
  # Until that's unified, we need to catch both.
363
- self._recreate_collection(collection_name, distance, embedding_dim)
384
+ self._recreate_collection(collection_name, distance, embedding_dim, on_disk)
385
+ # Create Payload index if payload_fields_to_index is provided
386
+ self._create_payload_index(collection_name, payload_fields_to_index)
364
387
  return
365
388
 
366
389
  current_distance = collection_info.config.params.vectors.distance
@@ -384,11 +407,12 @@ class QdrantDocumentStore:
384
407
  )
385
408
  raise ValueError(msg)
386
409
 
387
- def _recreate_collection(self, collection_name: str, distance, embedding_dim: int):
410
+ def _recreate_collection(self, collection_name: str, distance, embedding_dim: int, on_disk: bool): # noqa: FBT001
388
411
  self.client.recreate_collection(
389
412
  collection_name=collection_name,
390
413
  vectors_config=rest.VectorParams(
391
414
  size=embedding_dim,
415
+ on_disk=on_disk,
392
416
  distance=distance,
393
417
  ),
394
418
  shard_number=self.shard_number,
@@ -1,5 +1,5 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, List, Optional, Union
1
+ from datetime import datetime
2
+ from typing import List, Optional, Union
3
3
 
4
4
  from haystack.utils.filters import COMPARISON_OPERATORS, LOGICAL_OPERATORS, FilterError
5
5
  from qdrant_client.http import models
@@ -10,15 +10,7 @@ COMPARISON_OPERATORS = COMPARISON_OPERATORS.keys()
10
10
  LOGICAL_OPERATORS = LOGICAL_OPERATORS.keys()
11
11
 
12
12
 
13
- class BaseFilterConverter(ABC):
14
- """Converts Haystack filters to a format accepted by an external tool."""
15
-
16
- @abstractmethod
17
- def convert(self, filter_term: Optional[Union[List[dict], dict]]) -> Optional[Any]:
18
- raise NotImplementedError
19
-
20
-
21
- class QdrantFilterConverter(BaseFilterConverter):
13
+ class QdrantFilterConverter:
22
14
  """Converts Haystack filters to the format used by Qdrant."""
23
15
 
24
16
  def __init__(self):
@@ -141,36 +133,52 @@ class QdrantFilterConverter(BaseFilterConverter):
141
133
  must_not=[
142
134
  (
143
135
  models.FieldCondition(key=key, match=models.MatchText(text=item))
144
- if isinstance(item, str) and " " not in item
136
+ if isinstance(item, str) and " " in item
145
137
  else models.FieldCondition(key=key, match=models.MatchValue(value=item))
146
138
  )
147
139
  for item in value
148
140
  ]
149
141
  )
150
142
 
151
- def _build_lt_condition(self, key: str, value: float) -> models.Condition:
152
- if not isinstance(value, (int, float)):
153
- msg = f"Value {value} is not an int or float"
154
- raise FilterError(msg)
155
- return models.FieldCondition(key=key, range=models.Range(lt=value))
143
+ def _build_lt_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
144
+ if isinstance(value, str) and is_datetime_string(value):
145
+ return models.FieldCondition(key=key, range=models.DatetimeRange(lt=value))
156
146
 
157
- def _build_lte_condition(self, key: str, value: float) -> models.Condition:
158
- if not isinstance(value, (int, float)):
159
- msg = f"Value {value} is not an int or float"
160
- raise FilterError(msg)
161
- return models.FieldCondition(key=key, range=models.Range(lte=value))
147
+ if isinstance(value, (int, float)):
148
+ return models.FieldCondition(key=key, range=models.Range(lt=value))
162
149
 
163
- def _build_gt_condition(self, key: str, value: float) -> models.Condition:
164
- if not isinstance(value, (int, float)):
165
- msg = f"Value {value} is not an int or float"
166
- raise FilterError(msg)
167
- return models.FieldCondition(key=key, range=models.Range(gt=value))
150
+ msg = f"Value {value} is not an int or float or datetime string"
151
+ raise FilterError(msg)
168
152
 
169
- def _build_gte_condition(self, key: str, value: float) -> models.Condition:
170
- if not isinstance(value, (int, float)):
171
- msg = f"Value {value} is not an int or float"
172
- raise FilterError(msg)
173
- return models.FieldCondition(key=key, range=models.Range(gte=value))
153
+ def _build_lte_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
154
+ if isinstance(value, str) and is_datetime_string(value):
155
+ return models.FieldCondition(key=key, range=models.DatetimeRange(lte=value))
156
+
157
+ if isinstance(value, (int, float)):
158
+ return models.FieldCondition(key=key, range=models.Range(lte=value))
159
+
160
+ msg = f"Value {value} is not an int or float or datetime string"
161
+ raise FilterError(msg)
162
+
163
+ def _build_gt_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
164
+ if isinstance(value, str) and is_datetime_string(value):
165
+ return models.FieldCondition(key=key, range=models.DatetimeRange(gt=value))
166
+
167
+ if isinstance(value, (int, float)):
168
+ return models.FieldCondition(key=key, range=models.Range(gt=value))
169
+
170
+ msg = f"Value {value} is not an int or float or datetime string"
171
+ raise FilterError(msg)
172
+
173
+ def _build_gte_condition(self, key: str, value: Union[str, float, int]) -> models.Condition:
174
+ if isinstance(value, str) and is_datetime_string(value):
175
+ return models.FieldCondition(key=key, range=models.DatetimeRange(gte=value))
176
+
177
+ if isinstance(value, (int, float)):
178
+ return models.FieldCondition(key=key, range=models.Range(gte=value))
179
+
180
+ msg = f"Value {value} is not an int or float or datetime string"
181
+ raise FilterError(msg)
174
182
 
175
183
  def _build_has_id_condition(self, id_values: List[models.ExtendedPointId]) -> models.HasIdCondition:
176
184
  return models.HasIdCondition(
@@ -215,3 +223,11 @@ class QdrantFilterConverter(BaseFilterConverter):
215
223
  return models.Filter(**{part_name: subfilter.must})
216
224
 
217
225
  return payload_filter
226
+
227
+
228
+ def is_datetime_string(value: str) -> bool:
229
+ try:
230
+ datetime.fromisoformat(value)
231
+ return True
232
+ except ValueError:
233
+ return False
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.1.0
3
+ Version: 3.2.1
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -10,15 +10,14 @@ License-Expression: Apache-2.0
10
10
  License-File: LICENSE.txt
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Programming Language :: Python
13
- Classifier: Programming Language :: Python :: 3.7
14
13
  Classifier: Programming Language :: Python :: 3.8
15
14
  Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
18
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
- Requires-Python: >=3.7
21
- Requires-Dist: haystack-ai>=2.0.0b6
19
+ Requires-Python: >=3.8
20
+ Requires-Dist: haystack-ai
22
21
  Requires-Dist: qdrant-client
23
22
  Description-Content-Type: text/markdown
24
23
 
@@ -1,10 +1,10 @@
1
1
  haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=uX6yULYL7RExXbKb9wD7TTz7fKupJf97lUi1YAHgJcY,200
2
2
  haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=Pu8qwKive3uvO3z4FnidgwXC0IUgxNRNuAeJrdE70rU,4423
3
3
  haystack_integrations/document_stores/qdrant/__init__.py,sha256=PuGxUj29V00f6UiCpTHRkzGufL8bJUML2iNwJnX2KwM,195
4
- haystack_integrations/document_stores/qdrant/converters.py,sha256=chTu0-nWLy6Hnzn76mXfEagOOWhXIUqdoWTBvcHK7PY,1797
5
- haystack_integrations/document_stores/qdrant/document_store.py,sha256=c4lobSgv0CRRPxZM-Q5AKwpiaSjf-D7oiqQBaQoL1sc,17529
6
- haystack_integrations/document_stores/qdrant/filters.py,sha256=zpmFQHhIP0IFClMADr84USA7p5Is237evutVQLFK3s4,8570
7
- qdrant_haystack-3.1.0.dist-info/METADATA,sha256=q8mahJGC6yCwguQJo9FzSeVYSVoK1LHn9wTgeKuhLEY,1851
8
- qdrant_haystack-3.1.0.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
9
- qdrant_haystack-3.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
10
- qdrant_haystack-3.1.0.dist-info/RECORD,,
4
+ haystack_integrations/document_stores/qdrant/converters.py,sha256=q_S3ATfX2KF4z9c2Z6t3mqz8GnULXAuKXIiGSSKGBJ0,2442
5
+ haystack_integrations/document_stores/qdrant/document_store.py,sha256=-ahV-OuBjGn-299DbLDjq3a0CN_UdqK_-a7NIV2ngYw,18939
6
+ haystack_integrations/document_stores/qdrant/filters.py,sha256=26sgZOdiXEJesk2NdB6NbQoAxEInpLNxO5pLkLnELKE,9170
7
+ qdrant_haystack-3.2.1.dist-info/METADATA,sha256=f6i-52wn91Spzqrb80ZdXV4EhhFnOT16sUObZPLkNVE,1792
8
+ qdrant_haystack-3.2.1.dist-info/WHEEL,sha256=as-1oFTWSeWBgyzh0O_qF439xqBe6AbBgt4MfYe5zwY,87
9
+ qdrant_haystack-3.2.1.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
10
+ qdrant_haystack-3.2.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.21.1
2
+ Generator: hatchling 1.22.5
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any