qdrant-haystack 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -37,6 +37,7 @@ class QdrantDocumentStore(BaseDocumentStore):
37
37
 
38
38
  def __init__(
39
39
  self,
40
+ location: Optional[str] = None,
40
41
  url: Optional[str] = None,
41
42
  port: int = 6333,
42
43
  grpc_port: int = 6334,
@@ -46,6 +47,7 @@ class QdrantDocumentStore(BaseDocumentStore):
46
47
  prefix: Optional[str] = None,
47
48
  timeout: Optional[float] = None,
48
49
  host: Optional[str] = None,
50
+ path: Optional[str] = None,
49
51
  index: str = "Document",
50
52
  embedding_dim: int = 768,
51
53
  hnsw_config: Optional[Dict] = None,
@@ -62,6 +64,7 @@ class QdrantDocumentStore(BaseDocumentStore):
62
64
  super().__init__()
63
65
 
64
66
  self.client = qdrant_client.QdrantClient(
67
+ location=location,
65
68
  url=url,
66
69
  port=port,
67
70
  grpc_port=grpc_port,
@@ -71,6 +74,7 @@ class QdrantDocumentStore(BaseDocumentStore):
71
74
  prefix=prefix,
72
75
  timeout=timeout,
73
76
  host=host,
77
+ path=path,
74
78
  **kwargs,
75
79
  )
76
80
 
@@ -165,10 +169,11 @@ class QdrantDocumentStore(BaseDocumentStore):
165
169
 
166
170
  next_offset = None
167
171
  stop_scrolling = False
172
+ scroll_filter = self.qdrant_filter_converter.convert(None, ids)
168
173
  while not stop_scrolling:
169
174
  records, next_offset = self.client.scroll(
170
175
  collection_name=index,
171
- scroll_filter=self.qdrant_filter_converter.convert(None, ids),
176
+ scroll_filter=scroll_filter,
172
177
  limit=batch_size,
173
178
  offset=next_offset,
174
179
  with_payload=True,
@@ -201,7 +206,10 @@ class QdrantDocumentStore(BaseDocumentStore):
201
206
  count_filter=qdrant_filters,
202
207
  )
203
208
  return response.count
204
- except UnexpectedResponse:
209
+ except (UnexpectedResponse, ValueError):
210
+ # Qdrant local raises ValueError if the collection is not found, but
211
+ # with the remote server UnexpectedResponse is raised. Until that's unified,
212
+ # we need to catch both.
205
213
  return 0
206
214
 
207
215
  def get_embedding_count(
@@ -490,30 +498,36 @@ class QdrantDocumentStore(BaseDocumentStore):
490
498
  # Check if the collection already exists and validate its
491
499
  # current configuration with the parameters.
492
500
  collection_info = self.client.get_collection(collection_name)
493
- current_distance = collection_info.config.params.vectors.distance
494
- current_vector_size = collection_info.config.params.vectors.size
495
-
496
- if current_distance != distance:
497
- raise ValueError(
498
- f"Collection '{collection_name}' already exists in Qdrant, "
499
- f"but it is configured with a similarity '{current_distance.name}'. "
500
- f"If you want to use that collection, but with a different "
501
- f"similarity, please set `recreate_collection=True` argument."
502
- )
503
-
504
- if current_vector_size != embedding_dim:
505
- raise ValueError(
506
- f"Collection '{collection_name}' already exists in Qdrant, "
507
- f"but it is configured with a vector size '{current_vector_size}'. "
508
- f"If you want to use that collection, but with a different "
509
- f"vector size, please set `recreate_collection=True` argument."
510
- )
511
- except (UnexpectedResponse, _InactiveRpcError):
501
+ except (UnexpectedResponse, _InactiveRpcError, ValueError):
512
502
  # That indicates the collection does not exist, so it can be
513
503
  # safely created with any configuration.
504
+ #
505
+ # Qdrant local raises ValueError if the collection is not found, but
506
+ # with the remote server UnexpectedResponse / _InactiveRpcError is raised.
507
+ # Until that's unified, we need to catch both.
514
508
  self._recreate_collection(
515
509
  collection_name, distance, embedding_dim, hnsw_config
516
510
  )
511
+ return
512
+
513
+ current_distance = collection_info.config.params.vectors.distance
514
+ current_vector_size = collection_info.config.params.vectors.size
515
+
516
+ if current_distance != distance:
517
+ raise ValueError(
518
+ f"Collection '{collection_name}' already exists in Qdrant, "
519
+ f"but it is configured with a similarity '{current_distance.name}'. "
520
+ f"If you want to use that collection, but with a different "
521
+ f"similarity, please set `recreate_collection=True` argument."
522
+ )
523
+
524
+ if current_vector_size != embedding_dim:
525
+ raise ValueError(
526
+ f"Collection '{collection_name}' already exists in Qdrant, "
527
+ f"but it is configured with a vector size '{current_vector_size}'. "
528
+ f"If you want to use that collection, but with a different "
529
+ f"vector size, please set `recreate_collection=True` argument."
530
+ )
517
531
 
518
532
  def _recreate_collection(
519
533
  self, collection_name, distance, embedding_dim, hnsw_config
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qdrant-haystack
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  License: Apache 2.0
6
6
  Author: Kacper Łukawski
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Requires-Dist: farm-haystack (>=1.13.0,<2.0.0)
15
- Requires-Dist: qdrant-client (>=1.0.2,<2.0.0)
15
+ Requires-Dist: qdrant-client (>=1.1.2,<2.0.0)
16
16
  Description-Content-Type: text/markdown
17
17
 
18
18
  # qdrant-haystack
@@ -45,7 +45,7 @@ embeddings.
45
45
  from qdrant_haystack import QdrantDocumentStore
46
46
 
47
47
  document_store = QdrantDocumentStore(
48
- url="localhost",
48
+ "localhost",
49
49
  index="Document",
50
50
  embedding_dim=512,
51
51
  recreate_index=True,
@@ -56,6 +56,51 @@ document_store = QdrantDocumentStore(
56
56
  The list of parameters accepted by `QdrantDocumentStore` is complementary to those used in the
57
57
  official [Python Qdrant client](https://github.com/qdrant/qdrant_client).
58
58
 
59
+ ### Using local in-memory / disk-persisted mode
60
+
61
+ The Qdrant Python client, from version 1.1.1, supports local in-memory/disk-persisted mode. That's
62
+ a good choice for any test scenarios and quick experiments in which you do not plan to store
63
+ lots of vectors. In such a case, spinning up a Docker container might not even be required.
64
+
65
+ Local mode is also supported by the `qdrant-haystack` integration.
66
+
67
+ #### In-memory storage
68
+
69
+ If you want transient storage, for example for automated tests launched
70
+ during your CI/CD pipeline, using Qdrant Local mode with in-memory storage might be a preferred
71
+ option. It can be enabled simply by passing `:memory:` as the first parameter when creating an
72
+ instance of `QdrantDocumentStore`.
73
+
74
+ ```python
75
+ from qdrant_haystack import QdrantDocumentStore
76
+
77
+ document_store = QdrantDocumentStore(
78
+ ":memory:",
79
+ index="Document",
80
+ embedding_dim=512,
81
+ recreate_index=True,
82
+ hnsw_config={"m": 16, "ef_construct": 64} # Optional
83
+ )
84
+ ```
85
+
86
+ #### On-disk storage
87
+
88
+ However, if you prefer to keep the vectors between different runs of your application, it
89
+ might be better to use on-disk storage and pass the path that should be used to persist
90
+ the data.
91
+
92
+ ```python
93
+ from qdrant_haystack import QdrantDocumentStore
94
+
95
+ document_store = QdrantDocumentStore(
96
+ path="/home/qdrant/storage_local",
97
+ index="Document",
98
+ embedding_dim=512,
99
+ recreate_index=True,
100
+ hnsw_config={"m": 16, "ef_construct": 64} # Optional
101
+ )
102
+ ```
103
+
59
104
  ### Connecting to Qdrant Cloud cluster
60
105
 
61
106
  If you prefer not to manage your own Qdrant instance, [Qdrant Cloud](https://cloud.qdrant.io/)
@@ -65,7 +110,7 @@ might be a better option.
65
110
  from qdrant_haystack import QdrantDocumentStore
66
111
 
67
112
  document_store = QdrantDocumentStore(
68
- url="https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io",
113
+ "https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io",
69
114
  index="Document",
70
115
  api_key="<< YOUR QDRANT CLOUD API KEY >>",
71
116
  embedding_dim=512,
@@ -2,9 +2,9 @@ qdrant_haystack/__init__.py,sha256=F7uhTaB7pul_sbPyBQNYIz5eTOVCW5f9qF3FjiN8CRM,1
2
2
  qdrant_haystack/document_stores/__init__.py,sha256=RbJ_JV8mXcmWI9LrHj0gPPEokPfoKGi6zFwkpXvHsjA,107
3
3
  qdrant_haystack/document_stores/converters.py,sha256=UkmoK2AMS_zEUy2C9B-04DdMdL0KxANB5d7QBOySzY0,2285
4
4
  qdrant_haystack/document_stores/filters.py,sha256=lZumUKgAz1vlWkI0x2jqITDaHDGkth8n3bw1uXTqmjM,8295
5
- qdrant_haystack/document_stores/qdrant.py,sha256=kwQACKRDGOQ29x9MwTj5RRj7bRNliUeYYnlN26Fxpk8,18456
5
+ qdrant_haystack/document_stores/qdrant.py,sha256=hekVs25mnqHcjId9buWzlr7aIsMIq1YjkZbGEQrm3Uw,19054
6
6
  qdrant_haystack/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- qdrant_haystack-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- qdrant_haystack-0.0.4.dist-info/METADATA,sha256=__Q-cDKSu-hVJfAX4yX8sWQZEs3vp6ldNGprt3G3cyo,2333
9
- qdrant_haystack-0.0.4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
10
- qdrant_haystack-0.0.4.dist-info/RECORD,,
7
+ qdrant_haystack-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ qdrant_haystack-0.0.5.dist-info/METADATA,sha256=LVmmp970YdLGka5aLH-ycUMttzzbr8bpjH83iJUgiDg,3797
9
+ qdrant_haystack-0.0.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
10
+ qdrant_haystack-0.0.5.dist-info/RECORD,,