qdrant-haystack 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

@@ -37,6 +37,7 @@ class QdrantDocumentStore(BaseDocumentStore):
37
37
 
38
38
  def __init__(
39
39
  self,
40
+ location: Optional[str] = None,
40
41
  url: Optional[str] = None,
41
42
  port: int = 6333,
42
43
  grpc_port: int = 6334,
@@ -46,8 +47,10 @@ class QdrantDocumentStore(BaseDocumentStore):
46
47
  prefix: Optional[str] = None,
47
48
  timeout: Optional[float] = None,
48
49
  host: Optional[str] = None,
50
+ path: Optional[str] = None,
49
51
  index: str = "Document",
50
52
  embedding_dim: int = 768,
53
+ hnsw_config: Optional[Dict] = None,
51
54
  content_field: str = "content",
52
55
  name_field: str = "name",
53
56
  embedding_field: str = "vector",
@@ -61,6 +64,7 @@ class QdrantDocumentStore(BaseDocumentStore):
61
64
  super().__init__()
62
65
 
63
66
  self.client = qdrant_client.QdrantClient(
67
+ location=location,
64
68
  url=url,
65
69
  port=port,
66
70
  grpc_port=grpc_port,
@@ -70,15 +74,19 @@ class QdrantDocumentStore(BaseDocumentStore):
70
74
  prefix=prefix,
71
75
  timeout=timeout,
72
76
  host=host,
77
+ path=path,
73
78
  **kwargs,
74
79
  )
75
80
 
76
- self._set_up_collection(index, embedding_dim, recreate_index, similarity)
81
+ self._set_up_collection(
82
+ index, embedding_dim, hnsw_config, recreate_index, similarity
83
+ )
77
84
 
78
85
  self.embedding_dim = embedding_dim
79
86
  self.content_field = content_field
80
87
  self.name_field = name_field
81
88
  self.embedding_field = embedding_field
89
+ self.hnsw_config = hnsw_config
82
90
  self.similarity = similarity
83
91
  self.index = index
84
92
  self.return_embedding = return_embedding
@@ -161,10 +169,11 @@ class QdrantDocumentStore(BaseDocumentStore):
161
169
 
162
170
  next_offset = None
163
171
  stop_scrolling = False
172
+ scroll_filter = self.qdrant_filter_converter.convert(None, ids)
164
173
  while not stop_scrolling:
165
174
  records, next_offset = self.client.scroll(
166
175
  collection_name=index,
167
- scroll_filter=self.qdrant_filter_converter.convert(None, ids),
176
+ scroll_filter=scroll_filter,
168
177
  limit=batch_size,
169
178
  offset=next_offset,
170
179
  with_payload=True,
@@ -197,7 +206,10 @@ class QdrantDocumentStore(BaseDocumentStore):
197
206
  count_filter=qdrant_filters,
198
207
  )
199
208
  return response.count
200
- except UnexpectedResponse:
209
+ except (UnexpectedResponse, ValueError):
210
+ # Qdrant local raises ValueError if the collection is not found, but
211
+ # with the remote server UnexpectedResponse is raised. Until that's unified,
212
+ # we need to catch both.
201
213
  return 0
202
214
 
203
215
  def get_embedding_count(
@@ -247,7 +259,9 @@ class QdrantDocumentStore(BaseDocumentStore):
247
259
  headers: Optional[Dict[str, str]] = None,
248
260
  ):
249
261
  index = index or self.index
250
- self._set_up_collection(index, self.embedding_dim, False, self.similarity)
262
+ self._set_up_collection(
263
+ index, self.embedding_dim, self.hnsw_config, False, self.similarity
264
+ )
251
265
  field_map = self._create_document_field_map()
252
266
 
253
267
  duplicate_documents = duplicate_documents or self.duplicate_documents
@@ -466,6 +480,7 @@ class QdrantDocumentStore(BaseDocumentStore):
466
480
  self,
467
481
  collection_name: str,
468
482
  embedding_dim: int,
483
+ hnsw_config: dict,
469
484
  recreate_collection: bool,
470
485
  similarity: str,
471
486
  ):
@@ -474,41 +489,54 @@ class QdrantDocumentStore(BaseDocumentStore):
474
489
  if recreate_collection:
475
490
  # There is no need to verify the current configuration of that
476
491
  # collection. It might be just recreated again.
477
- self._recreate_collection(collection_name, distance, embedding_dim)
492
+ self._recreate_collection(
493
+ collection_name, distance, embedding_dim, hnsw_config
494
+ )
478
495
  return
479
496
 
480
497
  try:
481
498
  # Check if the collection already exists and validate its
482
499
  # current configuration with the parameters.
483
500
  collection_info = self.client.get_collection(collection_name)
484
- current_distance = collection_info.config.params.vectors.distance
485
- current_vector_size = collection_info.config.params.vectors.size
486
-
487
- if current_distance != distance:
488
- raise ValueError(
489
- f"Collection '{collection_name}' already exists in Qdrant, "
490
- f"but it is configured with a similarity '{current_distance.name}'. "
491
- f"If you want to use that collection, but with a different "
492
- f"similarity, please set `recreate_collection=True` argument."
493
- )
494
-
495
- if current_vector_size != embedding_dim:
496
- raise ValueError(
497
- f"Collection '{collection_name}' already exists in Qdrant, "
498
- f"but it is configured with a vector size '{current_vector_size}'. "
499
- f"If you want to use that collection, but with a different "
500
- f"vector size, please set `recreate_collection=True` argument."
501
- )
502
- except (UnexpectedResponse, _InactiveRpcError):
501
+ except (UnexpectedResponse, _InactiveRpcError, ValueError):
503
502
  # That indicates the collection does not exist, so it can be
504
503
  # safely created with any configuration.
505
- self._recreate_collection(collection_name, distance, embedding_dim)
504
+ #
505
+ # Qdrant local raises ValueError if the collection is not found, but
506
+ # with the remote server UnexpectedResponse / _InactiveRpcError is raised.
507
+ # Until that's unified, we need to catch all of them.
508
+ self._recreate_collection(
509
+ collection_name, distance, embedding_dim, hnsw_config
510
+ )
511
+ return
512
+
513
+ current_distance = collection_info.config.params.vectors.distance
514
+ current_vector_size = collection_info.config.params.vectors.size
515
+
516
+ if current_distance != distance:
517
+ raise ValueError(
518
+ f"Collection '{collection_name}' already exists in Qdrant, "
519
+ f"but it is configured with a similarity '{current_distance.name}'. "
520
+ f"If you want to use that collection, but with a different "
521
+ f"similarity, please set `recreate_collection=True` argument."
522
+ )
506
523
 
507
- def _recreate_collection(self, collection_name, distance, embedding_dim):
524
+ if current_vector_size != embedding_dim:
525
+ raise ValueError(
526
+ f"Collection '{collection_name}' already exists in Qdrant, "
527
+ f"but it is configured with a vector size '{current_vector_size}'. "
528
+ f"If you want to use that collection, but with a different "
529
+ f"vector size, please set `recreate_collection=True` argument."
530
+ )
531
+
532
+ def _recreate_collection(
533
+ self, collection_name, distance, embedding_dim, hnsw_config
534
+ ):
508
535
  self.client.recreate_collection(
509
536
  collection_name=collection_name,
510
537
  vectors_config=rest.VectorParams(
511
538
  size=embedding_dim,
512
539
  distance=distance,
513
540
  ),
541
+ hnsw_config=hnsw_config,
514
542
  )
@@ -0,0 +1,122 @@
1
+ Metadata-Version: 2.1
2
+ Name: qdrant-haystack
3
+ Version: 0.0.5
4
+ Summary: An integration of Qdrant ANN vector database backend with Haystack
5
+ License: Apache 2.0
6
+ Author: Kacper Łukawski
7
+ Author-email: kacper.lukawski@qdrant.com
8
+ Requires-Python: >=3.8.1,<=3.11
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Requires-Dist: farm-haystack (>=1.13.0,<2.0.0)
15
+ Requires-Dist: qdrant-client (>=1.1.2,<2.0.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ # qdrant-haystack
19
+
20
+ An integration of [Qdrant](https://qdrant.tech) vector database with [Haystack](https://haystack.deepset.ai/)
21
+ by [deepset](https://www.deepset.ai).
22
+
23
+ The library allows using Qdrant as a document store, and provides a drop-in replacement
24
+ for any other vector embeddings store. Thus, you should expect any kind of application to be working
25
+ smoothly just by changing the provider to `QdrantDocumentStore`.
26
+
27
+ ## Installation
28
+
29
+ `qdrant-haystack` can be installed like any other Python library, using pip or poetry:
30
+
31
+ ```bash
32
+ pip install qdrant-haystack
33
+ ```
34
+
35
+ ```bash
36
+ poetry add qdrant-haystack
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ Once installed, you can start using `QdrantDocumentStore` like any other store that supports
42
+ embeddings.
43
+
44
+ ```python
45
+ from qdrant_haystack import QdrantDocumentStore
46
+
47
+ document_store = QdrantDocumentStore(
48
+ "localhost",
49
+ index="Document",
50
+ embedding_dim=512,
51
+ recreate_index=True,
52
+ hnsw_config={"m": 16, "ef_construct": 64} # Optional
53
+ )
54
+ ```
55
+
56
+ The list of parameters accepted by `QdrantDocumentStore` is complementary to those used in the
57
+ official [Python Qdrant client](https://github.com/qdrant/qdrant_client).
58
+
59
+ ### Using local in-memory / disk-persisted mode
60
+
61
+ Qdrant Python client, from version 1.1.1, supports local in-memory/disk-persisted mode. That's
62
+ a good choice for any test scenarios and quick experiments in which you do not plan to store
63
+ lots of vectors. In such a case, spinning up a Docker container might not even be required.
64
+
65
+ The local mode was also implemented in `qdrant-haystack` integration.
66
+
67
+ #### In-memory storage
68
+
69
+ In case you want to have a transient storage, for example in case of automated tests launched
70
+ during your CI/CD pipeline, using Qdrant Local mode with in-memory storage might be a preferred
71
+ option. It can be enabled simply by passing `:memory:` as the first parameter when creating an
72
+ instance of `QdrantDocumentStore`.
73
+
74
+ ```python
75
+ from qdrant_haystack import QdrantDocumentStore
76
+
77
+ document_store = QdrantDocumentStore(
78
+ ":memory:",
79
+ index="Document",
80
+ embedding_dim=512,
81
+ recreate_index=True,
82
+ hnsw_config={"m": 16, "ef_construct": 64} # Optional
83
+ )
84
+ ```
85
+
86
+ #### On disk storage
87
+
88
+ However, if you prefer to keep the vectors between different runs of your application, it
89
+ might be better to use on disk storage and pass the path that should be used to persist
90
+ the data.
91
+
92
+ ```python
93
+ from qdrant_haystack import QdrantDocumentStore
94
+
95
+ document_store = QdrantDocumentStore(
96
+ path="/home/qdrant/storage_local",
97
+ index="Document",
98
+ embedding_dim=512,
99
+ recreate_index=True,
100
+ hnsw_config={"m": 16, "ef_construct": 64} # Optional
101
+ )
102
+ ```
103
+
104
+ ### Connecting to Qdrant Cloud cluster
105
+
106
+ If you prefer not to manage your own Qdrant instance, [Qdrant Cloud](https://cloud.qdrant.io/)
107
+ might be a better option.
108
+
109
+ ```python
110
+ from qdrant_haystack import QdrantDocumentStore
111
+
112
+ document_store = QdrantDocumentStore(
113
+ "https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io",
114
+ index="Document",
115
+ api_key="<< YOUR QDRANT CLOUD API KEY >>",
116
+ embedding_dim=512,
117
+ recreate_index=True,
118
+ )
119
+ ```
120
+
121
+ There is no difference in terms of functionality between local instances and cloud clusters.
122
+
@@ -2,9 +2,9 @@ qdrant_haystack/__init__.py,sha256=F7uhTaB7pul_sbPyBQNYIz5eTOVCW5f9qF3FjiN8CRM,1
2
2
  qdrant_haystack/document_stores/__init__.py,sha256=RbJ_JV8mXcmWI9LrHj0gPPEokPfoKGi6zFwkpXvHsjA,107
3
3
  qdrant_haystack/document_stores/converters.py,sha256=UkmoK2AMS_zEUy2C9B-04DdMdL0KxANB5d7QBOySzY0,2285
4
4
  qdrant_haystack/document_stores/filters.py,sha256=lZumUKgAz1vlWkI0x2jqITDaHDGkth8n3bw1uXTqmjM,8295
5
- qdrant_haystack/document_stores/qdrant.py,sha256=-ysi39v0lHS0XByh1usUFkOOr8h_LwXBf3qwcP3t44I,18121
5
+ qdrant_haystack/document_stores/qdrant.py,sha256=hekVs25mnqHcjId9buWzlr7aIsMIq1YjkZbGEQrm3Uw,19054
6
6
  qdrant_haystack/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- qdrant_haystack-0.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- qdrant_haystack-0.0.3.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
9
- qdrant_haystack-0.0.3.dist-info/METADATA,sha256=AhstKT1BYXBABZCImGXtFJkT902ZVg5eZVIWSh1P2G0,1743
10
- qdrant_haystack-0.0.3.dist-info/RECORD,,
7
+ qdrant_haystack-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ qdrant_haystack-0.0.5.dist-info/METADATA,sha256=LVmmp970YdLGka5aLH-ycUMttzzbr8bpjH83iJUgiDg,3797
9
+ qdrant_haystack-0.0.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
10
+ qdrant_haystack-0.0.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.4.0
2
+ Generator: poetry-core 1.5.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,57 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: qdrant-haystack
3
- Version: 0.0.3
4
- Summary: An integration of Qdrant ANN vector database backend with Haystack
5
- License: Apache 2.0
6
- Author: Kacper Łukawski
7
- Author-email: kacper.lukawski@qdrant.com
8
- Requires-Python: >=3.8.1,<=3.11
9
- Classifier: License :: Other/Proprietary License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Requires-Dist: farm-haystack (>=1.13.0,<2.0.0)
15
- Requires-Dist: qdrant-client (>=1.0.2,<2.0.0)
16
- Description-Content-Type: text/markdown
17
-
18
- # qdrant-haystack
19
-
20
- An integration of [Qdrant](https://qdrant.tech) vector database with [Haystack](https://haystack.deepset.ai/)
21
- by [deepset](https://www.deepset.ai).
22
-
23
- The library finally allows using Qdrant as a document store, and provides an in-place replacement
24
- for any other vector embeddings store. Thus, you should expect any kind of application to be working
25
- smoothly just by changing the provider to `QdrantDocumentStore`.
26
-
27
- ## Installation
28
-
29
- `qdrant-haystack` might be installed as any other Python library, using pip or poetry:
30
-
31
- ```bash
32
- pip install qdrant-haystack
33
- ```
34
-
35
- ```bash
36
- poetry add qdrant-haystack
37
- ```
38
-
39
- ## Usage
40
-
41
- Once installed, you can already start using `QdrantDocumentStore` as any other store that supports
42
- embeddings.
43
-
44
- ```python
45
- from qdrant_haystack import QdrantDocumentStore
46
-
47
- document_store = QdrantDocumentStore(
48
- host="localhost",
49
- index="Document",
50
- embedding_dim=512,
51
- recreate_index=True,
52
- )
53
- ```
54
-
55
- The list of parameters accepted by `QdrantDocumentStore` is complementary to those used in the
56
- official [Python Qdrant client](https://github.com/qdrant/qdrant_client).
57
-