qdrant-haystack 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- qdrant_haystack/document_stores/qdrant.py +35 -21
- {qdrant_haystack-0.0.4.dist-info → qdrant_haystack-0.0.5.dist-info}/METADATA +49 -4
- {qdrant_haystack-0.0.4.dist-info → qdrant_haystack-0.0.5.dist-info}/RECORD +5 -5
- {qdrant_haystack-0.0.4.dist-info → qdrant_haystack-0.0.5.dist-info}/LICENSE +0 -0
- {qdrant_haystack-0.0.4.dist-info → qdrant_haystack-0.0.5.dist-info}/WHEEL +0 -0
|
@@ -37,6 +37,7 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
37
37
|
|
|
38
38
|
def __init__(
|
|
39
39
|
self,
|
|
40
|
+
location: Optional[str] = None,
|
|
40
41
|
url: Optional[str] = None,
|
|
41
42
|
port: int = 6333,
|
|
42
43
|
grpc_port: int = 6334,
|
|
@@ -46,6 +47,7 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
46
47
|
prefix: Optional[str] = None,
|
|
47
48
|
timeout: Optional[float] = None,
|
|
48
49
|
host: Optional[str] = None,
|
|
50
|
+
path: Optional[str] = None,
|
|
49
51
|
index: str = "Document",
|
|
50
52
|
embedding_dim: int = 768,
|
|
51
53
|
hnsw_config: Optional[Dict] = None,
|
|
@@ -62,6 +64,7 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
62
64
|
super().__init__()
|
|
63
65
|
|
|
64
66
|
self.client = qdrant_client.QdrantClient(
|
|
67
|
+
location=location,
|
|
65
68
|
url=url,
|
|
66
69
|
port=port,
|
|
67
70
|
grpc_port=grpc_port,
|
|
@@ -71,6 +74,7 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
71
74
|
prefix=prefix,
|
|
72
75
|
timeout=timeout,
|
|
73
76
|
host=host,
|
|
77
|
+
path=path,
|
|
74
78
|
**kwargs,
|
|
75
79
|
)
|
|
76
80
|
|
|
@@ -165,10 +169,11 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
165
169
|
|
|
166
170
|
next_offset = None
|
|
167
171
|
stop_scrolling = False
|
|
172
|
+
scroll_filter = self.qdrant_filter_converter.convert(None, ids)
|
|
168
173
|
while not stop_scrolling:
|
|
169
174
|
records, next_offset = self.client.scroll(
|
|
170
175
|
collection_name=index,
|
|
171
|
-
scroll_filter=
|
|
176
|
+
scroll_filter=scroll_filter,
|
|
172
177
|
limit=batch_size,
|
|
173
178
|
offset=next_offset,
|
|
174
179
|
with_payload=True,
|
|
@@ -201,7 +206,10 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
201
206
|
count_filter=qdrant_filters,
|
|
202
207
|
)
|
|
203
208
|
return response.count
|
|
204
|
-
except UnexpectedResponse:
|
|
209
|
+
except (UnexpectedResponse, ValueError):
|
|
210
|
+
# Qdrant local raises ValueError if the collection is not found, but
|
|
211
|
+
# with the remote server UnexpectedResponse is raised. Until that's unified,
|
|
212
|
+
# we need to catch both.
|
|
205
213
|
return 0
|
|
206
214
|
|
|
207
215
|
def get_embedding_count(
|
|
@@ -490,30 +498,36 @@ class QdrantDocumentStore(BaseDocumentStore):
|
|
|
490
498
|
# Check if the collection already exists and validate its
|
|
491
499
|
# current configuration with the parameters.
|
|
492
500
|
collection_info = self.client.get_collection(collection_name)
|
|
493
|
-
|
|
494
|
-
current_vector_size = collection_info.config.params.vectors.size
|
|
495
|
-
|
|
496
|
-
if current_distance != distance:
|
|
497
|
-
raise ValueError(
|
|
498
|
-
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
499
|
-
f"but it is configured with a similarity '{current_distance.name}'. "
|
|
500
|
-
f"If you want to use that collection, but with a different "
|
|
501
|
-
f"similarity, please set `recreate_collection=True` argument."
|
|
502
|
-
)
|
|
503
|
-
|
|
504
|
-
if current_vector_size != embedding_dim:
|
|
505
|
-
raise ValueError(
|
|
506
|
-
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
507
|
-
f"but it is configured with a vector size '{current_vector_size}'. "
|
|
508
|
-
f"If you want to use that collection, but with a different "
|
|
509
|
-
f"vector size, please set `recreate_collection=True` argument."
|
|
510
|
-
)
|
|
511
|
-
except (UnexpectedResponse, _InactiveRpcError):
|
|
501
|
+
except (UnexpectedResponse, _InactiveRpcError, ValueError):
|
|
512
502
|
# That indicates the collection does not exist, so it can be
|
|
513
503
|
# safely created with any configuration.
|
|
504
|
+
#
|
|
505
|
+
# Qdrant local raises ValueError if the collection is not found, but
|
|
506
|
+
# with the remote server UnexpectedResponse / _InactiveRpcError is raised.
|
|
507
|
+
# Until that's unified, we need to catch both.
|
|
514
508
|
self._recreate_collection(
|
|
515
509
|
collection_name, distance, embedding_dim, hnsw_config
|
|
516
510
|
)
|
|
511
|
+
return
|
|
512
|
+
|
|
513
|
+
current_distance = collection_info.config.params.vectors.distance
|
|
514
|
+
current_vector_size = collection_info.config.params.vectors.size
|
|
515
|
+
|
|
516
|
+
if current_distance != distance:
|
|
517
|
+
raise ValueError(
|
|
518
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
519
|
+
f"but it is configured with a similarity '{current_distance.name}'. "
|
|
520
|
+
f"If you want to use that collection, but with a different "
|
|
521
|
+
f"similarity, please set `recreate_collection=True` argument."
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
if current_vector_size != embedding_dim:
|
|
525
|
+
raise ValueError(
|
|
526
|
+
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
527
|
+
f"but it is configured with a vector size '{current_vector_size}'. "
|
|
528
|
+
f"If you want to use that collection, but with a different "
|
|
529
|
+
f"vector size, please set `recreate_collection=True` argument."
|
|
530
|
+
)
|
|
517
531
|
|
|
518
532
|
def _recreate_collection(
|
|
519
533
|
self, collection_name, distance, embedding_dim, hnsw_config
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
License: Apache 2.0
|
|
6
6
|
Author: Kacper Łukawski
|
|
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Requires-Dist: farm-haystack (>=1.13.0,<2.0.0)
|
|
15
|
-
Requires-Dist: qdrant-client (>=1.
|
|
15
|
+
Requires-Dist: qdrant-client (>=1.1.2,<2.0.0)
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
|
|
18
18
|
# qdrant-haystack
|
|
@@ -45,7 +45,7 @@ embeddings.
|
|
|
45
45
|
from qdrant_haystack import QdrantDocumentStore
|
|
46
46
|
|
|
47
47
|
document_store = QdrantDocumentStore(
|
|
48
|
-
|
|
48
|
+
"localhost",
|
|
49
49
|
index="Document",
|
|
50
50
|
embedding_dim=512,
|
|
51
51
|
recreate_index=True,
|
|
@@ -56,6 +56,51 @@ document_store = QdrantDocumentStore(
|
|
|
56
56
|
The list of parameters accepted by `QdrantDocumentStore` is complementary to those used in the
|
|
57
57
|
official [Python Qdrant client](https://github.com/qdrant/qdrant_client).
|
|
58
58
|
|
|
59
|
+
### Using local in-memory / disk-persisted mode
|
|
60
|
+
|
|
61
|
+
The Qdrant Python client, from version 1.1.1, supports local in-memory/disk-persisted mode. That's
|
|
62
|
+
a good choice for any test scenarios and quick experiments in which you do not plan to store
|
|
63
|
+
lots of vectors. In such a case spinning up a Docker container might not even be required.
|
|
64
|
+
|
|
65
|
+
The local mode was also implemented in `qdrant-haystack` integration.
|
|
66
|
+
|
|
67
|
+
#### In-memory storage
|
|
68
|
+
|
|
69
|
+
If you want transient storage, for example for automated tests launched
|
|
70
|
+
during your CI/CD pipeline, using Qdrant Local mode with in-memory storage might be a preferred
|
|
71
|
+
option. It can be enabled simply by passing `:memory:` as the first parameter when creating an
|
|
72
|
+
instance of `QdrantDocumentStore`.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from qdrant_haystack import QdrantDocumentStore
|
|
76
|
+
|
|
77
|
+
document_store = QdrantDocumentStore(
|
|
78
|
+
":memory:",
|
|
79
|
+
index="Document",
|
|
80
|
+
embedding_dim=512,
|
|
81
|
+
recreate_index=True,
|
|
82
|
+
hnsw_config={"m": 16, "ef_construct": 64} # Optional
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
#### On-disk storage
|
|
87
|
+
|
|
88
|
+
However, if you prefer to keep the vectors between different runs of your application, it
|
|
89
|
+
might be better to use on-disk storage and pass the path that should be used to persist
|
|
90
|
+
the data.
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from qdrant_haystack import QdrantDocumentStore
|
|
94
|
+
|
|
95
|
+
document_store = QdrantDocumentStore(
|
|
96
|
+
path="/home/qdrant/storage_local",
|
|
97
|
+
index="Document",
|
|
98
|
+
embedding_dim=512,
|
|
99
|
+
recreate_index=True,
|
|
100
|
+
hnsw_config={"m": 16, "ef_construct": 64} # Optional
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
59
104
|
### Connecting to Qdrant Cloud cluster
|
|
60
105
|
|
|
61
106
|
If you prefer not to manage your own Qdrant instance, [Qdrant Cloud](https://cloud.qdrant.io/)
|
|
@@ -65,7 +110,7 @@ might be a better option.
|
|
|
65
110
|
from qdrant_haystack import QdrantDocumentStore
|
|
66
111
|
|
|
67
112
|
document_store = QdrantDocumentStore(
|
|
68
|
-
|
|
113
|
+
"https://YOUR-CLUSTER-URL.aws.cloud.qdrant.io",
|
|
69
114
|
index="Document",
|
|
70
115
|
api_key="<< YOUR QDRANT CLOUD API KEY >>",
|
|
71
116
|
embedding_dim=512,
|
|
@@ -2,9 +2,9 @@ qdrant_haystack/__init__.py,sha256=F7uhTaB7pul_sbPyBQNYIz5eTOVCW5f9qF3FjiN8CRM,1
|
|
|
2
2
|
qdrant_haystack/document_stores/__init__.py,sha256=RbJ_JV8mXcmWI9LrHj0gPPEokPfoKGi6zFwkpXvHsjA,107
|
|
3
3
|
qdrant_haystack/document_stores/converters.py,sha256=UkmoK2AMS_zEUy2C9B-04DdMdL0KxANB5d7QBOySzY0,2285
|
|
4
4
|
qdrant_haystack/document_stores/filters.py,sha256=lZumUKgAz1vlWkI0x2jqITDaHDGkth8n3bw1uXTqmjM,8295
|
|
5
|
-
qdrant_haystack/document_stores/qdrant.py,sha256=
|
|
5
|
+
qdrant_haystack/document_stores/qdrant.py,sha256=hekVs25mnqHcjId9buWzlr7aIsMIq1YjkZbGEQrm3Uw,19054
|
|
6
6
|
qdrant_haystack/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
qdrant_haystack-0.0.
|
|
8
|
-
qdrant_haystack-0.0.
|
|
9
|
-
qdrant_haystack-0.0.
|
|
10
|
-
qdrant_haystack-0.0.
|
|
7
|
+
qdrant_haystack-0.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
+
qdrant_haystack-0.0.5.dist-info/METADATA,sha256=LVmmp970YdLGka5aLH-ycUMttzzbr8bpjH83iJUgiDg,3797
|
|
9
|
+
qdrant_haystack-0.0.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
|
10
|
+
qdrant_haystack-0.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|