qdrant-haystack 3.3.0__py3-none-any.whl → 3.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- haystack_integrations/components/retrievers/qdrant/__init__.py +2 -2
- haystack_integrations/components/retrievers/qdrant/retriever.py +4 -4
- haystack_integrations/document_stores/qdrant/__init__.py +2 -1
- haystack_integrations/document_stores/qdrant/document_store.py +3 -1
- haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +127 -0
- {qdrant_haystack-3.3.0.dist-info → qdrant_haystack-3.4.0.dist-info}/METADATA +1 -1
- qdrant_haystack-3.4.0.dist-info/RECORD +11 -0
- {qdrant_haystack-3.3.0.dist-info → qdrant_haystack-3.4.0.dist-info}/WHEEL +1 -1
- qdrant_haystack-3.3.0.dist-info/RECORD +0 -10
- {qdrant_haystack-3.3.0.dist-info → qdrant_haystack-3.4.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -2,6 +2,6 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from .retriever import QdrantEmbeddingRetriever, QdrantSparseRetriever
|
|
5
|
+
from .retriever import QdrantEmbeddingRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseRetriever")
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -124,13 +124,13 @@ class QdrantEmbeddingRetriever:
|
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
@component
|
|
127
|
-
class QdrantSparseRetriever:
|
|
127
|
+
class QdrantSparseEmbeddingRetriever:
|
|
128
128
|
"""
|
|
129
129
|
A component for retrieving documents from a QdrantDocumentStore using sparse vectors.
|
|
130
130
|
|
|
131
131
|
Usage example:
|
|
132
132
|
```python
|
|
133
|
-
from haystack_integrations.components.retrievers.qdrant import QdrantSparseRetriever
|
|
133
|
+
from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
|
|
134
134
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
135
135
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
136
136
|
|
|
@@ -140,7 +140,7 @@ class QdrantSparseRetriever:
|
|
|
140
140
|
return_embedding=True,
|
|
141
141
|
wait_result_from_api=True,
|
|
142
142
|
)
|
|
143
|
-
retriever = QdrantSparseRetriever(document_store=document_store)
|
|
143
|
+
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
144
144
|
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
145
145
|
retriever.run(query_sparse_embedding=sparse_embedding)
|
|
146
146
|
```
|
|
@@ -155,7 +155,7 @@ class QdrantSparseRetriever:
|
|
|
155
155
|
return_embedding: bool = False,
|
|
156
156
|
):
|
|
157
157
|
"""
|
|
158
|
-
Create a QdrantSparseRetriever component.
|
|
158
|
+
Create a QdrantSparseEmbeddingRetriever component.
|
|
159
159
|
|
|
160
160
|
:param document_store: An instance of QdrantDocumentStore.
|
|
161
161
|
:param filters: A dictionary with filters to narrow down the search space. Default is None.
|
|
@@ -3,5 +3,6 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
from .document_store import QdrantDocumentStore
|
|
6
|
+
from .migrate_to_sparse import migrate_to_sparse_embeddings_support
|
|
6
7
|
|
|
7
|
-
__all__ = ("QdrantDocumentStore",)
|
|
8
|
+
__all__ = ("QdrantDocumentStore", "migrate_to_sparse_embeddings_support")
|
|
@@ -453,7 +453,9 @@ class QdrantDocumentStore:
|
|
|
453
453
|
f"Collection '{collection_name}' already exists in Qdrant, "
|
|
454
454
|
f"but it has been originally created without sparse embedding vectors. "
|
|
455
455
|
f"If you want to use that collection, you can set `use_sparse_embeddings=False`. "
|
|
456
|
-
f"To use sparse embeddings, you need to recreate the collection or migrate the existing one."
|
|
456
|
+
f"To use sparse embeddings, you need to recreate the collection or migrate the existing one. "
|
|
457
|
+
f"See `migrate_to_sparse_embeddings_support` function in "
|
|
458
|
+
f"`haystack_integrations.document_stores.qdrant`."
|
|
457
459
|
)
|
|
458
460
|
raise QdrantStoreError(msg)
|
|
459
461
|
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
5
|
+
from qdrant_client.http import models
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
logger.addHandler(logging.StreamHandler())
|
|
9
|
+
logger.setLevel(logging.INFO)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def migrate_to_sparse_embeddings_support(old_document_store: QdrantDocumentStore, new_index: str):
|
|
13
|
+
"""
|
|
14
|
+
Utility function to migrate an existing `QdrantDocumentStore` to a new one with support for sparse embeddings.
|
|
15
|
+
|
|
16
|
+
With qdrant-haystack v3.3.0, support for sparse embeddings has been added to `QdrantDocumentStore`.
|
|
17
|
+
This feature is disabled by default and can be enabled by setting `use_sparse_embeddings=True` in the init
|
|
18
|
+
parameters. To store sparse embeddings, Document stores/collections created with this feature disabled must be
|
|
19
|
+
migrated to a new collection with the feature enabled.
|
|
20
|
+
|
|
21
|
+
This utility function applies to on-premise and cloud instances of Qdrant.
|
|
22
|
+
It does not work for local in-memory/disk-persisted instances.
|
|
23
|
+
|
|
24
|
+
The utility function merely migrates the existing documents so that they are ready to store sparse embeddings.
|
|
25
|
+
It does not compute sparse embeddings. To do this, you need to use a Sparse Embedder component.
|
|
26
|
+
|
|
27
|
+
Example usage:
|
|
28
|
+
```python
|
|
29
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
30
|
+
from haystack_integrations.document_stores.qdrant import migrate_to_sparse_embeddings_support
|
|
31
|
+
|
|
32
|
+
old_document_store = QdrantDocumentStore(url="http://localhost:6333",
|
|
33
|
+
index="Document",
|
|
34
|
+
use_sparse_embeddings=False)
|
|
35
|
+
new_index = "Document_sparse"
|
|
36
|
+
|
|
37
|
+
migrate_to_sparse_embeddings_support(old_document_store, new_index)
|
|
38
|
+
|
|
39
|
+
# now you can use the new document store with sparse embeddings support
|
|
40
|
+
new_document_store = QdrantDocumentStore(url="http://localhost:6333",
|
|
41
|
+
index=new_index,
|
|
42
|
+
use_sparse_embeddings=True)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
:param old_document_store: The existing QdrantDocumentStore instance to migrate from.
|
|
47
|
+
:param new_index: The name of the new index/collection to create with sparse embeddings support.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
start = time.time()
|
|
51
|
+
|
|
52
|
+
old_collection_name = old_document_store.index
|
|
53
|
+
total_points = old_document_store.count_documents()
|
|
54
|
+
|
|
55
|
+
# copy the init parameters of the old document to create a new document store
|
|
56
|
+
init_parameters = old_document_store.to_dict()["init_parameters"]
|
|
57
|
+
init_parameters["index"] = new_index
|
|
58
|
+
init_parameters["use_sparse_embeddings"] = True
|
|
59
|
+
init_parameters["recreate_index"] = True
|
|
60
|
+
|
|
61
|
+
new_document_store = QdrantDocumentStore(**init_parameters)
|
|
62
|
+
|
|
63
|
+
client = new_document_store.client
|
|
64
|
+
|
|
65
|
+
original_indexing_threshold = client.get_collection(
|
|
66
|
+
collection_name=new_index
|
|
67
|
+
).config.optimizer_config.indexing_threshold
|
|
68
|
+
|
|
69
|
+
# disable indexing while adding points so it's faster
|
|
70
|
+
# https://qdrant.tech/documentation/concepts/collections/#update-collection-parameters
|
|
71
|
+
client.update_collection(
|
|
72
|
+
collection_name=new_index,
|
|
73
|
+
optimizer_config=models.OptimizersConfigDiff(indexing_threshold=0),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# migration loop
|
|
77
|
+
next_page_offset = "first"
|
|
78
|
+
offset = None
|
|
79
|
+
points_transmitted = 0
|
|
80
|
+
|
|
81
|
+
while next_page_offset:
|
|
82
|
+
if next_page_offset != "first":
|
|
83
|
+
offset = next_page_offset
|
|
84
|
+
|
|
85
|
+
# get the records
|
|
86
|
+
records = client.scroll(
|
|
87
|
+
collection_name=old_collection_name,
|
|
88
|
+
limit=100,
|
|
89
|
+
with_payload=True,
|
|
90
|
+
with_vectors=True,
|
|
91
|
+
offset=offset,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
next_page_offset = records[1]
|
|
95
|
+
current_records = records[0]
|
|
96
|
+
|
|
97
|
+
points = []
|
|
98
|
+
|
|
99
|
+
for record in current_records:
|
|
100
|
+
vector = {}
|
|
101
|
+
|
|
102
|
+
vector["text-dense"] = record.vector
|
|
103
|
+
|
|
104
|
+
point = {"id": record.id, "payload": record.payload, "vector": vector}
|
|
105
|
+
|
|
106
|
+
embedding_point = models.PointStruct(**point)
|
|
107
|
+
points.append(embedding_point)
|
|
108
|
+
|
|
109
|
+
client.upsert(collection_name=new_index, points=points)
|
|
110
|
+
|
|
111
|
+
points_transmitted += len(points)
|
|
112
|
+
points_remaining = total_points - points_transmitted
|
|
113
|
+
|
|
114
|
+
message = (
|
|
115
|
+
f"Points transmitted: {points_transmitted}/{total_points}\n"
|
|
116
|
+
f"Percent done {points_transmitted/total_points*100:.2f}%\n"
|
|
117
|
+
f"Time elapsed: {time.time() - start:.2f} seconds\n"
|
|
118
|
+
f"Time remaining: {(((time.time() - start) / points_transmitted) * points_remaining) / 60:.2f} minutes\n"
|
|
119
|
+
f"Current offset: {next_page_offset}\n"
|
|
120
|
+
)
|
|
121
|
+
logger.info(message)
|
|
122
|
+
|
|
123
|
+
# restore the original indexing threshold (re-enable indexing)
|
|
124
|
+
client.update_collection(
|
|
125
|
+
collection_name=new_index,
|
|
126
|
+
optimizer_config=models.OptimizersConfigDiff(indexing_threshold=original_indexing_threshold),
|
|
127
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.3.0
|
|
3
|
+
Version: 3.4.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=Paisl7uv_QHK9HdCOCLP3tW5PAI8XzoDKlCnPc1xvPk,265
|
|
2
|
+
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=62t6YYSA7FX4ZvDasPERMrXKr6nk0NEqJLEEE0p1J7k,8853
|
|
3
|
+
haystack_integrations/document_stores/qdrant/__init__.py,sha256=kUGc5uewqArhmVR-JqB_NmJ4kNkTIQIvYDNSoO2ELn0,302
|
|
4
|
+
haystack_integrations/document_stores/qdrant/converters.py,sha256=oSO2YlsWEQbcw9CPlWfSg_HoTZlnkAhZw_6VlYWzKKs,2525
|
|
5
|
+
haystack_integrations/document_stores/qdrant/document_store.py,sha256=uZkTKjHhFQFv27C4LQ6xJO3kFf0m_aNhlpAyEyuP8n4,22943
|
|
6
|
+
haystack_integrations/document_stores/qdrant/filters.py,sha256=iNWOqv1otUaXTURXd8e9QOYg8sx3Qm_LOqOaxAP2xJI,8249
|
|
7
|
+
haystack_integrations/document_stores/qdrant/migrate_to_sparse.py,sha256=i6wBC_9_JVzYZtqKm3dhHKTxhwNdcAdpgki8GABDp1c,4909
|
|
8
|
+
qdrant_haystack-3.4.0.dist-info/METADATA,sha256=el2vEKE_LY7BerXSrZVwBdPJmaqeIW8mwdNM_U6zfE4,1799
|
|
9
|
+
qdrant_haystack-3.4.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
10
|
+
qdrant_haystack-3.4.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
11
|
+
qdrant_haystack-3.4.0.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
haystack_integrations/components/retrievers/qdrant/__init__.py,sha256=5P4opz_iVPY-6ntpNdQgTu2MAS102_ki8fT-rm7aiJs,247
|
|
2
|
-
haystack_integrations/components/retrievers/qdrant/retriever.py,sha256=M1_5suJnF9VDC0fPbHyfU8kx_YIYG7TEyAWkIYbLTN8,8817
|
|
3
|
-
haystack_integrations/document_stores/qdrant/__init__.py,sha256=PuGxUj29V00f6UiCpTHRkzGufL8bJUML2iNwJnX2KwM,195
|
|
4
|
-
haystack_integrations/document_stores/qdrant/converters.py,sha256=oSO2YlsWEQbcw9CPlWfSg_HoTZlnkAhZw_6VlYWzKKs,2525
|
|
5
|
-
haystack_integrations/document_stores/qdrant/document_store.py,sha256=HEUkzkcxRlTSegx1aq95ay1lJFaEmQh8NyxMf3TAarQ,22800
|
|
6
|
-
haystack_integrations/document_stores/qdrant/filters.py,sha256=iNWOqv1otUaXTURXd8e9QOYg8sx3Qm_LOqOaxAP2xJI,8249
|
|
7
|
-
qdrant_haystack-3.3.0.dist-info/METADATA,sha256=dqi3IRTo3fqIASBVbFW8I8y1F9a14vU4z-dAMJKlUOU,1799
|
|
8
|
-
qdrant_haystack-3.3.0.dist-info/WHEEL,sha256=as-1oFTWSeWBgyzh0O_qF439xqBe6AbBgt4MfYe5zwY,87
|
|
9
|
-
qdrant_haystack-3.3.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
10
|
-
qdrant_haystack-3.3.0.dist-info/RECORD,,
|
|
File without changes
|