qdrant-haystack 3.3.1__tar.gz → 3.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/PKG-INFO +1 -1
- qdrant_haystack-3.4.0/examples/embedding_retrieval.py +52 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/pyproject.toml +2 -0
- qdrant_haystack-3.4.0/src/haystack_integrations/components/retrievers/qdrant/__init__.py +7 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +4 -4
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_retriever.py +8 -8
- qdrant_haystack-3.3.1/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -7
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/.gitignore +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/LICENSE.txt +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/README.md +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/__init__.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_converters.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_document_store.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_filters.py +0 -0
- {qdrant_haystack-3.3.1 → qdrant_haystack-3.4.0}/tests/test_legacy_filters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 3.3.1
|
|
3
|
+
Version: 3.4.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Install required packages for this example, including qdrant-haystack and other libraries needed
|
|
2
|
+
# for Markdown conversion and embeddings generation. Use the following command:
|
|
3
|
+
# pip install qdrant-haystack markdown-it-py mdit_plain sentence-transformers
|
|
4
|
+
|
|
5
|
+
# Download some Markdown files to index.
|
|
6
|
+
# git clone https://github.com/anakin87/neural-search-pills
|
|
7
|
+
|
|
8
|
+
import glob
|
|
9
|
+
|
|
10
|
+
from haystack import Pipeline
|
|
11
|
+
from haystack.components.converters import MarkdownToDocument
|
|
12
|
+
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
|
13
|
+
from haystack.components.preprocessors import DocumentSplitter
|
|
14
|
+
from haystack.components.writers import DocumentWriter
|
|
15
|
+
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
|
|
16
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
17
|
+
|
|
18
|
+
# Initialize QdrantDocumentStore: for simplicity, we use an in-memory store here.
|
|
19
|
+
# You can also run a Qdrant instance using Docker or use Qdrant Cloud.
|
|
20
|
+
document_store = QdrantDocumentStore(
|
|
21
|
+
":memory:",
|
|
22
|
+
index="Document",
|
|
23
|
+
embedding_dim=768,
|
|
24
|
+
recreate_index=True,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Create the indexing Pipeline and index some documents
|
|
28
|
+
file_paths = glob.glob("neural-search-pills/pills/*.md")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
indexing = Pipeline()
|
|
32
|
+
indexing.add_component("converter", MarkdownToDocument())
|
|
33
|
+
indexing.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
|
|
34
|
+
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
|
|
35
|
+
indexing.add_component("writer", DocumentWriter(document_store))
|
|
36
|
+
indexing.connect("converter", "splitter")
|
|
37
|
+
indexing.connect("splitter", "embedder")
|
|
38
|
+
indexing.connect("embedder", "writer")
|
|
39
|
+
|
|
40
|
+
indexing.run({"converter": {"sources": file_paths}})
|
|
41
|
+
|
|
42
|
+
# Create the querying Pipeline and try a query
|
|
43
|
+
querying = Pipeline()
|
|
44
|
+
querying.add_component("embedder", SentenceTransformersTextEmbedder())
|
|
45
|
+
querying.add_component("retriever", QdrantEmbeddingRetriever(document_store=document_store, top_k=3))
|
|
46
|
+
querying.connect("embedder", "retriever")
|
|
47
|
+
|
|
48
|
+
results = querying.run({"embedder": {"text": "What is a cross-encoder?"}})
|
|
49
|
+
|
|
50
|
+
for doc in results["retriever"]["documents"]:
|
|
51
|
+
print(doc)
|
|
52
|
+
print("-" * 10)
|
|
@@ -127,6 +127,8 @@ ban-relative-imports = "parents"
|
|
|
127
127
|
[tool.ruff.per-file-ignores]
|
|
128
128
|
# Tests can use magic values, assertions, and relative imports
|
|
129
129
|
"tests/**/*" = ["PLR2004", "S101", "TID252"]
|
|
130
|
+
# examples can contain "print" commands
|
|
131
|
+
"examples/**/*" = ["T201"]
|
|
130
132
|
|
|
131
133
|
|
|
132
134
|
[tool.coverage.run]
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .retriever import QdrantEmbeddingRetriever, QdrantSparseEmbeddingRetriever
|
|
6
|
+
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -124,13 +124,13 @@ class QdrantEmbeddingRetriever:
|
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
@component
|
|
127
|
-
class QdrantSparseRetriever:
|
|
127
|
+
class QdrantSparseEmbeddingRetriever:
|
|
128
128
|
"""
|
|
129
129
|
A component for retrieving documents from an QdrantDocumentStore using sparse vectors.
|
|
130
130
|
|
|
131
131
|
Usage example:
|
|
132
132
|
```python
|
|
133
|
-
from haystack_integrations.components.retrievers.qdrant import QdrantSparseRetriever
|
|
133
|
+
from haystack_integrations.components.retrievers.qdrant import QdrantSparseEmbeddingRetriever
|
|
134
134
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
135
135
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
136
136
|
|
|
@@ -140,7 +140,7 @@ class QdrantSparseRetriever:
|
|
|
140
140
|
return_embedding=True,
|
|
141
141
|
wait_result_from_api=True,
|
|
142
142
|
)
|
|
143
|
-
retriever = QdrantSparseRetriever(document_store=document_store)
|
|
143
|
+
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
144
144
|
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
145
145
|
retriever.run(query_sparse_embedding=sparse_embedding)
|
|
146
146
|
```
|
|
@@ -155,7 +155,7 @@ class QdrantSparseRetriever:
|
|
|
155
155
|
return_embedding: bool = False,
|
|
156
156
|
):
|
|
157
157
|
"""
|
|
158
|
-
Create a QdrantSparseRetriever component.
|
|
158
|
+
Create a QdrantSparseEmbeddingRetriever component.
|
|
159
159
|
|
|
160
160
|
:param document_store: An instance of QdrantDocumentStore.
|
|
161
161
|
:param filters: A dictionary with filters to narrow down the search space. Default is None.
|
|
@@ -8,7 +8,7 @@ from haystack.testing.document_store import (
|
|
|
8
8
|
)
|
|
9
9
|
from haystack_integrations.components.retrievers.qdrant import (
|
|
10
10
|
QdrantEmbeddingRetriever,
|
|
11
|
-
QdrantSparseRetriever,
|
|
11
|
+
QdrantSparseEmbeddingRetriever,
|
|
12
12
|
)
|
|
13
13
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
14
14
|
|
|
@@ -135,10 +135,10 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
135
135
|
assert document.embedding is None
|
|
136
136
|
|
|
137
137
|
|
|
138
|
-
class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
|
|
138
|
+
class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
|
|
139
139
|
def test_init_default(self):
|
|
140
140
|
document_store = QdrantDocumentStore(location=":memory:", index="test")
|
|
141
|
-
retriever = QdrantSparseRetriever(document_store=document_store)
|
|
141
|
+
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
142
142
|
assert retriever._document_store == document_store
|
|
143
143
|
assert retriever._filters is None
|
|
144
144
|
assert retriever._top_k == 10
|
|
@@ -146,10 +146,10 @@ class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
|
|
|
146
146
|
|
|
147
147
|
def test_to_dict(self):
|
|
148
148
|
document_store = QdrantDocumentStore(location=":memory:", index="test")
|
|
149
|
-
retriever = QdrantSparseRetriever(document_store=document_store)
|
|
149
|
+
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
150
150
|
res = retriever.to_dict()
|
|
151
151
|
assert res == {
|
|
152
|
-
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantSparseRetriever",
|
|
152
|
+
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantSparseEmbeddingRetriever",
|
|
153
153
|
"init_parameters": {
|
|
154
154
|
"document_store": {
|
|
155
155
|
"type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
|
|
@@ -202,7 +202,7 @@ class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
|
|
|
202
202
|
|
|
203
203
|
def test_from_dict(self):
|
|
204
204
|
data = {
|
|
205
|
-
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantSparseRetriever",
|
|
205
|
+
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantSparseEmbeddingRetriever",
|
|
206
206
|
"init_parameters": {
|
|
207
207
|
"document_store": {
|
|
208
208
|
"init_parameters": {"location": ":memory:", "index": "test"},
|
|
@@ -214,7 +214,7 @@ class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
|
|
|
214
214
|
"return_embedding": True,
|
|
215
215
|
},
|
|
216
216
|
}
|
|
217
|
-
retriever = QdrantSparseRetriever.from_dict(data)
|
|
217
|
+
retriever = QdrantSparseEmbeddingRetriever.from_dict(data)
|
|
218
218
|
assert isinstance(retriever._document_store, QdrantDocumentStore)
|
|
219
219
|
assert retriever._document_store.index == "test"
|
|
220
220
|
assert retriever._filters is None
|
|
@@ -241,7 +241,7 @@ class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
|
|
|
241
241
|
doc.sparse_embedding = SparseEmbedding.from_dict(self._generate_mocked_sparse_embedding(1)[0])
|
|
242
242
|
|
|
243
243
|
document_store.write_documents(filterable_docs)
|
|
244
|
-
retriever = QdrantSparseRetriever(document_store=document_store)
|
|
244
|
+
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
|
|
245
245
|
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
246
246
|
|
|
247
247
|
results: List[Document] = retriever.run(query_sparse_embedding=sparse_embedding)["documents"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|