qdrant-haystack 4.1.2__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdrant-haystack might be problematic. Click here for more details.
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/CHANGELOG.md +16 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/PKG-INFO +1 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/examples/embedding_retrieval.py +1 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/pyproject.toml +6 -4
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +2 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +37 -54
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +2 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_converters.py +2 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_dict_converters.py +1 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_document_store.py +3 -14
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_filters.py +2 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_legacy_filters.py +1 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/test_retriever.py +5 -1
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/.gitignore +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/LICENSE.txt +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/README.md +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/__init__.py +0 -0
- {qdrant_haystack-4.1.2 → qdrant_haystack-5.0.0}/tests/conftest.py +0 -0
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v4.2.0] - 2024-08-27
|
|
4
|
+
|
|
5
|
+
### 🚜 Refactor
|
|
6
|
+
|
|
7
|
+
- Qdrant Query API (#1025)
|
|
8
|
+
|
|
9
|
+
### 🧪 Testing
|
|
10
|
+
|
|
11
|
+
- Do not retry tests in `hatch run test` command (#954)
|
|
12
|
+
|
|
13
|
+
## [integrations/qdrant-v4.1.2] - 2024-07-15
|
|
14
|
+
|
|
15
|
+
### 🐛 Bug Fixes
|
|
16
|
+
|
|
17
|
+
- `qdrant` - Fallback to default filter policy when deserializing retrievers without the init parameter (#902)
|
|
18
|
+
|
|
3
19
|
## [integrations/qdrant-v4.1.1] - 2024-07-10
|
|
4
20
|
|
|
5
21
|
### 🚀 Features
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 4.1.2
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -12,6 +12,7 @@ from haystack.components.converters import MarkdownToDocument
|
|
|
12
12
|
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
|
|
13
13
|
from haystack.components.preprocessors import DocumentSplitter
|
|
14
14
|
from haystack.components.writers import DocumentWriter
|
|
15
|
+
|
|
15
16
|
from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
|
|
16
17
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
17
18
|
|
|
@@ -46,10 +46,12 @@ git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*
|
|
|
46
46
|
[tool.hatch.envs.default]
|
|
47
47
|
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
|
|
48
48
|
[tool.hatch.envs.default.scripts]
|
|
49
|
-
test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
50
|
-
test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
|
|
49
|
+
test = "pytest {args:tests}"
|
|
50
|
+
test-cov = "coverage run -m pytest {args:tests}"
|
|
51
|
+
test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
|
|
51
52
|
cov-report = ["- coverage combine", "coverage report"]
|
|
52
53
|
cov = ["test-cov", "cov-report"]
|
|
54
|
+
cov-retry = ["test-cov-retry", "cov-report"]
|
|
53
55
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
54
56
|
|
|
55
57
|
[[tool.hatch.envs.all.matrix]]
|
|
@@ -60,8 +62,8 @@ detached = true
|
|
|
60
62
|
dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
61
63
|
[tool.hatch.envs.lint.scripts]
|
|
62
64
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
63
|
-
style = ["ruff check {args:.}", "black --check --diff {args:.}"]
|
|
64
|
-
fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
|
|
65
|
+
style = ["ruff check {args:. --exclude tests/, examples/}", "black --check --diff {args:.}"]
|
|
66
|
+
fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/, examples/}", "style"]
|
|
65
67
|
all = ["style", "typing"]
|
|
66
68
|
|
|
67
69
|
[tool.black]
|
|
@@ -4,9 +4,10 @@ from haystack import Document, component, default_from_dict, default_to_dict
|
|
|
4
4
|
from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
5
5
|
from haystack.document_stores.types import FilterPolicy
|
|
6
6
|
from haystack.document_stores.types.filter_policy import apply_filter_policy
|
|
7
|
-
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
8
7
|
from qdrant_client.http import models
|
|
9
8
|
|
|
9
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
10
|
+
|
|
10
11
|
|
|
11
12
|
@component
|
|
12
13
|
class QdrantEmbeddingRetriever:
|
|
@@ -15,7 +15,6 @@ from haystack.utils.filters import convert as convert_legacy_filters
|
|
|
15
15
|
from qdrant_client import grpc
|
|
16
16
|
from qdrant_client.http import models as rest
|
|
17
17
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
|
18
|
-
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
|
|
19
18
|
from tqdm import tqdm
|
|
20
19
|
|
|
21
20
|
from .converters import (
|
|
@@ -335,7 +334,7 @@ class QdrantDocumentStore:
|
|
|
335
334
|
self,
|
|
336
335
|
documents: List[Document],
|
|
337
336
|
policy: DuplicatePolicy = DuplicatePolicy.FAIL,
|
|
338
|
-
):
|
|
337
|
+
) -> int:
|
|
339
338
|
"""
|
|
340
339
|
Writes documents to Qdrant using the specified policy.
|
|
341
340
|
The QdrantDocumentStore can handle duplicate documents based on the given policy.
|
|
@@ -359,7 +358,7 @@ class QdrantDocumentStore:
|
|
|
359
358
|
|
|
360
359
|
if len(documents) == 0:
|
|
361
360
|
logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
|
|
362
|
-
return
|
|
361
|
+
return 0
|
|
363
362
|
|
|
364
363
|
document_objects = self._handle_duplicate_documents(
|
|
365
364
|
documents=documents,
|
|
@@ -384,13 +383,13 @@ class QdrantDocumentStore:
|
|
|
384
383
|
progress_bar.update(self.write_batch_size)
|
|
385
384
|
return len(document_objects)
|
|
386
385
|
|
|
387
|
-
def delete_documents(self,
|
|
386
|
+
def delete_documents(self, document_ids: List[str]) -> None:
|
|
388
387
|
"""
|
|
389
388
|
Deletes documents that match the provided `document_ids` from the document store.
|
|
390
389
|
|
|
391
390
|
:param document_ids: the document ids to delete
|
|
392
391
|
"""
|
|
393
|
-
ids = [convert_id(_id) for _id in
|
|
392
|
+
ids = [convert_id(_id) for _id in document_ids]
|
|
394
393
|
try:
|
|
395
394
|
self.client.delete(
|
|
396
395
|
collection_name=self.index,
|
|
@@ -537,20 +536,18 @@ class QdrantDocumentStore:
|
|
|
537
536
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
538
537
|
query_indices = query_sparse_embedding.indices
|
|
539
538
|
query_values = query_sparse_embedding.values
|
|
540
|
-
points = self.client.
|
|
539
|
+
points = self.client.query_points(
|
|
541
540
|
collection_name=self.index,
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
indices=query_indices,
|
|
546
|
-
values=query_values,
|
|
547
|
-
),
|
|
541
|
+
query=rest.SparseVector(
|
|
542
|
+
indices=query_indices,
|
|
543
|
+
values=query_values,
|
|
548
544
|
),
|
|
545
|
+
using=SPARSE_VECTORS_NAME,
|
|
549
546
|
query_filter=qdrant_filters,
|
|
550
547
|
limit=top_k,
|
|
551
548
|
with_vectors=return_embedding,
|
|
552
549
|
score_threshold=score_threshold,
|
|
553
|
-
)
|
|
550
|
+
).points
|
|
554
551
|
results = [
|
|
555
552
|
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
556
553
|
for point in points
|
|
@@ -588,17 +585,15 @@ class QdrantDocumentStore:
|
|
|
588
585
|
"""
|
|
589
586
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
590
587
|
|
|
591
|
-
points = self.client.
|
|
588
|
+
points = self.client.query_points(
|
|
592
589
|
collection_name=self.index,
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
vector=query_embedding,
|
|
596
|
-
),
|
|
590
|
+
query=query_embedding,
|
|
591
|
+
using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
|
|
597
592
|
query_filter=qdrant_filters,
|
|
598
593
|
limit=top_k,
|
|
599
594
|
with_vectors=return_embedding,
|
|
600
595
|
score_threshold=score_threshold,
|
|
601
|
-
)
|
|
596
|
+
).points
|
|
602
597
|
results = [
|
|
603
598
|
convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
|
|
604
599
|
for point in points
|
|
@@ -655,46 +650,34 @@ class QdrantDocumentStore:
|
|
|
655
650
|
|
|
656
651
|
qdrant_filters = convert_filters_to_qdrant(filters)
|
|
657
652
|
|
|
658
|
-
sparse_request = rest.SearchRequest(
|
|
659
|
-
vector=rest.NamedSparseVector(
|
|
660
|
-
name=SPARSE_VECTORS_NAME,
|
|
661
|
-
vector=rest.SparseVector(
|
|
662
|
-
indices=query_sparse_embedding.indices,
|
|
663
|
-
values=query_sparse_embedding.values,
|
|
664
|
-
),
|
|
665
|
-
),
|
|
666
|
-
filter=qdrant_filters,
|
|
667
|
-
limit=top_k,
|
|
668
|
-
with_payload=True,
|
|
669
|
-
with_vector=return_embedding,
|
|
670
|
-
score_threshold=score_threshold,
|
|
671
|
-
)
|
|
672
|
-
|
|
673
|
-
dense_request = rest.SearchRequest(
|
|
674
|
-
vector=rest.NamedVector(
|
|
675
|
-
name=DENSE_VECTORS_NAME,
|
|
676
|
-
vector=query_embedding,
|
|
677
|
-
),
|
|
678
|
-
filter=qdrant_filters,
|
|
679
|
-
limit=top_k,
|
|
680
|
-
with_payload=True,
|
|
681
|
-
with_vector=return_embedding,
|
|
682
|
-
)
|
|
683
|
-
|
|
684
653
|
try:
|
|
685
|
-
|
|
686
|
-
collection_name=self.index,
|
|
687
|
-
|
|
654
|
+
points = self.client.query_points(
|
|
655
|
+
collection_name=self.index,
|
|
656
|
+
prefetch=[
|
|
657
|
+
rest.Prefetch(
|
|
658
|
+
query=rest.SparseVector(
|
|
659
|
+
indices=query_sparse_embedding.indices,
|
|
660
|
+
values=query_sparse_embedding.values,
|
|
661
|
+
),
|
|
662
|
+
using=SPARSE_VECTORS_NAME,
|
|
663
|
+
filter=qdrant_filters,
|
|
664
|
+
),
|
|
665
|
+
rest.Prefetch(
|
|
666
|
+
query=query_embedding,
|
|
667
|
+
using=DENSE_VECTORS_NAME,
|
|
668
|
+
filter=qdrant_filters,
|
|
669
|
+
),
|
|
670
|
+
],
|
|
671
|
+
query=rest.FusionQuery(fusion=rest.Fusion.RRF),
|
|
672
|
+
limit=top_k,
|
|
673
|
+
score_threshold=score_threshold,
|
|
674
|
+
with_payload=True,
|
|
675
|
+
with_vectors=return_embedding,
|
|
676
|
+
).points
|
|
688
677
|
except Exception as e:
|
|
689
678
|
msg = "Error during hybrid search"
|
|
690
679
|
raise QdrantStoreError(msg) from e
|
|
691
680
|
|
|
692
|
-
try:
|
|
693
|
-
points = reciprocal_rank_fusion(responses=[dense_request_response, sparse_request_response], limit=top_k)
|
|
694
|
-
except Exception as e:
|
|
695
|
-
msg = "Error while applying Reciprocal Rank Fusion"
|
|
696
|
-
raise QdrantStoreError(msg) from e
|
|
697
|
-
|
|
698
681
|
results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
|
|
699
682
|
|
|
700
683
|
return results
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
|
|
4
|
-
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
5
4
|
from qdrant_client.http import models
|
|
6
5
|
|
|
6
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
7
|
+
|
|
7
8
|
logger = logging.getLogger(__name__)
|
|
8
9
|
logger.addHandler(logging.StreamHandler())
|
|
9
10
|
logger.setLevel(logging.INFO)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
from qdrant_client.http import models as rest
|
|
3
|
+
|
|
2
4
|
from haystack_integrations.document_stores.qdrant.converters import (
|
|
3
5
|
convert_id,
|
|
4
6
|
convert_qdrant_point_to_haystack_document,
|
|
5
7
|
)
|
|
6
|
-
from qdrant_client.http import models as rest
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def test_convert_id_is_deterministic():
|
|
@@ -12,12 +12,13 @@ from haystack.testing.document_store import (
|
|
|
12
12
|
WriteDocumentsTest,
|
|
13
13
|
_random_embeddings,
|
|
14
14
|
)
|
|
15
|
+
from qdrant_client.http import models as rest
|
|
16
|
+
|
|
15
17
|
from haystack_integrations.document_stores.qdrant.document_store import (
|
|
16
18
|
SPARSE_VECTORS_NAME,
|
|
17
19
|
QdrantDocumentStore,
|
|
18
20
|
QdrantStoreError,
|
|
19
21
|
)
|
|
20
|
-
from qdrant_client.http import models as rest
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
|
|
@@ -113,19 +114,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
|
|
|
113
114
|
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
114
115
|
embedding = [0.1] * 768
|
|
115
116
|
|
|
116
|
-
with patch.object(document_store.client, "
|
|
117
|
+
with patch.object(document_store.client, "query_points", side_effect=Exception("query_points")):
|
|
117
118
|
|
|
118
119
|
with pytest.raises(QdrantStoreError):
|
|
119
120
|
document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
|
|
120
|
-
|
|
121
|
-
@patch("haystack_integrations.document_stores.qdrant.document_store.reciprocal_rank_fusion")
|
|
122
|
-
def test_query_hybrid_reciprocal_rank_fusion_failure(self, mocked_fusion):
|
|
123
|
-
document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
|
|
124
|
-
|
|
125
|
-
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
|
|
126
|
-
embedding = [0.1] * 768
|
|
127
|
-
|
|
128
|
-
mocked_fusion.side_effect = Exception("reciprocal_rank_fusion error")
|
|
129
|
-
|
|
130
|
-
with pytest.raises(QdrantStoreError):
|
|
131
|
-
document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
|
|
@@ -4,9 +4,10 @@ import pytest
|
|
|
4
4
|
from haystack import Document
|
|
5
5
|
from haystack.testing.document_store import FilterDocumentsTest
|
|
6
6
|
from haystack.utils.filters import FilterError
|
|
7
|
-
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
8
7
|
from qdrant_client.http import models
|
|
9
8
|
|
|
9
|
+
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
10
|
+
|
|
10
11
|
|
|
11
12
|
class TestQdrantStoreBaseTests(FilterDocumentsTest):
|
|
12
13
|
@pytest.fixture
|
|
@@ -5,6 +5,7 @@ from haystack import Document
|
|
|
5
5
|
from haystack.document_stores.types import DocumentStore
|
|
6
6
|
from haystack.testing.document_store import LegacyFilterDocumentsTest
|
|
7
7
|
from haystack.utils.filters import FilterError
|
|
8
|
+
|
|
8
9
|
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
|
|
9
10
|
|
|
10
11
|
# The tests below are from haystack.testing.document_store.LegacyFilterDocumentsTest
|
|
@@ -8,6 +8,7 @@ from haystack.testing.document_store import (
|
|
|
8
8
|
FilterableDocsFixtureMixin,
|
|
9
9
|
_random_embeddings,
|
|
10
10
|
)
|
|
11
|
+
|
|
11
12
|
from haystack_integrations.components.retrievers.qdrant import (
|
|
12
13
|
QdrantEmbeddingRetriever,
|
|
13
14
|
QdrantHybridRetriever,
|
|
@@ -151,7 +152,10 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
|
|
|
151
152
|
filters={"field": "meta.chapter", "operator": "==", "value": "abstract"},
|
|
152
153
|
return_embedding=False,
|
|
153
154
|
)["documents"]
|
|
154
|
-
|
|
155
|
+
# we need to combine init filter and run filter as the policy is MERGE
|
|
156
|
+
# when we combine these filters we use AND logical operator by default
|
|
157
|
+
# so the result should be 1 as we have only one document that matches both filters
|
|
158
|
+
assert len(results) == 1
|
|
155
159
|
|
|
156
160
|
for document in results:
|
|
157
161
|
assert document.embedding is None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|