qdrant-haystack 4.1.2__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (23)
  1. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/CHANGELOG.md +6 -0
  2. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/PKG-INFO +1 -1
  3. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/examples/embedding_retrieval.py +1 -0
  4. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/pyproject.toml +6 -4
  5. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +2 -1
  6. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +33 -50
  7. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +2 -1
  8. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_converters.py +2 -1
  9. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_dict_converters.py +1 -0
  10. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_document_store.py +3 -14
  11. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_filters.py +2 -1
  12. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_legacy_filters.py +1 -0
  13. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/test_retriever.py +5 -1
  14. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/.gitignore +0 -0
  15. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/LICENSE.txt +0 -0
  16. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/README.md +0 -0
  17. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/pydoc/config.yml +0 -0
  18. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  19. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  20. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  21. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  22. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/__init__.py +0 -0
  23. {qdrant_haystack-4.1.2 → qdrant_haystack-4.2.0}/tests/conftest.py +0 -0
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/qdrant-v4.1.2] - 2024-07-15
4
+
5
+ ### 🐛 Bug Fixes
6
+
7
+ - `qdrant` - Fallback to default filter policy when deserializing retrievers without the init parameter (#902)
8
+
3
9
  ## [integrations/qdrant-v4.1.1] - 2024-07-10
4
10
 
5
11
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 4.1.2
3
+ Version: 4.2.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -12,6 +12,7 @@ from haystack.components.converters import MarkdownToDocument
12
12
  from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
13
13
  from haystack.components.preprocessors import DocumentSplitter
14
14
  from haystack.components.writers import DocumentWriter
15
+
15
16
  from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
16
17
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
17
18
 
@@ -46,10 +46,12 @@ git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*
46
46
  [tool.hatch.envs.default]
47
47
  dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
48
48
  [tool.hatch.envs.default.scripts]
49
- test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
50
- test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
49
+ test = "pytest {args:tests}"
50
+ test-cov = "coverage run -m pytest {args:tests}"
51
+ test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
51
52
  cov-report = ["- coverage combine", "coverage report"]
52
53
  cov = ["test-cov", "cov-report"]
54
+ cov-retry = ["test-cov-retry", "cov-report"]
53
55
  docs = ["pydoc-markdown pydoc/config.yml"]
54
56
 
55
57
  [[tool.hatch.envs.all.matrix]]
@@ -60,8 +62,8 @@ detached = true
60
62
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
61
63
  [tool.hatch.envs.lint.scripts]
62
64
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
63
- style = ["ruff check {args:.}", "black --check --diff {args:.}"]
64
- fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
65
+ style = ["ruff check {args:. --exclude tests/, examples/}", "black --check --diff {args:.}"]
66
+ fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/, examples/}", "style"]
65
67
  all = ["style", "typing"]
66
68
 
67
69
  [tool.black]
@@ -4,9 +4,10 @@ from haystack import Document, component, default_from_dict, default_to_dict
4
4
  from haystack.dataclasses.sparse_embedding import SparseEmbedding
5
5
  from haystack.document_stores.types import FilterPolicy
6
6
  from haystack.document_stores.types.filter_policy import apply_filter_policy
7
- from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
8
7
  from qdrant_client.http import models
9
8
 
9
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
10
+
10
11
 
11
12
  @component
12
13
  class QdrantEmbeddingRetriever:
@@ -15,7 +15,6 @@ from haystack.utils.filters import convert as convert_legacy_filters
15
15
  from qdrant_client import grpc
16
16
  from qdrant_client.http import models as rest
17
17
  from qdrant_client.http.exceptions import UnexpectedResponse
18
- from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
19
18
  from tqdm import tqdm
20
19
 
21
20
  from .converters import (
@@ -537,20 +536,18 @@ class QdrantDocumentStore:
537
536
  qdrant_filters = convert_filters_to_qdrant(filters)
538
537
  query_indices = query_sparse_embedding.indices
539
538
  query_values = query_sparse_embedding.values
540
- points = self.client.search(
539
+ points = self.client.query_points(
541
540
  collection_name=self.index,
542
- query_vector=rest.NamedSparseVector(
543
- name=SPARSE_VECTORS_NAME,
544
- vector=rest.SparseVector(
545
- indices=query_indices,
546
- values=query_values,
547
- ),
541
+ query=rest.SparseVector(
542
+ indices=query_indices,
543
+ values=query_values,
548
544
  ),
545
+ using=SPARSE_VECTORS_NAME,
549
546
  query_filter=qdrant_filters,
550
547
  limit=top_k,
551
548
  with_vectors=return_embedding,
552
549
  score_threshold=score_threshold,
553
- )
550
+ ).points
554
551
  results = [
555
552
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
556
553
  for point in points
@@ -588,17 +585,15 @@ class QdrantDocumentStore:
588
585
  """
589
586
  qdrant_filters = convert_filters_to_qdrant(filters)
590
587
 
591
- points = self.client.search(
588
+ points = self.client.query_points(
592
589
  collection_name=self.index,
593
- query_vector=rest.NamedVector(
594
- name=DENSE_VECTORS_NAME if self.use_sparse_embeddings else "",
595
- vector=query_embedding,
596
- ),
590
+ query=query_embedding,
591
+ using=DENSE_VECTORS_NAME if self.use_sparse_embeddings else None,
597
592
  query_filter=qdrant_filters,
598
593
  limit=top_k,
599
594
  with_vectors=return_embedding,
600
595
  score_threshold=score_threshold,
601
- )
596
+ ).points
602
597
  results = [
603
598
  convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
604
599
  for point in points
@@ -655,46 +650,34 @@ class QdrantDocumentStore:
655
650
 
656
651
  qdrant_filters = convert_filters_to_qdrant(filters)
657
652
 
658
- sparse_request = rest.SearchRequest(
659
- vector=rest.NamedSparseVector(
660
- name=SPARSE_VECTORS_NAME,
661
- vector=rest.SparseVector(
662
- indices=query_sparse_embedding.indices,
663
- values=query_sparse_embedding.values,
664
- ),
665
- ),
666
- filter=qdrant_filters,
667
- limit=top_k,
668
- with_payload=True,
669
- with_vector=return_embedding,
670
- score_threshold=score_threshold,
671
- )
672
-
673
- dense_request = rest.SearchRequest(
674
- vector=rest.NamedVector(
675
- name=DENSE_VECTORS_NAME,
676
- vector=query_embedding,
677
- ),
678
- filter=qdrant_filters,
679
- limit=top_k,
680
- with_payload=True,
681
- with_vector=return_embedding,
682
- )
683
-
684
653
  try:
685
- dense_request_response, sparse_request_response = self.client.search_batch(
686
- collection_name=self.index, requests=[dense_request, sparse_request]
687
- )
654
+ points = self.client.query_points(
655
+ collection_name=self.index,
656
+ prefetch=[
657
+ rest.Prefetch(
658
+ query=rest.SparseVector(
659
+ indices=query_sparse_embedding.indices,
660
+ values=query_sparse_embedding.values,
661
+ ),
662
+ using=SPARSE_VECTORS_NAME,
663
+ filter=qdrant_filters,
664
+ ),
665
+ rest.Prefetch(
666
+ query=query_embedding,
667
+ using=DENSE_VECTORS_NAME,
668
+ filter=qdrant_filters,
669
+ ),
670
+ ],
671
+ query=rest.FusionQuery(fusion=rest.Fusion.RRF),
672
+ limit=top_k,
673
+ score_threshold=score_threshold,
674
+ with_payload=True,
675
+ with_vectors=return_embedding,
676
+ ).points
688
677
  except Exception as e:
689
678
  msg = "Error during hybrid search"
690
679
  raise QdrantStoreError(msg) from e
691
680
 
692
- try:
693
- points = reciprocal_rank_fusion(responses=[dense_request_response, sparse_request_response], limit=top_k)
694
- except Exception as e:
695
- msg = "Error while applying Reciprocal Rank Fusion"
696
- raise QdrantStoreError(msg) from e
697
-
698
681
  results = [convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=True) for point in points]
699
682
 
700
683
  return results
@@ -1,9 +1,10 @@
1
1
  import logging
2
2
  import time
3
3
 
4
- from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
5
4
  from qdrant_client.http import models
6
5
 
6
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
7
+
7
8
  logger = logging.getLogger(__name__)
8
9
  logger.addHandler(logging.StreamHandler())
9
10
  logger.setLevel(logging.INFO)
@@ -1,9 +1,10 @@
1
1
  import numpy as np
2
+ from qdrant_client.http import models as rest
3
+
2
4
  from haystack_integrations.document_stores.qdrant.converters import (
3
5
  convert_id,
4
6
  convert_qdrant_point_to_haystack_document,
5
7
  )
6
- from qdrant_client.http import models as rest
7
8
 
8
9
 
9
10
  def test_convert_id_is_deterministic():
@@ -1,4 +1,5 @@
1
1
  from haystack.utils import Secret
2
+
2
3
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
3
4
 
4
5
 
@@ -12,12 +12,13 @@ from haystack.testing.document_store import (
12
12
  WriteDocumentsTest,
13
13
  _random_embeddings,
14
14
  )
15
+ from qdrant_client.http import models as rest
16
+
15
17
  from haystack_integrations.document_stores.qdrant.document_store import (
16
18
  SPARSE_VECTORS_NAME,
17
19
  QdrantDocumentStore,
18
20
  QdrantStoreError,
19
21
  )
20
- from qdrant_client.http import models as rest
21
22
 
22
23
 
23
24
  class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
@@ -113,19 +114,7 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
113
114
  sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
114
115
  embedding = [0.1] * 768
115
116
 
116
- with patch.object(document_store.client, "search_batch", side_effect=Exception("search_batch error")):
117
+ with patch.object(document_store.client, "query_points", side_effect=Exception("query_points")):
117
118
 
118
119
  with pytest.raises(QdrantStoreError):
119
120
  document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
120
-
121
- @patch("haystack_integrations.document_stores.qdrant.document_store.reciprocal_rank_fusion")
122
- def test_query_hybrid_reciprocal_rank_fusion_failure(self, mocked_fusion):
123
- document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)
124
-
125
- sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
126
- embedding = [0.1] * 768
127
-
128
- mocked_fusion.side_effect = Exception("reciprocal_rank_fusion error")
129
-
130
- with pytest.raises(QdrantStoreError):
131
- document_store._query_hybrid(query_sparse_embedding=sparse_embedding, query_embedding=embedding)
@@ -4,9 +4,10 @@ import pytest
4
4
  from haystack import Document
5
5
  from haystack.testing.document_store import FilterDocumentsTest
6
6
  from haystack.utils.filters import FilterError
7
- from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
8
7
  from qdrant_client.http import models
9
8
 
9
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
10
+
10
11
 
11
12
  class TestQdrantStoreBaseTests(FilterDocumentsTest):
12
13
  @pytest.fixture
@@ -5,6 +5,7 @@ from haystack import Document
5
5
  from haystack.document_stores.types import DocumentStore
6
6
  from haystack.testing.document_store import LegacyFilterDocumentsTest
7
7
  from haystack.utils.filters import FilterError
8
+
8
9
  from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
9
10
 
10
11
  # The tests below are from haystack.testing.document_store.LegacyFilterDocumentsTest
@@ -8,6 +8,7 @@ from haystack.testing.document_store import (
8
8
  FilterableDocsFixtureMixin,
9
9
  _random_embeddings,
10
10
  )
11
+
11
12
  from haystack_integrations.components.retrievers.qdrant import (
12
13
  QdrantEmbeddingRetriever,
13
14
  QdrantHybridRetriever,
@@ -151,7 +152,10 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
151
152
  filters={"field": "meta.chapter", "operator": "==", "value": "abstract"},
152
153
  return_embedding=False,
153
154
  )["documents"]
154
- assert len(results) == 3
155
+ # we need to combine init filter and run filter as the policy is MERGE
156
+ # when we combine these filters we use AND logical operator by default
157
+ # so the result should be 1 as we have only one document that matches both filters
158
+ assert len(results) == 1
155
159
 
156
160
  for document in results:
157
161
  assert document.embedding is None