qdrant-haystack 6.0.0__tar.gz → 8.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/CHANGELOG.md +9 -8
  2. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/PKG-INFO +2 -2
  3. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/pyproject.toml +9 -5
  4. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
  5. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +17 -0
  6. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +6 -14
  7. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_converters.py +44 -0
  8. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_filters.py +0 -6
  9. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/.gitignore +0 -0
  10. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/LICENSE.txt +0 -0
  11. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/README.md +0 -0
  12. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/examples/embedding_retrieval.py +0 -0
  13. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/pydoc/config.yml +0 -0
  14. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
  15. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  16. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  17. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  18. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/__init__.py +0 -0
  19. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/conftest.py +0 -0
  20. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_dict_converters.py +0 -0
  21. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_document_store.py +0 -0
  22. {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_retriever.py +0 -0
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/qdrant-v7.0.0] - 2024-10-29
4
+
5
+ ### ⚙️ Miscellaneous Tasks
6
+
7
+ - Update ruff linting scripts and settings (#1105)
8
+ - Adopt uv as installer (#1142)
9
+
10
+ ## [integrations/qdrant-v6.0.0] - 2024-09-13
11
+
3
12
  ## [integrations/qdrant-v5.1.0] - 2024-09-12
4
13
 
5
14
  ### 🚀 Features
@@ -103,8 +112,6 @@
103
112
 
104
113
  - Fix haystack-ai pin (#649)
105
114
 
106
-
107
-
108
115
  ## [integrations/qdrant-v3.2.0] - 2024-03-27
109
116
 
110
117
  ### 🚀 Features
@@ -115,15 +122,11 @@
115
122
  ### 🐛 Bug Fixes
116
123
 
117
124
  - Fix linter errors (#282)
118
-
119
-
120
125
  - Fix order of API docs (#447)
121
126
 
122
127
  This PR will also push the docs to Readme
123
128
  - Fixes (#518)
124
129
 
125
-
126
-
127
130
  ### 🚜 Refactor
128
131
 
129
132
  - [**breaking**] Qdrant - update secret management (#405)
@@ -154,8 +157,6 @@ This PR will also push the docs to Readme
154
157
 
155
158
  - Fix import paths for beta5 (#237)
156
159
 
157
-
158
-
159
160
  ### 🚜 Refactor
160
161
 
161
162
  - Use `hatch_vcs` to manage integrations versioning (#103)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: qdrant-haystack
3
- Version: 6.0.0
3
+ Version: 8.0.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -44,6 +44,7 @@ root = "../.."
44
44
  git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
45
45
 
46
46
  [tool.hatch.envs.default]
47
+ installer = "uv"
47
48
  dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
48
49
  [tool.hatch.envs.default.scripts]
49
50
  test = "pytest {args:tests}"
@@ -58,12 +59,13 @@ docs = ["pydoc-markdown pydoc/config.yml"]
58
59
  python = ["3.8", "3.9", "3.10", "3.11"]
59
60
 
60
61
  [tool.hatch.envs.lint]
62
+ installer = "uv"
61
63
  detached = true
62
- dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
64
+ dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
63
65
  [tool.hatch.envs.lint.scripts]
64
66
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
65
- style = ["ruff check {args:. --exclude tests/, examples/}", "black --check --diff {args:.}"]
66
- fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/, examples/}", "style"]
67
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
68
+ fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
67
69
  all = ["style", "typing"]
68
70
 
69
71
  [tool.black]
@@ -74,6 +76,8 @@ skip-string-normalization = true
74
76
  [tool.ruff]
75
77
  target-version = "py38"
76
78
  line-length = 120
79
+
80
+ [tool.ruff.lint]
77
81
  select = [
78
82
  "A",
79
83
  "ARG",
@@ -125,10 +129,10 @@ unfixable = [
125
129
  "F401",
126
130
  ]
127
131
 
128
- [tool.ruff.flake8-tidy-imports]
132
+ [tool.ruff.lint.flake8-tidy-imports]
129
133
  ban-relative-imports = "parents"
130
134
 
131
- [tool.ruff.per-file-ignores]
135
+ [tool.ruff.lint.per-file-ignores]
132
136
  # Tests can use magic values, assertions, and relative imports
133
137
  "tests/**/*" = ["PLR2004", "S101", "TID252"]
134
138
  # examples can contain "print" commands
@@ -4,4 +4,4 @@
4
4
 
5
5
  from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
6
6
 
7
- __all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
7
+ __all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")
@@ -22,6 +22,15 @@ def convert_haystack_documents_to_qdrant_points(
22
22
  points = []
23
23
  for document in documents:
24
24
  payload = document.to_dict(flatten=False)
25
+
26
+ if payload.pop("dataframe", None):
27
+ logger.warning(
28
+ "Document %s has the `dataframe` field set,"
29
+ "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
30
+ "The `dataframe` field will soon be removed from Haystack Document.",
31
+ document.id,
32
+ )
33
+
25
34
  if use_sparse_embeddings:
26
35
  vector = {}
27
36
 
@@ -64,6 +73,14 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
64
73
  payload = {**point.payload}
65
74
  payload["score"] = point.score if hasattr(point, "score") else None
66
75
 
76
+ if payload.pop("dataframe", None):
77
+ logger.warning(
78
+ "Document %s has the `dataframe` field set,"
79
+ "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
80
+ "The `dataframe` field will soon be removed from Haystack Document.",
81
+ payload["id"],
82
+ )
83
+
67
84
  if not use_sparse_embeddings:
68
85
  payload["embedding"] = point.vector if hasattr(point, "vector") else None
69
86
  elif hasattr(point, "vector") and point.vector is not None:
@@ -362,7 +362,6 @@ class QdrantDocumentStore:
362
362
 
363
363
  document_objects = self._handle_duplicate_documents(
364
364
  documents=documents,
365
- index=self.index,
366
365
  policy=policy,
367
366
  )
368
367
 
@@ -468,7 +467,6 @@ class QdrantDocumentStore:
468
467
  def get_documents_by_id(
469
468
  self,
470
469
  ids: List[str],
471
- index: Optional[str] = None,
472
470
  ) -> List[Document]:
473
471
  """
474
472
  Retrieves documents from Qdrant by their IDs.
@@ -480,13 +478,11 @@ class QdrantDocumentStore:
480
478
  :returns:
481
479
  A list of documents.
482
480
  """
483
- index = index or self.index
484
-
485
481
  documents: List[Document] = []
486
482
 
487
483
  ids = [convert_id(_id) for _id in ids]
488
484
  records = self.client.retrieve(
489
- collection_name=index,
485
+ collection_name=self.index,
490
486
  ids=ids,
491
487
  with_payload=True,
492
488
  with_vectors=True,
@@ -987,7 +983,6 @@ class QdrantDocumentStore:
987
983
  def _handle_duplicate_documents(
988
984
  self,
989
985
  documents: List[Document],
990
- index: Optional[str] = None,
991
986
  policy: DuplicatePolicy = None,
992
987
  ):
993
988
  """
@@ -995,31 +990,28 @@ class QdrantDocumentStore:
995
990
  documents that are not in the index yet.
996
991
 
997
992
  :param documents: A list of Haystack Document objects.
998
- :param index: name of the index
999
993
  :param policy: The duplicate policy to use when writing documents.
1000
994
  :returns: A list of Haystack Document objects.
1001
995
  """
1002
996
 
1003
- index = index or self.index
1004
997
  if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
1005
- documents = self._drop_duplicate_documents(documents, index)
1006
- documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents], index=index)
998
+ documents = self._drop_duplicate_documents(documents)
999
+ documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
1007
1000
  ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
1008
1001
 
1009
1002
  if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
1010
- msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
1003
+ msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
1011
1004
  raise DuplicateDocumentError(msg)
1012
1005
 
1013
1006
  documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
1014
1007
 
1015
1008
  return documents
1016
1009
 
1017
- def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
1010
+ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
1018
1011
  """
1019
1012
  Drop duplicate documents based on same hash ID.
1020
1013
 
1021
1014
  :param documents: A list of Haystack Document objects.
1022
- :param index: Name of the index.
1023
1015
  :returns: A list of Haystack Document objects.
1024
1016
  """
1025
1017
  _hash_ids: Set = set()
@@ -1030,7 +1022,7 @@ class QdrantDocumentStore:
1030
1022
  logger.info(
1031
1023
  "Duplicate Documents: Document with id '%s' already exists in index '%s'",
1032
1024
  document.id,
1033
- index or self.index,
1025
+ self.index,
1034
1026
  )
1035
1027
  continue
1036
1028
  _documents.append(document)
@@ -1,7 +1,10 @@
1
1
  import numpy as np
2
+ from haystack import Document
3
+ from pandas import DataFrame
2
4
  from qdrant_client.http import models as rest
3
5
 
4
6
  from haystack_integrations.document_stores.qdrant.converters import (
7
+ convert_haystack_documents_to_qdrant_points,
5
8
  convert_id,
6
9
  convert_qdrant_point_to_haystack_document,
7
10
  )
@@ -62,3 +65,44 @@ def test_point_to_document_reverts_proper_structure_from_record_without_sparse()
62
65
  assert document.sparse_embedding is None
63
66
  assert {"test_field": 1} == document.meta
64
67
  assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
68
+
69
+
70
+ def test_point_to_document_skips_dataframe():
71
+
72
+ point = rest.Record(
73
+ id="c7c62e8e-02b9-4ec6-9f88-46bd97b628b7",
74
+ payload={
75
+ "id": "my-id",
76
+ "content": "Lorem ipsum",
77
+ "content_type": "text",
78
+ "meta": {
79
+ "test_field": 1,
80
+ },
81
+ "dataframe": {"a": [1, 2, 3]},
82
+ },
83
+ vector=[1.0, 0.0, 0.0, 0.0],
84
+ )
85
+ document = convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=False)
86
+ assert "my-id" == document.id
87
+ assert "Lorem ipsum" == document.content
88
+ assert "text" == document.content_type
89
+ assert {"test_field": 1} == document.meta
90
+ assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
91
+ assert not hasattr(document, "dataframe") or document.dataframe is None
92
+
93
+
94
+ def test_documents_to_points_skips_dataframe():
95
+ doc = Document(
96
+ id="my-id",
97
+ content="Lorem ipsum",
98
+ embedding=[1.0, 0.0, 0.0, 0.0],
99
+ )
100
+
101
+ doc.dataframe = DataFrame([[1, 2], [3, 4]])
102
+
103
+ points = convert_haystack_documents_to_qdrant_points([doc], use_sparse_embeddings=False)
104
+ assert len(points) == 1
105
+
106
+ assert points[0].payload["content"] == "Lorem ipsum"
107
+ assert points[0].vector == [1.0, 0.0, 0.0, 0.0]
108
+ assert "dataframe" not in points[0].payload
@@ -208,11 +208,5 @@ class TestQdrantStoreBaseTests(FilterDocumentsTest):
208
208
 
209
209
  # ======== ========================== ========
210
210
 
211
- @pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
212
- def test_comparison_equal_with_dataframe(self, document_store, filterable_docs): ...
213
-
214
- @pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
215
- def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs): ...
216
-
217
211
  @pytest.mark.skip(reason="Cannot distinguish errors yet")
218
212
  def test_missing_top_level_operator_key(self, document_store, filterable_docs): ...