qdrant-haystack 6.0.0__tar.gz → 8.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/CHANGELOG.md +9 -8
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/PKG-INFO +2 -2
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/pyproject.toml +9 -5
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +1 -1
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +17 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +6 -14
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_converters.py +44 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_filters.py +0 -6
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/.gitignore +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/LICENSE.txt +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/README.md +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/examples/embedding_retrieval.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/pydoc/config.yml +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/__init__.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/conftest.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_dict_converters.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_document_store.py +0 -0
- {qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_retriever.py +0 -0
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/qdrant-v7.0.0] - 2024-10-29
|
|
4
|
+
|
|
5
|
+
### ⚙️ Miscellaneous Tasks
|
|
6
|
+
|
|
7
|
+
- Update ruff linting scripts and settings (#1105)
|
|
8
|
+
- Adopt uv as installer (#1142)
|
|
9
|
+
|
|
10
|
+
## [integrations/qdrant-v6.0.0] - 2024-09-13
|
|
11
|
+
|
|
3
12
|
## [integrations/qdrant-v5.1.0] - 2024-09-12
|
|
4
13
|
|
|
5
14
|
### 🚀 Features
|
|
@@ -103,8 +112,6 @@
|
|
|
103
112
|
|
|
104
113
|
- Fix haystack-ai pin (#649)
|
|
105
114
|
|
|
106
|
-
|
|
107
|
-
|
|
108
115
|
## [integrations/qdrant-v3.2.0] - 2024-03-27
|
|
109
116
|
|
|
110
117
|
### 🚀 Features
|
|
@@ -115,15 +122,11 @@
|
|
|
115
122
|
### 🐛 Bug Fixes
|
|
116
123
|
|
|
117
124
|
- Fix linter errors (#282)
|
|
118
|
-
|
|
119
|
-
|
|
120
125
|
- Fix order of API docs (#447)
|
|
121
126
|
|
|
122
127
|
This PR will also push the docs to Readme
|
|
123
128
|
- Fixes (#518)
|
|
124
129
|
|
|
125
|
-
|
|
126
|
-
|
|
127
130
|
### 🚜 Refactor
|
|
128
131
|
|
|
129
132
|
- [**breaking**] Qdrant - update secret management (#405)
|
|
@@ -154,8 +157,6 @@ This PR will also push the docs to Readme
|
|
|
154
157
|
|
|
155
158
|
- Fix import paths for beta5 (#237)
|
|
156
159
|
|
|
157
|
-
|
|
158
|
-
|
|
159
160
|
### 🚜 Refactor
|
|
160
161
|
|
|
161
162
|
- Use `hatch_vcs` to manage integrations versioning (#103)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: qdrant-haystack
|
|
3
|
-
Version: 6.0.0
|
|
3
|
+
Version: 8.0.0
|
|
4
4
|
Summary: An integration of Qdrant ANN vector database backend with Haystack
|
|
5
5
|
Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
|
|
6
6
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
|
|
@@ -44,6 +44,7 @@ root = "../.."
|
|
|
44
44
|
git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
|
|
45
45
|
|
|
46
46
|
[tool.hatch.envs.default]
|
|
47
|
+
installer = "uv"
|
|
47
48
|
dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
|
|
48
49
|
[tool.hatch.envs.default.scripts]
|
|
49
50
|
test = "pytest {args:tests}"
|
|
@@ -58,12 +59,13 @@ docs = ["pydoc-markdown pydoc/config.yml"]
|
|
|
58
59
|
python = ["3.8", "3.9", "3.10", "3.11"]
|
|
59
60
|
|
|
60
61
|
[tool.hatch.envs.lint]
|
|
62
|
+
installer = "uv"
|
|
61
63
|
detached = true
|
|
62
|
-
dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
64
|
+
dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
63
65
|
[tool.hatch.envs.lint.scripts]
|
|
64
66
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
65
|
-
style = ["ruff check {args:.
|
|
66
|
-
fmt = ["black {args:.}", "ruff --fix {args:.
|
|
67
|
+
style = ["ruff check {args:.}", "black --check --diff {args:.}"]
|
|
68
|
+
fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
|
|
67
69
|
all = ["style", "typing"]
|
|
68
70
|
|
|
69
71
|
[tool.black]
|
|
@@ -74,6 +76,8 @@ skip-string-normalization = true
|
|
|
74
76
|
[tool.ruff]
|
|
75
77
|
target-version = "py38"
|
|
76
78
|
line-length = 120
|
|
79
|
+
|
|
80
|
+
[tool.ruff.lint]
|
|
77
81
|
select = [
|
|
78
82
|
"A",
|
|
79
83
|
"ARG",
|
|
@@ -125,10 +129,10 @@ unfixable = [
|
|
|
125
129
|
"F401",
|
|
126
130
|
]
|
|
127
131
|
|
|
128
|
-
[tool.ruff.flake8-tidy-imports]
|
|
132
|
+
[tool.ruff.lint.flake8-tidy-imports]
|
|
129
133
|
ban-relative-imports = "parents"
|
|
130
134
|
|
|
131
|
-
[tool.ruff.per-file-ignores]
|
|
135
|
+
[tool.ruff.lint.per-file-ignores]
|
|
132
136
|
# Tests can use magic values, assertions, and relative imports
|
|
133
137
|
"tests/**/*" = ["PLR2004", "S101", "TID252"]
|
|
134
138
|
# examples can contain "print" commands
|
|
@@ -4,4 +4,4 @@
|
|
|
4
4
|
|
|
5
5
|
from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
|
|
6
6
|
|
|
7
|
-
__all__ = ("QdrantEmbeddingRetriever", "
|
|
7
|
+
__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")
|
|
@@ -22,6 +22,15 @@ def convert_haystack_documents_to_qdrant_points(
|
|
|
22
22
|
points = []
|
|
23
23
|
for document in documents:
|
|
24
24
|
payload = document.to_dict(flatten=False)
|
|
25
|
+
|
|
26
|
+
if payload.pop("dataframe", None):
|
|
27
|
+
logger.warning(
|
|
28
|
+
"Document %s has the `dataframe` field set,"
|
|
29
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
30
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
31
|
+
document.id,
|
|
32
|
+
)
|
|
33
|
+
|
|
25
34
|
if use_sparse_embeddings:
|
|
26
35
|
vector = {}
|
|
27
36
|
|
|
@@ -64,6 +73,14 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
|
|
|
64
73
|
payload = {**point.payload}
|
|
65
74
|
payload["score"] = point.score if hasattr(point, "score") else None
|
|
66
75
|
|
|
76
|
+
if payload.pop("dataframe", None):
|
|
77
|
+
logger.warning(
|
|
78
|
+
"Document %s has the `dataframe` field set,"
|
|
79
|
+
"QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
80
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
81
|
+
payload["id"],
|
|
82
|
+
)
|
|
83
|
+
|
|
67
84
|
if not use_sparse_embeddings:
|
|
68
85
|
payload["embedding"] = point.vector if hasattr(point, "vector") else None
|
|
69
86
|
elif hasattr(point, "vector") and point.vector is not None:
|
|
@@ -362,7 +362,6 @@ class QdrantDocumentStore:
|
|
|
362
362
|
|
|
363
363
|
document_objects = self._handle_duplicate_documents(
|
|
364
364
|
documents=documents,
|
|
365
|
-
index=self.index,
|
|
366
365
|
policy=policy,
|
|
367
366
|
)
|
|
368
367
|
|
|
@@ -468,7 +467,6 @@ class QdrantDocumentStore:
|
|
|
468
467
|
def get_documents_by_id(
|
|
469
468
|
self,
|
|
470
469
|
ids: List[str],
|
|
471
|
-
index: Optional[str] = None,
|
|
472
470
|
) -> List[Document]:
|
|
473
471
|
"""
|
|
474
472
|
Retrieves documents from Qdrant by their IDs.
|
|
@@ -480,13 +478,11 @@ class QdrantDocumentStore:
|
|
|
480
478
|
:returns:
|
|
481
479
|
A list of documents.
|
|
482
480
|
"""
|
|
483
|
-
index = index or self.index
|
|
484
|
-
|
|
485
481
|
documents: List[Document] = []
|
|
486
482
|
|
|
487
483
|
ids = [convert_id(_id) for _id in ids]
|
|
488
484
|
records = self.client.retrieve(
|
|
489
|
-
collection_name=index,
|
|
485
|
+
collection_name=self.index,
|
|
490
486
|
ids=ids,
|
|
491
487
|
with_payload=True,
|
|
492
488
|
with_vectors=True,
|
|
@@ -987,7 +983,6 @@ class QdrantDocumentStore:
|
|
|
987
983
|
def _handle_duplicate_documents(
|
|
988
984
|
self,
|
|
989
985
|
documents: List[Document],
|
|
990
|
-
index: Optional[str] = None,
|
|
991
986
|
policy: DuplicatePolicy = None,
|
|
992
987
|
):
|
|
993
988
|
"""
|
|
@@ -995,31 +990,28 @@ class QdrantDocumentStore:
|
|
|
995
990
|
documents that are not in the index yet.
|
|
996
991
|
|
|
997
992
|
:param documents: A list of Haystack Document objects.
|
|
998
|
-
:param index: name of the index
|
|
999
993
|
:param policy: The duplicate policy to use when writing documents.
|
|
1000
994
|
:returns: A list of Haystack Document objects.
|
|
1001
995
|
"""
|
|
1002
996
|
|
|
1003
|
-
index = index or self.index
|
|
1004
997
|
if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
|
|
1005
|
-
documents = self._drop_duplicate_documents(documents
|
|
1006
|
-
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents]
|
|
998
|
+
documents = self._drop_duplicate_documents(documents)
|
|
999
|
+
documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
|
|
1007
1000
|
ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
|
|
1008
1001
|
|
|
1009
1002
|
if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
|
|
1010
|
-
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
|
|
1003
|
+
msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
|
|
1011
1004
|
raise DuplicateDocumentError(msg)
|
|
1012
1005
|
|
|
1013
1006
|
documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
|
|
1014
1007
|
|
|
1015
1008
|
return documents
|
|
1016
1009
|
|
|
1017
|
-
def _drop_duplicate_documents(self, documents: List[Document]
|
|
1010
|
+
def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
|
|
1018
1011
|
"""
|
|
1019
1012
|
Drop duplicate documents based on same hash ID.
|
|
1020
1013
|
|
|
1021
1014
|
:param documents: A list of Haystack Document objects.
|
|
1022
|
-
:param index: Name of the index.
|
|
1023
1015
|
:returns: A list of Haystack Document objects.
|
|
1024
1016
|
"""
|
|
1025
1017
|
_hash_ids: Set = set()
|
|
@@ -1030,7 +1022,7 @@ class QdrantDocumentStore:
|
|
|
1030
1022
|
logger.info(
|
|
1031
1023
|
"Duplicate Documents: Document with id '%s' already exists in index '%s'",
|
|
1032
1024
|
document.id,
|
|
1033
|
-
|
|
1025
|
+
self.index,
|
|
1034
1026
|
)
|
|
1035
1027
|
continue
|
|
1036
1028
|
_documents.append(document)
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
from haystack import Document
|
|
3
|
+
from pandas import DataFrame
|
|
2
4
|
from qdrant_client.http import models as rest
|
|
3
5
|
|
|
4
6
|
from haystack_integrations.document_stores.qdrant.converters import (
|
|
7
|
+
convert_haystack_documents_to_qdrant_points,
|
|
5
8
|
convert_id,
|
|
6
9
|
convert_qdrant_point_to_haystack_document,
|
|
7
10
|
)
|
|
@@ -62,3 +65,44 @@ def test_point_to_document_reverts_proper_structure_from_record_without_sparse()
|
|
|
62
65
|
assert document.sparse_embedding is None
|
|
63
66
|
assert {"test_field": 1} == document.meta
|
|
64
67
|
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_point_to_document_skips_dataframe():
|
|
71
|
+
|
|
72
|
+
point = rest.Record(
|
|
73
|
+
id="c7c62e8e-02b9-4ec6-9f88-46bd97b628b7",
|
|
74
|
+
payload={
|
|
75
|
+
"id": "my-id",
|
|
76
|
+
"content": "Lorem ipsum",
|
|
77
|
+
"content_type": "text",
|
|
78
|
+
"meta": {
|
|
79
|
+
"test_field": 1,
|
|
80
|
+
},
|
|
81
|
+
"dataframe": {"a": [1, 2, 3]},
|
|
82
|
+
},
|
|
83
|
+
vector=[1.0, 0.0, 0.0, 0.0],
|
|
84
|
+
)
|
|
85
|
+
document = convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=False)
|
|
86
|
+
assert "my-id" == document.id
|
|
87
|
+
assert "Lorem ipsum" == document.content
|
|
88
|
+
assert "text" == document.content_type
|
|
89
|
+
assert {"test_field": 1} == document.meta
|
|
90
|
+
assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
|
|
91
|
+
assert not hasattr(document, "dataframe") or document.dataframe is None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_documents_to_points_skips_dataframe():
|
|
95
|
+
doc = Document(
|
|
96
|
+
id="my-id",
|
|
97
|
+
content="Lorem ipsum",
|
|
98
|
+
embedding=[1.0, 0.0, 0.0, 0.0],
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
doc.dataframe = DataFrame([[1, 2], [3, 4]])
|
|
102
|
+
|
|
103
|
+
points = convert_haystack_documents_to_qdrant_points([doc], use_sparse_embeddings=False)
|
|
104
|
+
assert len(points) == 1
|
|
105
|
+
|
|
106
|
+
assert points[0].payload["content"] == "Lorem ipsum"
|
|
107
|
+
assert points[0].vector == [1.0, 0.0, 0.0, 0.0]
|
|
108
|
+
assert "dataframe" not in points[0].payload
|
|
@@ -208,11 +208,5 @@ class TestQdrantStoreBaseTests(FilterDocumentsTest):
|
|
|
208
208
|
|
|
209
209
|
# ======== ========================== ========
|
|
210
210
|
|
|
211
|
-
@pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
|
|
212
|
-
def test_comparison_equal_with_dataframe(self, document_store, filterable_docs): ...
|
|
213
|
-
|
|
214
|
-
@pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
|
|
215
|
-
def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs): ...
|
|
216
|
-
|
|
217
211
|
@pytest.mark.skip(reason="Cannot distinguish errors yet")
|
|
218
212
|
def test_missing_top_level_operator_key(self, document_store, filterable_docs): ...
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|