qdrant-haystack 3.8.1__tar.gz → 4.0.0__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (23)
  1. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/.gitignore +9 -0
  2. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/CHANGELOG.md +6 -0
  3. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/PKG-INFO +1 -1
  4. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/pyproject.toml +8 -11
  5. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +2 -2
  6. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +2 -3
  7. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +3 -25
  8. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_dict_converters.py +0 -12
  9. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_retriever.py +2 -14
  10. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/LICENSE.txt +0 -0
  11. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/README.md +0 -0
  12. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/examples/embedding_retrieval.py +0 -0
  13. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/pydoc/config.yml +0 -0
  14. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  15. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  16. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  17. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  18. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/__init__.py +0 -0
  19. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/conftest.py +0 -0
  20. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_converters.py +0 -0
  21. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_document_store.py +0 -0
  22. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_filters.py +0 -0
  23. {qdrant_haystack-3.8.1 → qdrant_haystack-4.0.0}/tests/test_legacy_filters.py +0 -0
@@ -135,3 +135,12 @@ dmypy.json
135
135
  # Docs generation artifacts
136
136
  _readme_*.md
137
137
  .idea
138
+
139
+ # macOS
140
+ .DS_Store
141
+
142
+ # http cache (requests-cache)
143
+ **/http_cache.sqlite
144
+
145
+ # ruff
146
+ .ruff_cache
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/qdrant-v3.8.1] - 2024-06-20
4
+
5
+ ### 📚 Documentation
6
+
7
+ - Added docstrings for QdrantDocumentStore (#808)
8
+
3
9
  ## [integrations/qdrant-v3.8.0] - 2024-06-06
4
10
 
5
11
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.8.1
3
+ Version: 4.0.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -44,10 +44,10 @@ root = "../.."
44
44
  git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
45
45
 
46
46
  [tool.hatch.envs.default]
47
- dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
47
+ dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
48
48
  [tool.hatch.envs.default.scripts]
49
- test = "pytest {args:tests}"
50
- test-cov = "coverage run -m pytest {args:tests}"
49
+ test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
50
+ test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
51
51
  cov-report = ["- coverage combine", "coverage report"]
52
52
  cov = ["test-cov", "cov-report"]
53
53
  docs = ["pydoc-markdown pydoc/config.yml"]
@@ -60,7 +60,7 @@ detached = true
60
60
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
61
61
  [tool.hatch.envs.lint.scripts]
62
62
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
63
- style = ["ruff {args:.}", "black --check --diff {args:.}"]
63
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
64
64
  fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
65
65
  all = ["style", "typing"]
66
66
 
@@ -105,7 +105,8 @@ ignore = [
105
105
  # Allow boolean positional values in function calls, like `dict.get(... True)`
106
106
  "FBT003",
107
107
  # Allow boolean arguments in function definition
108
- "FBT001", "FBT002",
108
+ "FBT001",
109
+ "FBT002",
109
110
  # Ignore checks for possible passwords
110
111
  "S105",
111
112
  "S106",
@@ -140,12 +141,8 @@ parallel = false
140
141
 
141
142
  [tool.coverage.report]
142
143
  omit = ["*/tests/*", "*/__init__.py"]
143
- show_missing=true
144
- exclude_lines = [
145
- "no cov",
146
- "if __name__ == .__main__.:",
147
- "if TYPE_CHECKING:",
148
- ]
144
+ show_missing = true
145
+ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
149
146
 
150
147
 
151
148
  [[tool.mypy.overrides]]
@@ -37,7 +37,7 @@ class QdrantEmbeddingRetriever:
37
37
  document_store: QdrantDocumentStore,
38
38
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
39
39
  top_k: int = 10,
40
- scale_score: bool = True,
40
+ scale_score: bool = False,
41
41
  return_embedding: bool = False,
42
42
  ):
43
43
  """
@@ -159,7 +159,7 @@ class QdrantSparseEmbeddingRetriever:
159
159
  document_store: QdrantDocumentStore,
160
160
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
161
161
  top_k: int = 10,
162
- scale_score: bool = True,
162
+ scale_score: bool = False,
163
163
  return_embedding: bool = False,
164
164
  ):
165
165
  """
@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
17
17
  def convert_haystack_documents_to_qdrant_points(
18
18
  documents: List[Document],
19
19
  *,
20
- embedding_field: str,
21
20
  use_sparse_embeddings: bool,
22
21
  ) -> List[rest.PointStruct]:
23
22
  points = []
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
26
25
  if use_sparse_embeddings:
27
26
  vector = {}
28
27
 
29
- dense_vector = payload.pop(embedding_field, None)
28
+ dense_vector = payload.pop("embedding", None)
30
29
  if dense_vector is not None:
31
30
  vector[DENSE_VECTORS_NAME] = dense_vector
32
31
 
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
36
35
  vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
37
36
 
38
37
  else:
39
- vector = payload.pop(embedding_field) or {}
38
+ vector = payload.pop("embedding") or {}
40
39
  _id = convert_id(payload.get("id"))
41
40
 
42
41
  point = rest.PointStruct(
@@ -110,14 +110,10 @@ class QdrantDocumentStore:
110
110
  index: str = "Document",
111
111
  embedding_dim: int = 768,
112
112
  on_disk: bool = False,
113
- content_field: str = "content",
114
- name_field: str = "name",
115
- embedding_field: str = "embedding",
116
113
  use_sparse_embeddings: bool = False,
117
114
  similarity: str = "cosine",
118
115
  return_embedding: bool = False,
119
116
  progress_bar: bool = True,
120
- duplicate_documents: str = "overwrite",
121
117
  recreate_index: bool = False,
122
118
  shard_number: Optional[int] = None,
123
119
  replication_factor: Optional[int] = None,
@@ -170,12 +166,6 @@ class QdrantDocumentStore:
170
166
  Dimension of the embeddings.
171
167
  :param on_disk:
172
168
  Whether to store the collection on disk.
173
- :param content_field:
174
- The field for the document content.
175
- :param name_field:
176
- The field for the document name.
177
- :param embedding_field:
178
- The field for the document embeddings.
179
169
  :param use_sparse_embedding:
180
170
  If set to `True`, enables support for sparse embeddings.
181
171
  :param similarity:
@@ -184,8 +174,6 @@ class QdrantDocumentStore:
184
174
  Whether to return embeddings in the search results.
185
175
  :param progress_bar:
186
176
  Whether to show a progress bar or not.
187
- :param duplicate_documents:
188
- The parameter is not used and will be removed in future release.
189
177
  :param recreate_index:
190
178
  Whether to recreate the index.
191
179
  :param shard_number:
@@ -260,14 +248,10 @@ class QdrantDocumentStore:
260
248
  self.use_sparse_embeddings = use_sparse_embeddings
261
249
  self.embedding_dim = embedding_dim
262
250
  self.on_disk = on_disk
263
- self.content_field = content_field
264
- self.name_field = name_field
265
- self.embedding_field = embedding_field
266
251
  self.similarity = similarity
267
252
  self.index = index
268
253
  self.return_embedding = return_embedding
269
254
  self.progress_bar = progress_bar
270
- self.duplicate_documents = duplicate_documents
271
255
  self.write_batch_size = write_batch_size
272
256
  self.scroll_size = scroll_size
273
257
 
@@ -380,7 +364,6 @@ class QdrantDocumentStore:
380
364
  for document_batch in batched_documents:
381
365
  batch = convert_haystack_documents_to_qdrant_points(
382
366
  document_batch,
383
- embedding_field=self.embedding_field,
384
367
  use_sparse_embeddings=self.use_sparse_embeddings,
385
368
  )
386
369
 
@@ -513,7 +496,7 @@ class QdrantDocumentStore:
513
496
  query_sparse_embedding: SparseEmbedding,
514
497
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
515
498
  top_k: int = 10,
516
- scale_score: bool = True,
499
+ scale_score: bool = False,
517
500
  return_embedding: bool = False,
518
501
  ) -> List[Document]:
519
502
  """
@@ -570,7 +553,7 @@ class QdrantDocumentStore:
570
553
  query_embedding: List[float],
571
554
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
572
555
  top_k: int = 10,
573
- scale_score: bool = True,
556
+ scale_score: bool = False,
574
557
  return_embedding: bool = False,
575
558
  ) -> List[Document]:
576
559
  """
@@ -891,12 +874,7 @@ class QdrantDocumentStore:
891
874
 
892
875
  :param documents: A list of Haystack Document objects.
893
876
  :param index: name of the index
894
- :param duplicate_documents: Handle duplicate documents based on parameter options.
895
- Parameter options : ( 'skip','overwrite','fail')
896
- skip (default option): Ignore the duplicates documents.
897
- overwrite: Update any existing documents with the same ID when adding documents.
898
- fail: An error is raised if the document ID of the document being added already
899
- exists.
877
+ :param policy: The duplicate policy to use when writing documents.
900
878
  :returns: A list of Haystack Document objects.
901
879
  """
902
880
 
@@ -22,15 +22,11 @@ def test_to_dict():
22
22
  "index": "test",
23
23
  "embedding_dim": 768,
24
24
  "on_disk": False,
25
- "content_field": "content",
26
- "name_field": "name",
27
- "embedding_field": "embedding",
28
25
  "force_disable_check_same_thread": False,
29
26
  "use_sparse_embeddings": False,
30
27
  "similarity": "cosine",
31
28
  "return_embedding": False,
32
29
  "progress_bar": True,
33
- "duplicate_documents": "overwrite",
34
30
  "recreate_index": False,
35
31
  "shard_number": None,
36
32
  "replication_factor": None,
@@ -62,15 +58,11 @@ def test_from_dict():
62
58
  "index": "test",
63
59
  "embedding_dim": 768,
64
60
  "on_disk": False,
65
- "content_field": "content",
66
- "name_field": "name",
67
- "embedding_field": "embedding",
68
61
  "force_disable_check_same_thread": False,
69
62
  "use_sparse_embeddings": True,
70
63
  "similarity": "cosine",
71
64
  "return_embedding": False,
72
65
  "progress_bar": True,
73
- "duplicate_documents": "overwrite",
74
66
  "recreate_index": True,
75
67
  "shard_number": None,
76
68
  "quantization_config": None,
@@ -87,16 +79,12 @@ def test_from_dict():
87
79
  assert all(
88
80
  [
89
81
  document_store.index == "test",
90
- document_store.content_field == "content",
91
- document_store.name_field == "name",
92
- document_store.embedding_field == "embedding",
93
82
  document_store.force_disable_check_same_thread is False,
94
83
  document_store.use_sparse_embeddings is True,
95
84
  document_store.on_disk is False,
96
85
  document_store.similarity == "cosine",
97
86
  document_store.return_embedding is False,
98
87
  document_store.progress_bar,
99
- document_store.duplicate_documents == "overwrite",
100
88
  document_store.recreate_index is True,
101
89
  document_store.shard_number is None,
102
90
  document_store.replication_factor is None,
@@ -47,15 +47,11 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
47
47
  "index": "test",
48
48
  "embedding_dim": 768,
49
49
  "on_disk": False,
50
- "content_field": "content",
51
- "name_field": "name",
52
50
  "force_disable_check_same_thread": False,
53
- "embedding_field": "embedding",
54
51
  "use_sparse_embeddings": False,
55
52
  "similarity": "cosine",
56
53
  "return_embedding": False,
57
54
  "progress_bar": True,
58
- "duplicate_documents": "overwrite",
59
55
  "recreate_index": False,
60
56
  "shard_number": None,
61
57
  "replication_factor": None,
@@ -75,7 +71,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
75
71
  },
76
72
  "filters": None,
77
73
  "top_k": 10,
78
- "scale_score": True,
74
+ "scale_score": False,
79
75
  "return_embedding": False,
80
76
  },
81
77
  }
@@ -170,15 +166,11 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
170
166
  "index": "test",
171
167
  "embedding_dim": 768,
172
168
  "on_disk": False,
173
- "content_field": "content",
174
- "name_field": "name",
175
- "embedding_field": "embedding",
176
169
  "force_disable_check_same_thread": False,
177
170
  "use_sparse_embeddings": False,
178
171
  "similarity": "cosine",
179
172
  "return_embedding": False,
180
173
  "progress_bar": True,
181
- "duplicate_documents": "overwrite",
182
174
  "recreate_index": False,
183
175
  "shard_number": None,
184
176
  "replication_factor": None,
@@ -198,7 +190,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
198
190
  },
199
191
  "filters": None,
200
192
  "top_k": 10,
201
- "scale_score": True,
193
+ "scale_score": False,
202
194
  "return_embedding": False,
203
195
  },
204
196
  }
@@ -280,15 +272,11 @@ class TestQdrantHybridRetriever:
280
272
  "index": "test",
281
273
  "embedding_dim": 768,
282
274
  "on_disk": False,
283
- "content_field": "content",
284
- "name_field": "name",
285
- "embedding_field": "embedding",
286
275
  "force_disable_check_same_thread": False,
287
276
  "use_sparse_embeddings": False,
288
277
  "similarity": "cosine",
289
278
  "return_embedding": False,
290
279
  "progress_bar": True,
291
- "duplicate_documents": "overwrite",
292
280
  "recreate_index": False,
293
281
  "shard_number": None,
294
282
  "replication_factor": None,