qdrant-haystack 3.8.0__tar.gz → 4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (23) hide show
  1. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/.gitignore +9 -0
  2. qdrant_haystack-4.0.0/CHANGELOG.md +110 -0
  3. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/PKG-INFO +1 -1
  4. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/pyproject.toml +8 -11
  5. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +2 -2
  6. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +2 -3
  7. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +263 -21
  8. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_dict_converters.py +0 -12
  9. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_retriever.py +2 -14
  10. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/LICENSE.txt +0 -0
  11. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/README.md +0 -0
  12. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/examples/embedding_retrieval.py +0 -0
  13. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/pydoc/config.yml +0 -0
  14. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  15. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  16. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  17. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  18. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/__init__.py +0 -0
  19. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/conftest.py +0 -0
  20. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_converters.py +0 -0
  21. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_document_store.py +0 -0
  22. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_filters.py +0 -0
  23. {qdrant_haystack-3.8.0 → qdrant_haystack-4.0.0}/tests/test_legacy_filters.py +0 -0
@@ -135,3 +135,12 @@ dmypy.json
135
135
  # Docs generation artifacts
136
136
  _readme_*.md
137
137
  .idea
138
+
139
+ # macOS
140
+ .DS_Store
141
+
142
+ # http cache (requests-cache)
143
+ **/http_cache.sqlite
144
+
145
+ # ruff
146
+ .ruff_cache
@@ -0,0 +1,110 @@
1
+ # Changelog
2
+
3
+ ## [integrations/qdrant-v3.8.1] - 2024-06-20
4
+
5
+ ### 📚 Documentation
6
+
7
+ - Added docstrings for QdrantDocumentStore (#808)
8
+
9
+ ## [integrations/qdrant-v3.8.0] - 2024-06-06
10
+
11
+ ### 🚀 Features
12
+
13
+ - Add force_disable_check_same_thread init param for Qdrant local client (#779)
14
+
15
+ ## [integrations/qdrant-v3.7.0] - 2024-05-24
16
+
17
+ ### 🚀 Features
18
+
19
+ - Make get_distance and recreate_collection public, replace deprecated recreate_collection function (#754)
20
+
21
+ ## [integrations/qdrant-v3.6.0] - 2024-05-24
22
+
23
+ ### 🚀 Features
24
+
25
+ - Defer database connection to the first usage (#748)
26
+
27
+ ## [integrations/qdrant-v3.5.0] - 2024-04-24
28
+
29
+ ## [integrations/qdrant-v3.4.0] - 2024-04-23
30
+
31
+ ### Qdrant
32
+
33
+ - Add embedding retrieval example (#666)
34
+
35
+ ## [integrations/qdrant-v3.3.1] - 2024-04-12
36
+
37
+ ### Qdrant
38
+
39
+ - Add migration utility function for Sparse Embedding support (#659)
40
+
41
+ ## [integrations/qdrant-v3.3.0] - 2024-04-12
42
+
43
+ ### 🚀 Features
44
+
45
+ - *(Qdrant)* Start to work on sparse vector integration (#578)
46
+
47
+ ## [integrations/qdrant-v3.2.1] - 2024-04-09
48
+
49
+ ### 🐛 Bug Fixes
50
+
51
+ - Fix haystack-ai pin (#649)
52
+
53
+
54
+
55
+ ## [integrations/qdrant-v3.2.0] - 2024-03-27
56
+
57
+ ### 🚀 Features
58
+
59
+ - *(Qdrant)* Allow payload indexing + on disk vectors (#553)
60
+ - Qdrant datetime filtering support (#570)
61
+
62
+ ### 🐛 Bug Fixes
63
+
64
+ - Fix linter errors (#282)
65
+
66
+
67
+ - Fix order of API docs (#447)
68
+
69
+ This PR will also push the docs to Readme
70
+ - Fixes (#518)
71
+
72
+
73
+
74
+ ### 🚜 Refactor
75
+
76
+ - [**breaking**] Qdrant - update secret management (#405)
77
+
78
+ ### 📚 Documentation
79
+
80
+ - Update category slug (#442)
81
+ - Small consistency improvements (#536)
82
+ - Disable-class-def (#556)
83
+
84
+ ### ⚙️ Miscellaneous Tasks
85
+
86
+ - Generate API docs for Qdrant (#361)
87
+
88
+ ## [integrations/qdrant-v3.0.0] - 2024-01-22
89
+
90
+ ### Refact
91
+
92
+ - [**breaking**] Change import paths (#255)
93
+
94
+ ## [integrations/qdrant-v2.0.1] - 2024-01-18
95
+
96
+ ### 🚀 Features
97
+
98
+ - Add Qdrant integration (#98)
99
+
100
+ ### 🐛 Bug Fixes
101
+
102
+ - Fix import paths for beta5 (#237)
103
+
104
+
105
+
106
+ ### 🚜 Refactor
107
+
108
+ - Use `hatch_vcs` to manage integrations versioning (#103)
109
+
110
+ <!-- generated by git-cliff -->
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 3.8.0
3
+ Version: 4.0.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -44,10 +44,10 @@ root = "../.."
44
44
  git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
45
45
 
46
46
  [tool.hatch.envs.default]
47
- dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
47
+ dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
48
48
  [tool.hatch.envs.default.scripts]
49
- test = "pytest {args:tests}"
50
- test-cov = "coverage run -m pytest {args:tests}"
49
+ test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
50
+ test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
51
51
  cov-report = ["- coverage combine", "coverage report"]
52
52
  cov = ["test-cov", "cov-report"]
53
53
  docs = ["pydoc-markdown pydoc/config.yml"]
@@ -60,7 +60,7 @@ detached = true
60
60
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
61
61
  [tool.hatch.envs.lint.scripts]
62
62
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
63
- style = ["ruff {args:.}", "black --check --diff {args:.}"]
63
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
64
64
  fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
65
65
  all = ["style", "typing"]
66
66
 
@@ -105,7 +105,8 @@ ignore = [
105
105
  # Allow boolean positional values in function calls, like `dict.get(... True)`
106
106
  "FBT003",
107
107
  # Allow boolean arguments in function definition
108
- "FBT001", "FBT002",
108
+ "FBT001",
109
+ "FBT002",
109
110
  # Ignore checks for possible passwords
110
111
  "S105",
111
112
  "S106",
@@ -140,12 +141,8 @@ parallel = false
140
141
 
141
142
  [tool.coverage.report]
142
143
  omit = ["*/tests/*", "*/__init__.py"]
143
- show_missing=true
144
- exclude_lines = [
145
- "no cov",
146
- "if __name__ == .__main__.:",
147
- "if TYPE_CHECKING:",
148
- ]
144
+ show_missing = true
145
+ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
149
146
 
150
147
 
151
148
  [[tool.mypy.overrides]]
@@ -37,7 +37,7 @@ class QdrantEmbeddingRetriever:
37
37
  document_store: QdrantDocumentStore,
38
38
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
39
39
  top_k: int = 10,
40
- scale_score: bool = True,
40
+ scale_score: bool = False,
41
41
  return_embedding: bool = False,
42
42
  ):
43
43
  """
@@ -159,7 +159,7 @@ class QdrantSparseEmbeddingRetriever:
159
159
  document_store: QdrantDocumentStore,
160
160
  filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
161
161
  top_k: int = 10,
162
- scale_score: bool = True,
162
+ scale_score: bool = False,
163
163
  return_embedding: bool = False,
164
164
  ):
165
165
  """
@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
17
17
  def convert_haystack_documents_to_qdrant_points(
18
18
  documents: List[Document],
19
19
  *,
20
- embedding_field: str,
21
20
  use_sparse_embeddings: bool,
22
21
  ) -> List[rest.PointStruct]:
23
22
  points = []
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
26
25
  if use_sparse_embeddings:
27
26
  vector = {}
28
27
 
29
- dense_vector = payload.pop(embedding_field, None)
28
+ dense_vector = payload.pop("embedding", None)
30
29
  if dense_vector is not None:
31
30
  vector[DENSE_VECTORS_NAME] = dense_vector
32
31
 
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
36
35
  vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
37
36
 
38
37
  else:
39
- vector = payload.pop(embedding_field) or {}
38
+ vector = payload.pop("embedding") or {}
40
39
  _id = convert_id(payload.get("id"))
41
40
 
42
41
  point = rest.PointStruct(
@@ -49,6 +49,44 @@ def get_batches_from_generator(iterable, n):
49
49
 
50
50
 
51
51
  class QdrantDocumentStore:
52
+ """
53
+ QdrantDocumentStore is a Document Store for Qdrant.
54
+ It can be used with any Qdrant instance: in-memory, disk-persisted, Docker-based,
55
+ and Qdrant Cloud Cluster deployments.
56
+
57
+ Usage example by creating an in-memory instance:
58
+
59
+ ```python
60
+ from haystack.dataclasses.document import Document
61
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
62
+
63
+ document_store = QdrantDocumentStore(
64
+ ":memory:",
65
+ recreate_index=True
66
+ )
67
+ document_store.write_documents([
68
+ Document(content="This is first", embedding=[0.0]*5),
69
+ Document(content="This is second", embedding=[0.1, 0.2, 0.3, 0.4, 0.5])
70
+ ])
71
+ ```
72
+
73
+ Usage example with Qdrant Cloud:
74
+
75
+ ```python
76
+ from haystack.dataclasses.document import Document
77
+ from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
78
+
79
+ document_store = QdrantDocumentStore(
80
+ url="https://xxxxxx-xxxxx-xxxxx-xxxx-xxxxxxxxx.us-east.aws.cloud.qdrant.io:6333",
81
+ api_key="<your-api-key>",
82
+ )
83
+ document_store.write_documents([
84
+ Document(content="This is first", embedding=[0.0]*5),
85
+ Document(content="This is second", embedding=[0.1, 0.2, 0.3, 0.4, 0.5])
86
+ ])
87
+ ```
88
+ """
89
+
52
90
  SIMILARITY: ClassVar[Dict[str, str]] = {
53
91
  "cosine": rest.Distance.COSINE,
54
92
  "dot_product": rest.Distance.DOT,
@@ -72,14 +110,10 @@ class QdrantDocumentStore:
72
110
  index: str = "Document",
73
111
  embedding_dim: int = 768,
74
112
  on_disk: bool = False,
75
- content_field: str = "content",
76
- name_field: str = "name",
77
- embedding_field: str = "embedding",
78
113
  use_sparse_embeddings: bool = False,
79
114
  similarity: str = "cosine",
80
115
  return_embedding: bool = False,
81
116
  progress_bar: bool = True,
82
- duplicate_documents: str = "overwrite",
83
117
  recreate_index: bool = False,
84
118
  shard_number: Optional[int] = None,
85
119
  replication_factor: Optional[int] = None,
@@ -96,6 +130,90 @@ class QdrantDocumentStore:
96
130
  scroll_size: int = 10_000,
97
131
  payload_fields_to_index: Optional[List[dict]] = None,
98
132
  ):
133
+ """
134
+ :param location:
135
+ If `":memory:"` - use in-memory Qdrant instance.
136
+ If `str` - use it as a URL parameter.
137
+ If `None` - use default values for host and port.
138
+ :param url:
139
+ Either host or str of `Optional[scheme], host, Optional[port], Optional[prefix]`.
140
+ :param port:
141
+ Port of the REST API interface.
142
+ :param grpc_port:
143
+ Port of the gRPC interface.
144
+ :param prefer_grpc:
145
+ If `True` - use gRPC interface whenever possible in custom methods.
146
+ :param https:
147
+ If `True` - use HTTPS(SSL) protocol.
148
+ :param api_key:
149
+ API key for authentication in Qdrant Cloud.
150
+ :param prefix:
151
+ If not `None` - add prefix to the REST URL path.
152
+ Example: service/v1 will result in http://localhost:6333/service/v1/{qdrant-endpoint}
153
+ for REST API.
154
+ :param timeout:
155
+ Timeout for REST and gRPC API requests.
156
+ :param host:
157
+ Host name of Qdrant service. If `url` and `host` are `None`, set to `localhost`.
158
+ :param path:
159
+ Persistence path for QdrantLocal.
160
+ :param force_disable_check_same_thread:
161
+ For QdrantLocal, force disable check_same_thread.
162
+ Only use this if you can guarantee that you can resolve the thread safety outside QdrantClient.
163
+ :param index:
164
+ Name of the index.
165
+ :param embedding_dim:
166
+ Dimension of the embeddings.
167
+ :param on_disk:
168
+ Whether to store the collection on disk.
169
+ :param use_sparse_embeddings:
170
+ If set to `True`, enables support for sparse embeddings.
171
+ :param similarity:
172
+ The similarity metric to use.
173
+ :param return_embedding:
174
+ Whether to return embeddings in the search results.
175
+ :param progress_bar:
176
+ Whether to show a progress bar or not.
177
+ :param recreate_index:
178
+ Whether to recreate the index.
179
+ :param shard_number:
180
+ Number of shards in the collection.
181
+ :param replication_factor:
182
+ Replication factor for the collection.
183
+ Defines how many copies of each shard will be created. Effective only in distributed mode.
184
+ :param write_consistency_factor:
185
+ Write consistency factor for the collection. Minimum value is 1.
186
+ Defines how many replicas should apply to the operation for it to be considered successful.
187
+ Increasing this number makes the collection more resilient to inconsistencies
188
+ but will cause failures if not enough replicas are available.
189
+ Effective only in distributed mode.
190
+ :param on_disk_payload:
191
+ If `True`, the point's payload will not be stored in memory and
192
+ will be read from the disk every time it is requested.
193
+ This setting saves RAM by slightly increasing response time.
194
+ Note: indexed payload values remain in RAM.
195
+ :param hnsw_config:
196
+ Params for HNSW index.
197
+ :param optimizers_config:
198
+ Params for optimizer.
199
+ :param wal_config:
200
+ Params for Write-Ahead-Log.
201
+ :param quantization_config:
202
+ Params for quantization. If `None`, quantization will be disabled.
203
+ :param init_from:
204
+ Use data stored in another collection to initialize this collection.
205
+ :param wait_result_from_api:
206
+ Whether to wait for the result from the API after each request.
207
+ :param metadata:
208
+ Additional metadata to include with the documents.
209
+ :param write_batch_size:
210
+ The batch size for writing documents.
211
+ :param scroll_size:
212
+ The scroll size for reading documents.
213
+ :param payload_fields_to_index:
214
+ List of payload fields to index.
215
+ """
216
+
99
217
  self._client = None
100
218
 
101
219
  # Store the Qdrant client specific attributes
@@ -130,14 +248,10 @@ class QdrantDocumentStore:
130
248
  self.use_sparse_embeddings = use_sparse_embeddings
131
249
  self.embedding_dim = embedding_dim
132
250
  self.on_disk = on_disk
133
- self.content_field = content_field
134
- self.name_field = name_field
135
- self.embedding_field = embedding_field
136
251
  self.similarity = similarity
137
252
  self.index = index
138
253
  self.return_embedding = return_embedding
139
254
  self.progress_bar = progress_bar
140
- self.duplicate_documents = duplicate_documents
141
255
  self.write_batch_size = write_batch_size
142
256
  self.scroll_size = scroll_size
143
257
 
@@ -172,6 +286,9 @@ class QdrantDocumentStore:
172
286
  return self._client
173
287
 
174
288
  def count_documents(self) -> int:
289
+ """
290
+ Returns the number of documents present in the Document Store.
291
+ """
175
292
  try:
176
293
  response = self.client.count(
177
294
  collection_name=self.index,
@@ -187,6 +304,15 @@ class QdrantDocumentStore:
187
304
  self,
188
305
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
189
306
  ) -> List[Document]:
307
+ """
308
+ Returns the documents that match the provided filters.
309
+
310
+ For a detailed specification of the filters, refer to the
311
+ [documentation](https://docs.haystack.deepset.ai/docs/metadata-filtering)
312
+
313
+ :param filters: The filters to apply to the document list.
314
+ :returns: A list of documents that match the given filters.
315
+ """
190
316
  if filters and not isinstance(filters, dict) and not isinstance(filters, rest.Filter):
191
317
  msg = "Filter must be a dictionary or an instance of `qdrant_client.http.models.Filter`"
192
318
  raise ValueError(msg)
@@ -204,6 +330,19 @@ class QdrantDocumentStore:
204
330
  documents: List[Document],
205
331
  policy: DuplicatePolicy = DuplicatePolicy.FAIL,
206
332
  ):
333
+ """
334
+ Writes documents to Qdrant using the specified policy.
335
+ The QdrantDocumentStore can handle duplicate documents based on the given policy.
336
+ The available policies are:
337
+ - `FAIL`: The operation will raise an error if any document already exists.
338
+ - `OVERWRITE`: Existing documents will be overwritten with the new ones.
339
+ - `SKIP`: Existing documents will be skipped, and only new documents will be added.
340
+
341
+ :param documents: A list of Document objects to write to Qdrant.
342
+ :param policy: The policy for handling duplicate documents.
343
+
344
+ :returns: The number of documents written to the document store.
345
+ """
207
346
  for doc in documents:
208
347
  if not isinstance(doc, Document):
209
348
  msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
@@ -225,7 +364,6 @@ class QdrantDocumentStore:
225
364
  for document_batch in batched_documents:
226
365
  batch = convert_haystack_documents_to_qdrant_points(
227
366
  document_batch,
228
- embedding_field=self.embedding_field,
229
367
  use_sparse_embeddings=self.use_sparse_embeddings,
230
368
  )
231
369
 
@@ -239,6 +377,11 @@ class QdrantDocumentStore:
239
377
  return len(document_objects)
240
378
 
241
379
  def delete_documents(self, ids: List[str]):
380
+ """
381
+ Deletes documents that match the provided `ids` from the document store.
382
+
383
+ :param ids: The IDs of the documents to delete.
384
+ """
242
385
  ids = [convert_id(_id) for _id in ids]
243
386
  try:
244
387
  self.client.delete(
@@ -253,10 +396,24 @@ class QdrantDocumentStore:
253
396
 
254
397
  @classmethod
255
398
  def from_dict(cls, data: Dict[str, Any]) -> "QdrantDocumentStore":
399
+ """
400
+ Deserializes the component from a dictionary.
401
+
402
+ :param data:
403
+ The dictionary to deserialize from.
404
+ :returns:
405
+ The deserialized component.
406
+ """
256
407
  deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
257
408
  return default_from_dict(cls, data)
258
409
 
259
410
  def to_dict(self) -> Dict[str, Any]:
411
+ """
412
+ Serializes the component to a dictionary.
413
+
414
+ :returns:
415
+ Dictionary with serialized data.
416
+ """
260
417
  params = inspect.signature(self.__init__).parameters # type: ignore
261
418
  # All the __init__ params must be set as attributes
262
419
  # Set as init_parms without default values
@@ -271,6 +428,13 @@ class QdrantDocumentStore:
271
428
  self,
272
429
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
273
430
  ) -> Generator[Document, None, None]:
431
+ """
432
+ Returns a generator that yields documents from Qdrant based on the provided filters.
433
+
434
+ :param filters: Filters applied to the retrieved documents.
435
+ :returns: A generator that yields documents retrieved from Qdrant.
436
+ """
437
+
274
438
  index = self.index
275
439
  qdrant_filters = convert_filters_to_qdrant(filters)
276
440
 
@@ -299,6 +463,16 @@ class QdrantDocumentStore:
299
463
  ids: List[str],
300
464
  index: Optional[str] = None,
301
465
  ) -> List[Document]:
466
+ """
467
+ Retrieves documents from Qdrant by their IDs.
468
+
469
+ :param ids:
470
+ A list of document IDs to retrieve.
471
+ :param index:
472
+ The name of the index to retrieve documents from.
473
+ :returns:
474
+ A list of documents.
475
+ """
302
476
  index = index or self.index
303
477
 
304
478
  documents: List[Document] = []
@@ -322,9 +496,24 @@ class QdrantDocumentStore:
322
496
  query_sparse_embedding: SparseEmbedding,
323
497
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
324
498
  top_k: int = 10,
325
- scale_score: bool = True,
499
+ scale_score: bool = False,
326
500
  return_embedding: bool = False,
327
501
  ) -> List[Document]:
502
+ """
503
+ Queries Qdrant using a sparse embedding and returns the most relevant documents.
504
+
505
+ :param query_sparse_embedding: Sparse embedding of the query.
506
+ :param filters: Filters applied to the retrieved documents.
507
+ :param top_k: Maximum number of documents to return.
508
+ :param scale_score: Whether to scale the scores of the retrieved documents.
509
+ :param return_embedding: Whether to return the embeddings of the retrieved documents.
510
+
511
+ :returns: List of documents that are most similar to `query_sparse_embedding`.
512
+
513
+ :raises QdrantStoreError:
514
+ If the Document Store was initialized with `use_sparse_embeddings=False`.
515
+ """
516
+
328
517
  if not self.use_sparse_embeddings:
329
518
  message = (
330
519
  "You are trying to query using sparse embeddings, but the Document Store "
@@ -364,9 +553,20 @@ class QdrantDocumentStore:
364
553
  query_embedding: List[float],
365
554
  filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
366
555
  top_k: int = 10,
367
- scale_score: bool = True,
556
+ scale_score: bool = False,
368
557
  return_embedding: bool = False,
369
558
  ) -> List[Document]:
559
+ """
560
+ Queries Qdrant using a dense embedding and returns the most relevant documents.
561
+
562
+ :param query_embedding: Dense embedding of the query.
563
+ :param filters: Filters applied to the retrieved documents.
564
+ :param top_k: Maximum number of documents to return.
565
+ :param scale_score: Whether to scale the scores of the retrieved documents.
566
+ :param return_embedding: Whether to return the embeddings of the retrieved documents.
567
+
568
+ :returns: List of documents that are most similar to `query_embedding`.
569
+ """
370
570
  qdrant_filters = convert_filters_to_qdrant(filters)
371
571
 
372
572
  points = self.client.search(
@@ -409,8 +609,8 @@ class QdrantDocumentStore:
409
609
 
410
610
  :param query_embedding: Dense embedding of the query.
411
611
  :param query_sparse_embedding: Sparse embedding of the query.
412
- :param filters: Filters applied to the retrieved Documents.
413
- :param top_k: Maximum number of Documents to return.
612
+ :param filters: Filters applied to the retrieved documents.
613
+ :param top_k: Maximum number of documents to return.
414
614
  :param return_embedding: Whether to return the embeddings of the retrieved documents.
415
615
 
416
616
  :returns: List of documents that are most similar to `query_embedding` and `query_sparse_embedding`.
@@ -474,6 +674,16 @@ class QdrantDocumentStore:
474
674
  return results
475
675
 
476
676
  def get_distance(self, similarity: str) -> rest.Distance:
677
+ """
678
+ Retrieves the distance metric for the specified similarity measure.
679
+
680
+ :param similarity:
681
+ The similarity measure to retrieve the distance.
682
+ :returns:
683
+ The corresponding rest.Distance object.
684
+ :raises QdrantStoreError:
685
+ If the provided similarity measure is not supported.
686
+ """
477
687
  try:
478
688
  return self.SIMILARITY[similarity]
479
689
  except KeyError as ke:
@@ -507,6 +717,29 @@ class QdrantDocumentStore:
507
717
  on_disk: bool = False,
508
718
  payload_fields_to_index: Optional[List[dict]] = None,
509
719
  ):
720
+ """
721
+ Sets up the Qdrant collection with the specified parameters.
722
+ :param collection_name:
723
+ The name of the collection to set up.
724
+ :param embedding_dim:
725
+ The dimension of the embeddings.
726
+ :param recreate_collection:
727
+ Whether to recreate the collection if it already exists.
728
+ :param similarity:
729
+ The similarity measure to use.
730
+ :param use_sparse_embeddings:
731
+ Whether to use sparse embeddings.
732
+ :param on_disk:
733
+ Whether to store the collection on disk.
734
+ :param payload_fields_to_index:
735
+ List of payload fields to index.
736
+
737
+ :raises QdrantStoreError:
738
+ If the collection exists with incompatible settings.
739
+ :raises ValueError:
740
+ If the collection exists with a different similarity measure or embedding dimension.
741
+
742
+ """
510
743
  distance = self.get_distance(similarity)
511
744
 
512
745
  if recreate_collection or not self.client.collection_exists(collection_name):
@@ -576,6 +809,20 @@ class QdrantDocumentStore:
576
809
  on_disk: Optional[bool] = None,
577
810
  use_sparse_embeddings: Optional[bool] = None,
578
811
  ):
812
+ """
813
+ Recreates the Qdrant collection with the specified parameters.
814
+
815
+ :param collection_name:
816
+ The name of the collection to recreate.
817
+ :param distance:
818
+ The distance metric to use for the collection.
819
+ :param embedding_dim:
820
+ The dimension of the embeddings.
821
+ :param on_disk:
822
+ Whether to store the collection on disk.
823
+ :param use_sparse_embeddings:
824
+ Whether to use sparse embeddings.
825
+ """
579
826
  if on_disk is None:
580
827
  on_disk = self.on_disk
581
828
 
@@ -627,12 +874,7 @@ class QdrantDocumentStore:
627
874
 
628
875
  :param documents: A list of Haystack Document objects.
629
876
  :param index: name of the index
630
- :param duplicate_documents: Handle duplicates document based on parameter options.
631
- Parameter options : ( 'skip','overwrite','fail')
632
- skip (default option): Ignore the duplicates documents
633
- overwrite: Update any existing documents with the same ID when adding documents.
634
- fail: an error is raised if the document ID of the document being added already
635
- exists.
877
+ :param policy: The duplicate policy to use when writing documents.
636
878
  :returns: A list of Haystack Document objects.
637
879
  """
638
880
 
@@ -652,10 +894,10 @@ class QdrantDocumentStore:
652
894
 
653
895
  def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
654
896
  """
655
- Drop duplicates documents based on same hash ID
897
+ Drop duplicate documents based on same hash ID.
656
898
 
657
899
  :param documents: A list of Haystack Document objects.
658
- :param index: name of the index
900
+ :param index: Name of the index.
659
901
  :returns: A list of Haystack Document objects.
660
902
  """
661
903
  _hash_ids: Set = set()
@@ -22,15 +22,11 @@ def test_to_dict():
22
22
  "index": "test",
23
23
  "embedding_dim": 768,
24
24
  "on_disk": False,
25
- "content_field": "content",
26
- "name_field": "name",
27
- "embedding_field": "embedding",
28
25
  "force_disable_check_same_thread": False,
29
26
  "use_sparse_embeddings": False,
30
27
  "similarity": "cosine",
31
28
  "return_embedding": False,
32
29
  "progress_bar": True,
33
- "duplicate_documents": "overwrite",
34
30
  "recreate_index": False,
35
31
  "shard_number": None,
36
32
  "replication_factor": None,
@@ -62,15 +58,11 @@ def test_from_dict():
62
58
  "index": "test",
63
59
  "embedding_dim": 768,
64
60
  "on_disk": False,
65
- "content_field": "content",
66
- "name_field": "name",
67
- "embedding_field": "embedding",
68
61
  "force_disable_check_same_thread": False,
69
62
  "use_sparse_embeddings": True,
70
63
  "similarity": "cosine",
71
64
  "return_embedding": False,
72
65
  "progress_bar": True,
73
- "duplicate_documents": "overwrite",
74
66
  "recreate_index": True,
75
67
  "shard_number": None,
76
68
  "quantization_config": None,
@@ -87,16 +79,12 @@ def test_from_dict():
87
79
  assert all(
88
80
  [
89
81
  document_store.index == "test",
90
- document_store.content_field == "content",
91
- document_store.name_field == "name",
92
- document_store.embedding_field == "embedding",
93
82
  document_store.force_disable_check_same_thread is False,
94
83
  document_store.use_sparse_embeddings is True,
95
84
  document_store.on_disk is False,
96
85
  document_store.similarity == "cosine",
97
86
  document_store.return_embedding is False,
98
87
  document_store.progress_bar,
99
- document_store.duplicate_documents == "overwrite",
100
88
  document_store.recreate_index is True,
101
89
  document_store.shard_number is None,
102
90
  document_store.replication_factor is None,
@@ -47,15 +47,11 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
47
47
  "index": "test",
48
48
  "embedding_dim": 768,
49
49
  "on_disk": False,
50
- "content_field": "content",
51
- "name_field": "name",
52
50
  "force_disable_check_same_thread": False,
53
- "embedding_field": "embedding",
54
51
  "use_sparse_embeddings": False,
55
52
  "similarity": "cosine",
56
53
  "return_embedding": False,
57
54
  "progress_bar": True,
58
- "duplicate_documents": "overwrite",
59
55
  "recreate_index": False,
60
56
  "shard_number": None,
61
57
  "replication_factor": None,
@@ -75,7 +71,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
75
71
  },
76
72
  "filters": None,
77
73
  "top_k": 10,
78
- "scale_score": True,
74
+ "scale_score": False,
79
75
  "return_embedding": False,
80
76
  },
81
77
  }
@@ -170,15 +166,11 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
170
166
  "index": "test",
171
167
  "embedding_dim": 768,
172
168
  "on_disk": False,
173
- "content_field": "content",
174
- "name_field": "name",
175
- "embedding_field": "embedding",
176
169
  "force_disable_check_same_thread": False,
177
170
  "use_sparse_embeddings": False,
178
171
  "similarity": "cosine",
179
172
  "return_embedding": False,
180
173
  "progress_bar": True,
181
- "duplicate_documents": "overwrite",
182
174
  "recreate_index": False,
183
175
  "shard_number": None,
184
176
  "replication_factor": None,
@@ -198,7 +190,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
198
190
  },
199
191
  "filters": None,
200
192
  "top_k": 10,
201
- "scale_score": True,
193
+ "scale_score": False,
202
194
  "return_embedding": False,
203
195
  },
204
196
  }
@@ -280,15 +272,11 @@ class TestQdrantHybridRetriever:
280
272
  "index": "test",
281
273
  "embedding_dim": 768,
282
274
  "on_disk": False,
283
- "content_field": "content",
284
- "name_field": "name",
285
- "embedding_field": "embedding",
286
275
  "force_disable_check_same_thread": False,
287
276
  "use_sparse_embeddings": False,
288
277
  "similarity": "cosine",
289
278
  "return_embedding": False,
290
279
  "progress_bar": True,
291
- "duplicate_documents": "overwrite",
292
280
  "recreate_index": False,
293
281
  "shard_number": None,
294
282
  "replication_factor": None,