PyPI - qdrant-haystack - Versions diffs - 3.8.1__tar.gz → 4.1.0__tar.gz - Mend

qdrant-haystack 3.8.1.tar.gz → 4.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qdrant-haystack might be problematic. Click here for more details.

Files changed (23) hide show

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/.gitignore RENAMED Viewed

@@ -135,3 +135,12 @@ dmypy.json
 # Docs generation artifacts
 _readme_*.md
 .idea
+# macOS
+.DS_Store
+# http cache (requests-cache)
+**/http_cache.sqlite
+# ruff
+.ruff_cache

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,23 @@
 # Changelog
+## [integrations/qdrant-v4.0.0] - 2024-07-02
+### 🚜 Refactor
+- [**breaking**] Qdrant - remove unused init parameters: `content_field`, `name_field`, `embedding_field`, and `duplicate_documents` (#861)
+- [**breaking**] Qdrant - set `scale_score` default value to `False` (#862)
+### ⚙️ Miscellaneous Tasks
+- Retry tests to reduce flakiness (#836)
+- Update ruff invocation to include check parameter (#853)
+## [integrations/qdrant-v3.8.1] - 2024-06-20
+### 📚 Documentation
+- Added docstrings for QdrantDocumentStore (#808)
 ## [integrations/qdrant-v3.8.0] - 2024-06-06
 ### 🚀 Features

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: qdrant-haystack
-Version: 3.8.1
+Version: 4.1.0
 Summary: An integration of Qdrant ANN vector database backend with Haystack
 Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Python: >=3.8
 Requires-Dist: haystack-ai>=2.0.1
-Requires-Dist: qdrant-client
+Requires-Dist: qdrant-client>=1.10.0
 Description-Content-Type: text/markdown
 # qdrant-haystack

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/pyproject.toml RENAMED Viewed

@@ -25,7 +25,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.0.1", "qdrant-client"]
+dependencies = ["haystack-ai>=2.0.1", "qdrant-client>=1.10.0"]
 [project.urls]
 Source = "https://github.com/deepset-ai/haystack-core-integrations"
@@ -44,10 +44,10 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
 [tool.hatch.envs.default]
-dependencies = ["coverage[toml]>=6.5", "pytest", "haystack-pydoc-tools"]
+dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
 [tool.hatch.envs.default.scripts]
-test = "pytest {args:tests}"
-test-cov = "coverage run -m pytest {args:tests}"
+test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
@@ -60,7 +60,7 @@ detached = true
 dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
-style = ["ruff {args:.}", "black --check --diff {args:.}"]
+style = ["ruff check {args:.}", "black --check --diff {args:.}"]
 fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
 all = ["style", "typing"]
@@ -105,7 +105,8 @@ ignore = [
   # Allow boolean positional values in function calls, like `dict.get(... True)`
   "FBT003",
   # Allow boolean arguments in function definition
-  "FBT001", "FBT002",
+  "FBT001",
+  "FBT002",
   # Ignore checks for possible passwords
   "S105",
   "S106",
@@ -140,12 +141,8 @@ parallel = false
 [tool.coverage.report]
 omit = ["*/tests/*", "*/__init__.py"]
-show_missing=true
-exclude_lines = [
-  "no cov",
-  "if __name__ == .__main__.:",
-  "if TYPE_CHECKING:",
-]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
 [[tool.mypy.overrides]]

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py RENAMED Viewed

@@ -37,8 +37,9 @@ class QdrantEmbeddingRetriever:
         document_store: QdrantDocumentStore,
         filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ):
         """
         Create a QdrantEmbeddingRetriever component.
@@ -48,6 +49,10 @@ class QdrantEmbeddingRetriever:
         :param top_k: The maximum number of documents to retrieve.
         :param scale_score: Whether to scale the scores of the retrieved documents or not.
         :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the `similarity` function specified in the Document Store.
+            E.g. for cosine similarity only higher scores will be returned.
         :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
         """
@@ -61,6 +66,7 @@ class QdrantEmbeddingRetriever:
         self._top_k = top_k
         self._scale_score = scale_score
         self._return_embedding = return_embedding
+        self._score_threshold = score_threshold
     def to_dict(self) -> Dict[str, Any]:
         """
@@ -76,6 +82,7 @@ class QdrantEmbeddingRetriever:
             top_k=self._top_k,
             scale_score=self._scale_score,
             return_embedding=self._return_embedding,
+            score_threshold=self._score_threshold,
         )
         d["init_parameters"]["document_store"] = self._document_store.to_dict()
@@ -103,6 +110,7 @@ class QdrantEmbeddingRetriever:
         top_k: Optional[int] = None,
         scale_score: Optional[bool] = None,
         return_embedding: Optional[bool] = None,
+        score_threshold: Optional[float] = None,
     ):
         """
         Run the Embedding Retriever on the given input data.
@@ -112,6 +120,7 @@ class QdrantEmbeddingRetriever:
         :param top_k: The maximum number of documents to return.
         :param scale_score: Whether to scale the scores of the retrieved documents or not.
         :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
         :returns:
             The retrieved documents.
@@ -122,6 +131,7 @@ class QdrantEmbeddingRetriever:
             top_k=top_k or self._top_k,
             scale_score=scale_score or self._scale_score,
             return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
         )
         return {"documents": docs}
@@ -159,8 +169,9 @@ class QdrantSparseEmbeddingRetriever:
         document_store: QdrantDocumentStore,
         filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ):
         """
         Create a QdrantSparseEmbeddingRetriever component.
@@ -170,6 +181,10 @@ class QdrantSparseEmbeddingRetriever:
         :param top_k: The maximum number of documents to retrieve.
         :param scale_score: Whether to scale the scores of the retrieved documents or not.
         :param return_embedding: Whether to return the sparse embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :raises ValueError: If `document_store` is not an instance of `QdrantDocumentStore`.
         """
@@ -183,6 +198,7 @@ class QdrantSparseEmbeddingRetriever:
         self._top_k = top_k
         self._scale_score = scale_score
         self._return_embedding = return_embedding
+        self._score_threshold = score_threshold
     def to_dict(self) -> Dict[str, Any]:
         """
@@ -198,6 +214,7 @@ class QdrantSparseEmbeddingRetriever:
             top_k=self._top_k,
             scale_score=self._scale_score,
             return_embedding=self._return_embedding,
+            score_threshold=self._score_threshold,
         )
         d["init_parameters"]["document_store"] = self._document_store.to_dict()
@@ -225,6 +242,7 @@ class QdrantSparseEmbeddingRetriever:
         top_k: Optional[int] = None,
         scale_score: Optional[bool] = None,
         return_embedding: Optional[bool] = None,
+        score_threshold: Optional[float] = None,
     ):
         """
         Run the Sparse Embedding Retriever on the given input data.
@@ -234,6 +252,10 @@ class QdrantSparseEmbeddingRetriever:
         :param top_k: The maximum number of documents to return.
         :param scale_score: Whether to scale the scores of the retrieved documents or not.
         :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :returns:
             The retrieved documents.
@@ -244,6 +266,7 @@ class QdrantSparseEmbeddingRetriever:
             top_k=top_k or self._top_k,
             scale_score=scale_score or self._scale_score,
             return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
         )
         return {"documents": docs}
@@ -288,6 +311,7 @@ class QdrantHybridRetriever:
         filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
         top_k: int = 10,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ):
         """
         Create a QdrantHybridRetriever component.
@@ -296,6 +320,10 @@ class QdrantHybridRetriever:
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The maximum number of documents to retrieve.
         :param return_embedding: Whether to return the embeddings of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
         """
@@ -308,6 +336,7 @@ class QdrantHybridRetriever:
         self._filters = filters
         self._top_k = top_k
         self._return_embedding = return_embedding
+        self._score_threshold = score_threshold
     def to_dict(self) -> Dict[str, Any]:
         """
@@ -322,6 +351,7 @@ class QdrantHybridRetriever:
             filters=self._filters,
             top_k=self._top_k,
             return_embedding=self._return_embedding,
+            score_threshold=self._score_threshold,
         )
     @classmethod
@@ -346,6 +376,7 @@ class QdrantHybridRetriever:
         filters: Optional[Union[Dict[str, Any], models.Filter]] = None,
         top_k: Optional[int] = None,
         return_embedding: Optional[bool] = None,
+        score_threshold: Optional[float] = None,
     ):
         """
         Run the Hybrid Retriever on the given input data.
@@ -355,6 +386,10 @@ class QdrantHybridRetriever:
         :param filters: A dictionary with filters to narrow down the search space.
         :param top_k: The maximum number of documents to return.
         :param return_embedding: Whether to return the embedding of the retrieved Documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :returns:
             The retrieved documents.
@@ -365,6 +400,7 @@ class QdrantHybridRetriever:
             filters=filters or self._filters,
             top_k=top_k or self._top_k,
             return_embedding=return_embedding or self._return_embedding,
+            score_threshold=score_threshold or self._score_threshold,
         )
         return {"documents": docs}

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/converters.py RENAMED Viewed

@@ -17,7 +17,6 @@ UUID_NAMESPACE = uuid.UUID("3896d314-1e95-4a3a-b45a-945f9f0b541d")
 def convert_haystack_documents_to_qdrant_points(
     documents: List[Document],
     *,
-    embedding_field: str,
     use_sparse_embeddings: bool,
 ) -> List[rest.PointStruct]:
     points = []
@@ -26,7 +25,7 @@ def convert_haystack_documents_to_qdrant_points(
         if use_sparse_embeddings:
             vector = {}
-            dense_vector = payload.pop(embedding_field, None)
+            dense_vector = payload.pop("embedding", None)
             if dense_vector is not None:
                 vector[DENSE_VECTORS_NAME] = dense_vector
@@ -36,7 +35,7 @@ def convert_haystack_documents_to_qdrant_points(
                 vector[SPARSE_VECTORS_NAME] = sparse_vector_instance
         else:
-            vector = payload.pop(embedding_field) or {}
+            vector = payload.pop("embedding") or {}
         _id = convert_id(payload.get("id"))
         point = rest.PointStruct(

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/src/haystack_integrations/document_stores/qdrant/document_store.py RENAMED Viewed

@@ -110,14 +110,11 @@ class QdrantDocumentStore:
         index: str = "Document",
         embedding_dim: int = 768,
         on_disk: bool = False,
-        content_field: str = "content",
-        name_field: str = "name",
-        embedding_field: str = "embedding",
         use_sparse_embeddings: bool = False,
+        sparse_idf: bool = False,
         similarity: str = "cosine",
         return_embedding: bool = False,
         progress_bar: bool = True,
-        duplicate_documents: str = "overwrite",
         recreate_index: bool = False,
         shard_number: Optional[int] = None,
         replication_factor: Optional[int] = None,
@@ -170,22 +167,17 @@ class QdrantDocumentStore:
             Dimension of the embeddings.
         :param on_disk:
             Whether to store the collection on disk.
-        :param content_field:
-            The field for the document content.
-        :param name_field:
-            The field for the document name.
-        :param embedding_field:
-            The field for the document embeddings.
         :param use_sparse_embeddings:
             If set to `True`, enables support for sparse embeddings.
+        :param sparse_idf:
+            If set to `True`, computes the Inverse Document Frequency (IDF) when using sparse embeddings.
+            It is required to use techniques like BM42. It is ignored if `use_sparse_embeddings` is `False`.
         :param similarity:
             The similarity metric to use.
         :param return_embedding:
             Whether to return embeddings in the search results.
         :param progress_bar:
             Whether to show a progress bar or not.
-        :param duplicate_documents:
-            The parameter is not used and will be removed in future release.
         :param recreate_index:
             Whether to recreate the index.
         :param shard_number:
@@ -258,16 +250,13 @@ class QdrantDocumentStore:
         self.recreate_index = recreate_index
         self.payload_fields_to_index = payload_fields_to_index
         self.use_sparse_embeddings = use_sparse_embeddings
+        self.sparse_idf = use_sparse_embeddings and sparse_idf
         self.embedding_dim = embedding_dim
         self.on_disk = on_disk
-        self.content_field = content_field
-        self.name_field = name_field
-        self.embedding_field = embedding_field
         self.similarity = similarity
         self.index = index
         self.return_embedding = return_embedding
         self.progress_bar = progress_bar
-        self.duplicate_documents = duplicate_documents
         self.write_batch_size = write_batch_size
         self.scroll_size = scroll_size
@@ -296,6 +285,7 @@ class QdrantDocumentStore:
                 self.recreate_index,
                 self.similarity,
                 self.use_sparse_embeddings,
+                self.sparse_idf,
                 self.on_disk,
                 self.payload_fields_to_index,
             )
@@ -363,7 +353,9 @@ class QdrantDocumentStore:
             if not isinstance(doc, Document):
                 msg = f"DocumentStore.write_documents() expects a list of Documents but got an element of {type(doc)}."
                 raise ValueError(msg)
-        self._set_up_collection(self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings)
+        self._set_up_collection(
+            self.index, self.embedding_dim, False, self.similarity, self.use_sparse_embeddings, self.sparse_idf
+        )
         if len(documents) == 0:
             logger.warning("Calling QdrantDocumentStore.write_documents() with empty list")
@@ -380,7 +372,6 @@ class QdrantDocumentStore:
             for document_batch in batched_documents:
                 batch = convert_haystack_documents_to_qdrant_points(
                     document_batch,
-                    embedding_field=self.embedding_field,
                     use_sparse_embeddings=self.use_sparse_embeddings,
                 )
@@ -513,8 +504,9 @@ class QdrantDocumentStore:
         query_sparse_embedding: SparseEmbedding,
         filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ) -> List[Document]:
         """
         Queries Qdrant using a sparse embedding and returns the most relevant documents.
@@ -524,6 +516,10 @@ class QdrantDocumentStore:
         :param top_k: Maximum number of documents to return.
         :param scale_score: Whether to scale the scores of the retrieved documents.
         :param return_embedding: Whether to return the embeddings of the retrieved documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :returns: List of documents that are most similar to `query_sparse_embedding`.
@@ -553,6 +549,7 @@ class QdrantDocumentStore:
             query_filter=qdrant_filters,
             limit=top_k,
             with_vectors=return_embedding,
+            score_threshold=score_threshold,
         )
         results = [
             convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -570,8 +567,9 @@ class QdrantDocumentStore:
         query_embedding: List[float],
         filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
         top_k: int = 10,
-        scale_score: bool = True,
+        scale_score: bool = False,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ) -> List[Document]:
         """
         Queries Qdrant using a dense embedding and returns the most relevant documents.
@@ -581,6 +579,10 @@ class QdrantDocumentStore:
         :param top_k: Maximum number of documents to return.
         :param scale_score: Whether to scale the scores of the retrieved documents.
         :param return_embedding: Whether to return the embeddings of the retrieved documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :returns: List of documents that are most similar to `query_embedding`.
         """
@@ -595,6 +597,7 @@ class QdrantDocumentStore:
             query_filter=qdrant_filters,
             limit=top_k,
             with_vectors=return_embedding,
+            score_threshold=score_threshold,
         )
         results = [
             convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=self.use_sparse_embeddings)
@@ -617,6 +620,7 @@ class QdrantDocumentStore:
         filters: Optional[Union[Dict[str, Any], rest.Filter]] = None,
         top_k: int = 10,
         return_embedding: bool = False,
+        score_threshold: Optional[float] = None,
     ) -> List[Document]:
         """
         Retrieves documents based on dense and sparse embeddings and fuses the results using Reciprocal Rank Fusion.
@@ -629,6 +633,10 @@ class QdrantDocumentStore:
         :param filters: Filters applied to the retrieved documents.
         :param top_k: Maximum number of documents to return.
         :param return_embedding: Whether to return the embeddings of the retrieved documents.
+        :param score_threshold: A minimal score threshold for the result.
+            Score of the returned result might be higher or lower than the threshold
+            depending on the Distance function used.
+            E.g. for cosine similarity only higher scores will be returned.
         :returns: List of Documents that are most similar to `query_embedding` and `query_sparse_embedding`.
@@ -659,6 +667,7 @@ class QdrantDocumentStore:
             limit=top_k,
             with_payload=True,
             with_vector=return_embedding,
+            score_threshold=score_threshold,
         )
         dense_request = rest.SearchRequest(
@@ -731,6 +740,7 @@ class QdrantDocumentStore:
         recreate_collection: bool,
         similarity: str,
         use_sparse_embeddings: bool,
+        sparse_idf: bool,
         on_disk: bool = False,
         payload_fields_to_index: Optional[List[dict]] = None,
     ):
@@ -746,6 +756,8 @@ class QdrantDocumentStore:
             The similarity measure to use.
         :param use_sparse_embeddings:
             Whether to use sparse embeddings.
+        :param sparse_idf:
+            Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
         :param on_disk:
             Whether to store the collection on disk.
         :param payload_fields_to_index:
@@ -762,7 +774,9 @@ class QdrantDocumentStore:
         if recreate_collection or not self.client.collection_exists(collection_name):
             # There is no need to verify the current configuration of that
             # collection. It might be just recreated again or does not exist yet.
-            self.recreate_collection(collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings)
+            self.recreate_collection(
+                collection_name, distance, embedding_dim, on_disk, use_sparse_embeddings, sparse_idf
+            )
             # Create Payload index if payload_fields_to_index is provided
             self._create_payload_index(collection_name, payload_fields_to_index)
             return
@@ -825,6 +839,7 @@ class QdrantDocumentStore:
         embedding_dim: int,
         on_disk: Optional[bool] = None,
         use_sparse_embeddings: Optional[bool] = None,
+        sparse_idf: bool = False,
     ):
         """
         Recreates the Qdrant collection with the specified parameters.
@@ -839,6 +854,8 @@ class QdrantDocumentStore:
             Whether to store the collection on disk.
         :param use_sparse_embeddings:
             Whether to use sparse embeddings.
+        :param sparse_idf:
+            Whether to compute the Inverse Document Frequency (IDF) when using sparse embeddings. Required for BM42.
         """
         if on_disk is None:
             on_disk = self.on_disk
@@ -857,7 +874,8 @@ class QdrantDocumentStore:
                 SPARSE_VECTORS_NAME: rest.SparseVectorParams(
                     index=rest.SparseIndexParams(
                         on_disk=on_disk,
-                    )
+                    ),
+                    modifier=rest.Modifier.IDF if sparse_idf else None,
                 ),
             }
@@ -891,12 +909,7 @@ class QdrantDocumentStore:
         :param documents: A list of Haystack Document objects.
         :param index: name of the index
-        :param duplicate_documents: Handle duplicate documents based on parameter options.
-                                    Parameter options : ( 'skip','overwrite','fail')
-                                    skip (default option): Ignore the duplicates documents.
-                                    overwrite: Update any existing documents with the same ID when adding documents.
-                                    fail: An error is raised if the document ID of the document being added already
-                                    exists.
+        :param policy: The duplicate policy to use when writing documents.
         :returns: A list of Haystack Document objects.
         """

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_dict_converters.py RENAMED Viewed

@@ -22,15 +22,12 @@ def test_to_dict():
             "index": "test",
             "embedding_dim": 768,
             "on_disk": False,
-            "content_field": "content",
-            "name_field": "name",
-            "embedding_field": "embedding",
             "force_disable_check_same_thread": False,
             "use_sparse_embeddings": False,
+            "sparse_idf": False,
             "similarity": "cosine",
             "return_embedding": False,
             "progress_bar": True,
-            "duplicate_documents": "overwrite",
             "recreate_index": False,
             "shard_number": None,
             "replication_factor": None,
@@ -62,15 +59,12 @@ def test_from_dict():
                 "index": "test",
                 "embedding_dim": 768,
                 "on_disk": False,
-                "content_field": "content",
-                "name_field": "name",
-                "embedding_field": "embedding",
                 "force_disable_check_same_thread": False,
                 "use_sparse_embeddings": True,
+                "sparse_idf": True,
                 "similarity": "cosine",
                 "return_embedding": False,
                 "progress_bar": True,
-                "duplicate_documents": "overwrite",
                 "recreate_index": True,
                 "shard_number": None,
                 "quantization_config": None,
@@ -87,16 +81,13 @@ def test_from_dict():
     assert all(
         [
             document_store.index == "test",
-            document_store.content_field == "content",
-            document_store.name_field == "name",
-            document_store.embedding_field == "embedding",
             document_store.force_disable_check_same_thread is False,
             document_store.use_sparse_embeddings is True,
+            document_store.sparse_idf is True,
             document_store.on_disk is False,
             document_store.similarity == "cosine",
             document_store.return_embedding is False,
             document_store.progress_bar,
-            document_store.duplicate_documents == "overwrite",
             document_store.recreate_index is True,
             document_store.shard_number is None,
             document_store.replication_factor is None,

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_document_store.py RENAMED Viewed

@@ -12,7 +12,12 @@ from haystack.testing.document_store import (
     WriteDocumentsTest,
     _random_embeddings,
 )
-from haystack_integrations.document_stores.qdrant.document_store import QdrantDocumentStore, QdrantStoreError
+from haystack_integrations.document_stores.qdrant.document_store import (
+    SPARSE_VECTORS_NAME,
+    QdrantDocumentStore,
+    QdrantStoreError,
+)
+from qdrant_client.http import models as rest
 class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest):
@@ -49,6 +54,23 @@ class TestQdrantDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocu
         with pytest.raises(DuplicateDocumentError):
             document_store.write_documents(docs, DuplicatePolicy.FAIL)
+    def test_sparse_configuration(self):
+        document_store = QdrantDocumentStore(
+            ":memory:",
+            recreate_index=True,
+            use_sparse_embeddings=True,
+            sparse_idf=True,
+        )
+        client = document_store.client
+        sparse_config = client.get_collection("Document").config.params.sparse_vectors
+        assert SPARSE_VECTORS_NAME in sparse_config
+        # check that the `sparse_idf` parameter takes effect
+        assert hasattr(sparse_config[SPARSE_VECTORS_NAME], "modifier")
+        assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF
     def test_query_hybrid(self, generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True)

{qdrant_haystack-3.8.1 → qdrant_haystack-4.1.0}/tests/test_retriever.py RENAMED Viewed

@@ -22,6 +22,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
         assert retriever._filters is None
         assert retriever._top_k == 10
         assert retriever._return_embedding is False
+        assert retriever._score_threshold is None
     def test_to_dict(self):
         document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
@@ -47,15 +48,12 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
                         "index": "test",
                         "embedding_dim": 768,
                         "on_disk": False,
-                        "content_field": "content",
-                        "name_field": "name",
                         "force_disable_check_same_thread": False,
-                        "embedding_field": "embedding",
                         "use_sparse_embeddings": False,
+                        "sparse_idf": False,
                         "similarity": "cosine",
                         "return_embedding": False,
                         "progress_bar": True,
-                        "duplicate_documents": "overwrite",
                         "recreate_index": False,
                         "shard_number": None,
                         "replication_factor": None,
@@ -75,8 +73,9 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
                 },
                 "filters": None,
                 "top_k": 10,
-                "scale_score": True,
+                "scale_score": False,
                 "return_embedding": False,
+                "score_threshold": None,
             },
         }
@@ -92,6 +91,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
                 "top_k": 5,
                 "scale_score": False,
                 "return_embedding": True,
+                "score_threshold": None,
             },
         }
         retriever = QdrantEmbeddingRetriever.from_dict(data)
@@ -101,6 +101,7 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
         assert retriever._top_k == 5
         assert retriever._scale_score is False
         assert retriever._return_embedding is True
+        assert retriever._score_threshold is None
     def test_run(self, filterable_docs: List[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
@@ -118,6 +119,28 @@ class TestQdrantRetriever(FilterableDocsFixtureMixin):
         for document in results:
             assert document.embedding is None
+    def test_run_with_score_threshold(self):
+        document_store = QdrantDocumentStore(
+            embedding_dim=4, location=":memory:", similarity="cosine", index="Boi", use_sparse_embeddings=False
+        )
+        document_store.write_documents(
+            [
+                Document(
+                    content="Yet another document",
+                    embedding=[-0.1, -0.9, -10.0, -0.2],
+                ),
+                Document(content="The document", embedding=[1.0, 1.0, 1.0, 1.0]),
+                Document(content="Another document", embedding=[0.8, 0.8, 0.5, 1.0]),
+            ]
+        )
+        retriever = QdrantEmbeddingRetriever(document_store=document_store)
+        results = retriever.run(
+            query_embedding=[0.9, 0.9, 0.9, 0.9], top_k=5, return_embedding=False, score_threshold=0.5
+        )["documents"]
+        assert len(results) == 2
     def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
@@ -145,6 +168,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
         assert retriever._filters is None
         assert retriever._top_k == 10
         assert retriever._return_embedding is False
+        assert retriever._score_threshold is None
     def test_to_dict(self):
         document_store = QdrantDocumentStore(location=":memory:", index="test")
@@ -170,15 +194,12 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
                         "index": "test",
                         "embedding_dim": 768,
                         "on_disk": False,
-                        "content_field": "content",
-                        "name_field": "name",
-                        "embedding_field": "embedding",
                         "force_disable_check_same_thread": False,
                         "use_sparse_embeddings": False,
+                        "sparse_idf": False,
                         "similarity": "cosine",
                         "return_embedding": False,
                         "progress_bar": True,
-                        "duplicate_documents": "overwrite",
                         "recreate_index": False,
                         "shard_number": None,
                         "replication_factor": None,
@@ -198,8 +219,9 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
                 },
                 "filters": None,
                 "top_k": 10,
-                "scale_score": True,
+                "scale_score": False,
                 "return_embedding": False,
+                "score_threshold": None,
             },
         }
@@ -215,6 +237,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
                 "top_k": 5,
                 "scale_score": False,
                 "return_embedding": True,
+                "score_threshold": None,
             },
         }
         retriever = QdrantSparseEmbeddingRetriever.from_dict(data)
@@ -224,6 +247,7 @@ class TestQdrantSparseEmbeddingRetriever(FilterableDocsFixtureMixin):
         assert retriever._top_k == 5
         assert retriever._scale_score is False
         assert retriever._return_embedding is True
+        assert retriever._score_threshold is None
     def test_run(self, filterable_docs: List[Document], generate_sparse_embedding):
         document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
@@ -255,6 +279,7 @@ class TestQdrantHybridRetriever:
         assert retriever._filters is None
         assert retriever._top_k == 10
         assert retriever._return_embedding is False
+        assert retriever._score_threshold is None
     def test_to_dict(self):
         document_store = QdrantDocumentStore(location=":memory:", index="test")
@@ -280,15 +305,12 @@ class TestQdrantHybridRetriever:
                         "index": "test",
                         "embedding_dim": 768,
                         "on_disk": False,
-                        "content_field": "content",
-                        "name_field": "name",
-                        "embedding_field": "embedding",
                         "force_disable_check_same_thread": False,
                         "use_sparse_embeddings": False,
+                        "sparse_idf": False,
                         "similarity": "cosine",
                         "return_embedding": False,
                         "progress_bar": True,
-                        "duplicate_documents": "overwrite",
                         "recreate_index": False,
                         "shard_number": None,
                         "replication_factor": None,
@@ -309,6 +331,7 @@ class TestQdrantHybridRetriever:
                 "filters": None,
                 "top_k": 5,
                 "return_embedding": True,
+                "score_threshold": None,
             },
         }
@@ -323,6 +346,7 @@ class TestQdrantHybridRetriever:
                 "filters": None,
                 "top_k": 5,
                 "return_embedding": True,
+                "score_threshold": None,
             },
         }
         retriever = QdrantHybridRetriever.from_dict(data)
@@ -331,6 +355,7 @@ class TestQdrantHybridRetriever:
         assert retriever._filters is None
         assert retriever._top_k == 5
         assert retriever._return_embedding
+        assert retriever._score_threshold is None
     def test_run(self):
         mock_store = Mock(spec=QdrantDocumentStore)