PyPI - qdrant-haystack - Versions diffs - 6.0.0__tar.gz → 8.0.0__tar.gz - Mend

qdrant-haystack 6.0.0__tar.gz → 8.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,14 @@
 # Changelog
+## [integrations/qdrant-v7.0.0] - 2024-10-29
+### ⚙️ Miscellaneous Tasks
+- Update ruff linting scripts and settings (#1105)
+- Adopt uv as installer (#1142)
+## [integrations/qdrant-v6.0.0] - 2024-09-13
 ## [integrations/qdrant-v5.1.0] - 2024-09-12
 ### 🚀 Features
@@ -103,8 +112,6 @@
 - Fix haystack-ai pin (#649)
 ## [integrations/qdrant-v3.2.0] - 2024-03-27
 ### 🚀 Features
@@ -115,15 +122,11 @@
 ### 🐛 Bug Fixes
 - Fix linter errors (#282)
 - Fix order of API docs (#447)
 This PR will also push the docs to Readme
 - Fixes (#518)
 ### 🚜 Refactor
 - [**breaking**] Qdrant - update secret management (#405)
@@ -154,8 +157,6 @@ This PR will also push the docs to Readme
 - Fix import paths for beta5 (#237)
 ### 🚜 Refactor
 - Use `hatch_vcs` to manage integrations versioning (#103)

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: qdrant-haystack
-Version: 6.0.0
+Version: 8.0.0
 Summary: An integration of Qdrant ANN vector database backend with Haystack
 Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/pyproject.toml RENAMED Viewed

@@ -44,6 +44,7 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
 [tool.hatch.envs.default]
+installer = "uv"
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"
@@ -58,12 +59,13 @@ docs = ["pydoc-markdown pydoc/config.yml"]
 python = ["3.8", "3.9", "3.10", "3.11"]
 [tool.hatch.envs.lint]
+installer = "uv"
 detached = true
-dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
+dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
-style = ["ruff check {args:. --exclude tests/, examples/}", "black --check --diff {args:.}"]
-fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/, examples/}", "style"]
+style = ["ruff check {args:.}", "black --check --diff {args:.}"]
+fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
 all = ["style", "typing"]
 [tool.black]
@@ -74,6 +76,8 @@ skip-string-normalization = true
 [tool.ruff]
 target-version = "py38"
 line-length = 120
+[tool.ruff.lint]
 select = [
   "A",
   "ARG",
@@ -125,10 +129,10 @@ unfixable = [
   "F401",
 ]
-[tool.ruff.flake8-tidy-imports]
+[tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "parents"
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
 # examples can contain "print" commands

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py RENAMED Viewed

@@ -4,4 +4,4 @@
 from .retriever import QdrantEmbeddingRetriever, QdrantHybridRetriever, QdrantSparseEmbeddingRetriever
-__all__ = ("QdrantEmbeddingRetriever", "QdrantSparseEmbeddingRetriever", "QdrantHybridRetriever")
+__all__ = ("QdrantEmbeddingRetriever", "QdrantHybridRetriever", "QdrantSparseEmbeddingRetriever")

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py RENAMED Viewed

@@ -22,6 +22,15 @@ def convert_haystack_documents_to_qdrant_points(
     points = []
     for document in documents:
         payload = document.to_dict(flatten=False)
+        if payload.pop("dataframe", None):
+            logger.warning(
+                "Document %s has the `dataframe` field set,"
+                "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
+                "The `dataframe` field will soon be removed from Haystack Document.",
+                document.id,
+            )
         if use_sparse_embeddings:
             vector = {}
@@ -64,6 +73,14 @@ def convert_qdrant_point_to_haystack_document(point: QdrantPoint, use_sparse_emb
     payload = {**point.payload}
     payload["score"] = point.score if hasattr(point, "score") else None
+    if payload.pop("dataframe", None):
+        logger.warning(
+            "Document %s has the `dataframe` field set,"
+            "QdrantDocumentStore no longer supports dataframes and this field will be ignored. "
+            "The `dataframe` field will soon be removed from Haystack Document.",
+            payload["id"],
+        )
     if not use_sparse_embeddings:
         payload["embedding"] = point.vector if hasattr(point, "vector") else None
     elif hasattr(point, "vector") and point.vector is not None:

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py RENAMED Viewed

@@ -362,7 +362,6 @@ class QdrantDocumentStore:
         document_objects = self._handle_duplicate_documents(
             documents=documents,
-            index=self.index,
             policy=policy,
         )
@@ -468,7 +467,6 @@ class QdrantDocumentStore:
     def get_documents_by_id(
         self,
         ids: List[str],
-        index: Optional[str] = None,
     ) -> List[Document]:
         """
         Retrieves documents from Qdrant by their IDs.
@@ -480,13 +478,11 @@ class QdrantDocumentStore:
         :returns:
             A list of documents.
         """
-        index = index or self.index
         documents: List[Document] = []
         ids = [convert_id(_id) for _id in ids]
         records = self.client.retrieve(
-            collection_name=index,
+            collection_name=self.index,
             ids=ids,
             with_payload=True,
             with_vectors=True,
@@ -987,7 +983,6 @@ class QdrantDocumentStore:
     def _handle_duplicate_documents(
         self,
         documents: List[Document],
-        index: Optional[str] = None,
         policy: DuplicatePolicy = None,
     ):
         """
@@ -995,31 +990,28 @@ class QdrantDocumentStore:
         documents that are not in the index yet.
         :param documents: A list of Haystack Document objects.
-        :param index: name of the index
         :param policy: The duplicate policy to use when writing documents.
         :returns: A list of Haystack Document objects.
         """
-        index = index or self.index
         if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
-            documents = self._drop_duplicate_documents(documents, index)
-            documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents], index=index)
+            documents = self._drop_duplicate_documents(documents)
+            documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
             ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
             if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
-                msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
+                msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
                 raise DuplicateDocumentError(msg)
             documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
         return documents
-    def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
+    def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
         """
         Drop duplicate documents based on same hash ID.
         :param documents: A list of Haystack Document objects.
-        :param index: Name of the index.
         :returns: A list of Haystack Document objects.
         """
         _hash_ids: Set = set()
@@ -1030,7 +1022,7 @@ class QdrantDocumentStore:
                 logger.info(
                     "Duplicate Documents: Document with id '%s' already exists in index '%s'",
                     document.id,
-                    index or self.index,
+                    self.index,
                 )
                 continue
             _documents.append(document)

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_converters.py RENAMED Viewed

@@ -1,7 +1,10 @@
 import numpy as np
+from haystack import Document
+from pandas import DataFrame
 from qdrant_client.http import models as rest
 from haystack_integrations.document_stores.qdrant.converters import (
+    convert_haystack_documents_to_qdrant_points,
     convert_id,
     convert_qdrant_point_to_haystack_document,
 )
@@ -62,3 +65,44 @@ def test_point_to_document_reverts_proper_structure_from_record_without_sparse()
     assert document.sparse_embedding is None
     assert {"test_field": 1} == document.meta
     assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
+def test_point_to_document_skips_dataframe():
+    point = rest.Record(
+        id="c7c62e8e-02b9-4ec6-9f88-46bd97b628b7",
+        payload={
+            "id": "my-id",
+            "content": "Lorem ipsum",
+            "content_type": "text",
+            "meta": {
+                "test_field": 1,
+            },
+            "dataframe": {"a": [1, 2, 3]},
+        },
+        vector=[1.0, 0.0, 0.0, 0.0],
+    )
+    document = convert_qdrant_point_to_haystack_document(point, use_sparse_embeddings=False)
+    assert "my-id" == document.id
+    assert "Lorem ipsum" == document.content
+    assert "text" == document.content_type
+    assert {"test_field": 1} == document.meta
+    assert 0.0 == np.sum(np.array([1.0, 0.0, 0.0, 0.0]) - document.embedding)
+    assert not hasattr(document, "dataframe") or document.dataframe is None
+def test_documents_to_points_skips_dataframe():
+    doc = Document(
+        id="my-id",
+        content="Lorem ipsum",
+        embedding=[1.0, 0.0, 0.0, 0.0],
+    )
+    doc.dataframe = DataFrame([[1, 2], [3, 4]])
+    points = convert_haystack_documents_to_qdrant_points([doc], use_sparse_embeddings=False)
+    assert len(points) == 1
+    assert points[0].payload["content"] == "Lorem ipsum"
+    assert points[0].vector == [1.0, 0.0, 0.0, 0.0]
+    assert "dataframe" not in points[0].payload

{qdrant_haystack-6.0.0 → qdrant_haystack-8.0.0}/tests/test_filters.py RENAMED Viewed

@@ -208,11 +208,5 @@ class TestQdrantStoreBaseTests(FilterDocumentsTest):
     # ======== ========================== ========
-    @pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
-    def test_comparison_equal_with_dataframe(self, document_store, filterable_docs): ...
-    @pytest.mark.skip(reason="Qdrant doesn't support comparision with dataframe")
-    def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs): ...
     @pytest.mark.skip(reason="Cannot distinguish errors yet")
     def test_missing_top_level_operator_key(self, document_store, filterable_docs): ...