qdrant-haystack 5.1.0__tar.gz → 7.0.0__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/CHANGELOG.md +8 -0
  2. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/PKG-INFO +1 -1
  3. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/pyproject.toml +9 -5
  4. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/document_stores/qdrant/document_store.py +8 -16
  5. qdrant_haystack-5.1.0/tests/test_legacy_filters.py +0 -442
  6. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/.gitignore +0 -0
  7. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/LICENSE.txt +0 -0
  8. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/README.md +0 -0
  9. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/examples/embedding_retrieval.py +0 -0
  10. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/pydoc/config.yml +0 -0
  11. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/components/retrievers/qdrant/__init__.py +0 -0
  12. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/components/retrievers/qdrant/retriever.py +0 -0
  13. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/document_stores/qdrant/__init__.py +0 -0
  14. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/document_stores/qdrant/converters.py +0 -0
  15. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/document_stores/qdrant/filters.py +0 -0
  16. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/src/haystack_integrations/document_stores/qdrant/migrate_to_sparse.py +0 -0
  17. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/__init__.py +0 -0
  18. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/conftest.py +0 -0
  19. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/test_converters.py +0 -0
  20. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/test_dict_converters.py +0 -0
  21. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/test_document_store.py +0 -0
  22. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/test_filters.py +0 -0
  23. {qdrant_haystack-5.1.0 → qdrant_haystack-7.0.0}/tests/test_retriever.py +0 -0
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/qdrant-v6.0.0] - 2024-09-13
4
+
5
+ ## [integrations/qdrant-v5.1.0] - 2024-09-12
6
+
7
+ ### 🚀 Features
8
+
9
+ - Qdrant - Add group_by and group_size optional parameters to Retrievers (#1054)
10
+
3
11
  ## [integrations/qdrant-v5.0.0] - 2024-09-02
4
12
 
5
13
  ## [integrations/qdrant-v4.2.0] - 2024-08-27
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: qdrant-haystack
3
- Version: 5.1.0
3
+ Version: 7.0.0
4
4
  Summary: An integration of Qdrant ANN vector database backend with Haystack
5
5
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations
6
6
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/qdrant/README.md
@@ -44,6 +44,7 @@ root = "../.."
44
44
  git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*"'
45
45
 
46
46
  [tool.hatch.envs.default]
47
+ installer = "uv"
47
48
  dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
48
49
  [tool.hatch.envs.default.scripts]
49
50
  test = "pytest {args:tests}"
@@ -58,12 +59,13 @@ docs = ["pydoc-markdown pydoc/config.yml"]
58
59
  python = ["3.8", "3.9", "3.10", "3.11"]
59
60
 
60
61
  [tool.hatch.envs.lint]
62
+ installer = "uv"
61
63
  detached = true
62
- dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
64
+ dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
63
65
  [tool.hatch.envs.lint.scripts]
64
66
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
65
- style = ["ruff check {args:. --exclude tests/, examples/}", "black --check --diff {args:.}"]
66
- fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/, examples/}", "style"]
67
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
68
+ fmt = ["black {args:.}", "ruff check --fix {args:.}", "style"]
67
69
  all = ["style", "typing"]
68
70
 
69
71
  [tool.black]
@@ -74,6 +76,8 @@ skip-string-normalization = true
74
76
  [tool.ruff]
75
77
  target-version = "py38"
76
78
  line-length = 120
79
+
80
+ [tool.ruff.lint]
77
81
  select = [
78
82
  "A",
79
83
  "ARG",
@@ -125,10 +129,10 @@ unfixable = [
125
129
  "F401",
126
130
  ]
127
131
 
128
- [tool.ruff.flake8-tidy-imports]
132
+ [tool.ruff.lint.flake8-tidy-imports]
129
133
  ban-relative-imports = "parents"
130
134
 
131
- [tool.ruff.per-file-ignores]
135
+ [tool.ruff.lint.per-file-ignores]
132
136
  # Tests can use magic values, assertions, and relative imports
133
137
  "tests/**/*" = ["PLR2004", "S101", "TID252"]
134
138
  # examples can contain "print" commands
@@ -11,7 +11,6 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
11
11
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
12
12
  from haystack.document_stores.types import DuplicatePolicy
13
13
  from haystack.utils import Secret, deserialize_secrets_inplace
14
- from haystack.utils.filters import convert as convert_legacy_filters
15
14
  from qdrant_client import grpc
16
15
  from qdrant_client.http import models as rest
17
16
  from qdrant_client.http.exceptions import UnexpectedResponse
@@ -323,7 +322,8 @@ class QdrantDocumentStore:
323
322
  raise ValueError(msg)
324
323
 
325
324
  if filters and not isinstance(filters, rest.Filter) and "operator" not in filters:
326
- filters = convert_legacy_filters(filters)
325
+ msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
326
+ raise ValueError(msg)
327
327
  return list(
328
328
  self.get_documents_generator(
329
329
  filters,
@@ -362,7 +362,6 @@ class QdrantDocumentStore:
362
362
 
363
363
  document_objects = self._handle_duplicate_documents(
364
364
  documents=documents,
365
- index=self.index,
366
365
  policy=policy,
367
366
  )
368
367
 
@@ -468,7 +467,6 @@ class QdrantDocumentStore:
468
467
  def get_documents_by_id(
469
468
  self,
470
469
  ids: List[str],
471
- index: Optional[str] = None,
472
470
  ) -> List[Document]:
473
471
  """
474
472
  Retrieves documents from Qdrant by their IDs.
@@ -480,13 +478,11 @@ class QdrantDocumentStore:
480
478
  :returns:
481
479
  A list of documents.
482
480
  """
483
- index = index or self.index
484
-
485
481
  documents: List[Document] = []
486
482
 
487
483
  ids = [convert_id(_id) for _id in ids]
488
484
  records = self.client.retrieve(
489
- collection_name=index,
485
+ collection_name=self.index,
490
486
  ids=ids,
491
487
  with_payload=True,
492
488
  with_vectors=True,
@@ -987,7 +983,6 @@ class QdrantDocumentStore:
987
983
  def _handle_duplicate_documents(
988
984
  self,
989
985
  documents: List[Document],
990
- index: Optional[str] = None,
991
986
  policy: DuplicatePolicy = None,
992
987
  ):
993
988
  """
@@ -995,31 +990,28 @@ class QdrantDocumentStore:
995
990
  documents that are not in the index yet.
996
991
 
997
992
  :param documents: A list of Haystack Document objects.
998
- :param index: name of the index
999
993
  :param policy: The duplicate policy to use when writing documents.
1000
994
  :returns: A list of Haystack Document objects.
1001
995
  """
1002
996
 
1003
- index = index or self.index
1004
997
  if policy in (DuplicatePolicy.SKIP, DuplicatePolicy.FAIL):
1005
- documents = self._drop_duplicate_documents(documents, index)
1006
- documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents], index=index)
998
+ documents = self._drop_duplicate_documents(documents)
999
+ documents_found = self.get_documents_by_id(ids=[doc.id for doc in documents])
1007
1000
  ids_exist_in_db: List[str] = [doc.id for doc in documents_found]
1008
1001
 
1009
1002
  if len(ids_exist_in_db) > 0 and policy == DuplicatePolicy.FAIL:
1010
- msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{index}'."
1003
+ msg = f"Document with ids '{', '.join(ids_exist_in_db)} already exists in index = '{self.index}'."
1011
1004
  raise DuplicateDocumentError(msg)
1012
1005
 
1013
1006
  documents = list(filter(lambda doc: doc.id not in ids_exist_in_db, documents))
1014
1007
 
1015
1008
  return documents
1016
1009
 
1017
- def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
1010
+ def _drop_duplicate_documents(self, documents: List[Document]) -> List[Document]:
1018
1011
  """
1019
1012
  Drop duplicate documents based on same hash ID.
1020
1013
 
1021
1014
  :param documents: A list of Haystack Document objects.
1022
- :param index: Name of the index.
1023
1015
  :returns: A list of Haystack Document objects.
1024
1016
  """
1025
1017
  _hash_ids: Set = set()
@@ -1030,7 +1022,7 @@ class QdrantDocumentStore:
1030
1022
  logger.info(
1031
1023
  "Duplicate Documents: Document with id '%s' already exists in index '%s'",
1032
1024
  document.id,
1033
- index or self.index,
1025
+ self.index,
1034
1026
  )
1035
1027
  continue
1036
1028
  _documents.append(document)
@@ -1,442 +0,0 @@
1
- from typing import List
2
-
3
- import pytest
4
- from haystack import Document
5
- from haystack.document_stores.types import DocumentStore
6
- from haystack.testing.document_store import LegacyFilterDocumentsTest
7
- from haystack.utils.filters import FilterError
8
-
9
- from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
10
-
11
- # The tests below are from haystack.testing.document_store.LegacyFilterDocumentsTest
12
- # Updated to include `meta` prefix for filter keys wherever necessary
13
- # And skip tests that are not supported in Qdrant(Dataframes, embeddings)
14
-
15
-
16
- class TestQdrantLegacyFilterDocuments(LegacyFilterDocumentsTest):
17
- """
18
- Utility class to test a Document Store `filter_documents` method using different types of legacy filters
19
- """
20
-
21
- @pytest.fixture
22
- def document_store(self) -> QdrantDocumentStore:
23
- return QdrantDocumentStore(
24
- ":memory:",
25
- recreate_index=True,
26
- return_embedding=True,
27
- wait_result_from_api=True,
28
- )
29
-
30
- def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
31
- """
32
- Assert that two lists of Documents are equal.
33
- This is used in every test.
34
- """
35
-
36
- # Check that the lengths of the lists are the same
37
- assert len(received) == len(expected)
38
-
39
- # Check that the sets are equal, meaning the content and IDs match regardless of order
40
- assert {doc.id for doc in received} == {doc.id for doc in expected}
41
-
42
- def test_filter_simple_metadata_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
43
- document_store.write_documents(filterable_docs)
44
- result = document_store.filter_documents(filters={"meta.page": "100"})
45
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") == "100"])
46
-
47
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
48
- def test_filter_document_dataframe(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
49
-
50
- def test_eq_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
51
- document_store.write_documents(filterable_docs)
52
- result = document_store.filter_documents(filters={"meta.page": {"$eq": "100"}})
53
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") == "100"])
54
-
55
- def test_eq_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
56
- document_store.write_documents(filterable_docs)
57
- result = document_store.filter_documents(filters={"meta.page": "100"})
58
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") == "100"])
59
-
60
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
61
- def test_eq_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
62
-
63
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
64
- def test_eq_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
65
-
66
- # LegacyFilterDocumentsNotEqualTest
67
-
68
- def test_ne_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
69
- document_store.write_documents(filterable_docs)
70
- result = document_store.filter_documents(filters={"meta.page": {"$ne": "100"}})
71
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") != "100"])
72
-
73
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
74
- def test_ne_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
75
-
76
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
77
- def test_ne_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
78
-
79
- # LegacyFilterDocumentsInTest
80
-
81
- def test_filter_simple_list_single_element(self, document_store: DocumentStore, filterable_docs: List[Document]):
82
- document_store.write_documents(filterable_docs)
83
- result = document_store.filter_documents(filters={"meta.page": ["100"]})
84
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") == "100"])
85
-
86
- def test_filter_simple_list_one_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
87
- document_store.write_documents(filterable_docs)
88
- result = document_store.filter_documents(filters={"meta.page": ["100"]})
89
- self.assert_documents_are_equal(result, [doc for doc in filterable_docs if doc.meta.get("page") in ["100"]])
90
-
91
- def test_filter_simple_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
92
- document_store.write_documents(filterable_docs)
93
- result = document_store.filter_documents(filters={"meta.page": ["100", "123"]})
94
- self.assert_documents_are_equal(
95
- result,
96
- [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]],
97
- )
98
-
99
- def test_incorrect_filter_value(self, document_store: DocumentStore, filterable_docs: List[Document]):
100
- document_store.write_documents(filterable_docs)
101
- result = document_store.filter_documents(filters={"meta.page": ["nope"]})
102
- self.assert_documents_are_equal(result, [])
103
-
104
- def test_in_filter_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
105
- document_store.write_documents(filterable_docs)
106
- result = document_store.filter_documents(filters={"meta.page": {"$in": ["100", "123", "n.a."]}})
107
- self.assert_documents_are_equal(
108
- result,
109
- [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]],
110
- )
111
-
112
- def test_in_filter_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
113
- document_store.write_documents(filterable_docs)
114
- result = document_store.filter_documents(filters={"meta.page": ["100", "123", "n.a."]})
115
- self.assert_documents_are_equal(
116
- result,
117
- [doc for doc in filterable_docs if doc.meta.get("page") in ["100", "123"]],
118
- )
119
-
120
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
121
- def test_in_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
122
-
123
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
124
- def test_in_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
125
-
126
- # LegacyFilterDocumentsNotInTest
127
-
128
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
129
- def test_nin_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
130
-
131
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
132
- def test_nin_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
133
-
134
- def test_nin_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
135
- document_store.write_documents(filterable_docs)
136
- result = document_store.filter_documents(filters={"meta.page": {"$nin": ["100", "123", "n.a."]}})
137
- self.assert_documents_are_equal(
138
- result,
139
- [doc for doc in filterable_docs if doc.meta.get("page") not in ["100", "123"]],
140
- )
141
-
142
- # LegacyFilterDocumentsGreaterThanTest
143
-
144
- def test_gt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
145
- document_store.write_documents(filterable_docs)
146
- result = document_store.filter_documents(filters={"meta.number": {"$gt": 0.0}})
147
- self.assert_documents_are_equal(
148
- result,
149
- [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] > 0],
150
- )
151
-
152
- def test_gt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
153
- document_store.write_documents(filterable_docs)
154
- with pytest.raises(FilterError):
155
- document_store.filter_documents(filters={"meta.page": {"$gt": "100"}})
156
-
157
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
158
- def test_gt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
159
-
160
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
161
- def test_gt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
162
-
163
- # LegacyFilterDocumentsGreaterThanEqualTest
164
-
165
- def test_gte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
166
- document_store.write_documents(filterable_docs)
167
- result = document_store.filter_documents(filters={"meta.number": {"$gte": -2}})
168
- self.assert_documents_are_equal(
169
- result,
170
- [doc for doc in filterable_docs if "number" in doc.meta and doc.meta["number"] >= -2],
171
- )
172
-
173
- def test_gte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
174
- document_store.write_documents(filterable_docs)
175
- with pytest.raises(FilterError):
176
- document_store.filter_documents(filters={"meta.page": {"$gte": "100"}})
177
-
178
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
179
- def test_gte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
180
-
181
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
182
- def test_gte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
183
-
184
- # LegacyFilterDocumentsLessThanTest
185
-
186
- def test_lt_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
187
- document_store.write_documents(filterable_docs)
188
- result = document_store.filter_documents(filters={"meta.number": {"$lt": 0.0}})
189
- self.assert_documents_are_equal(
190
- result,
191
- [doc for doc in filterable_docs if doc.meta.get("number") is not None and doc.meta["number"] < 0],
192
- )
193
-
194
- def test_lt_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
195
- document_store.write_documents(filterable_docs)
196
- with pytest.raises(FilterError):
197
- document_store.filter_documents(filters={"meta.page": {"$lt": "100"}})
198
-
199
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
200
- def test_lt_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
201
-
202
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
203
- def test_lt_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
204
-
205
- # LegacyFilterDocumentsLessThanEqualTest
206
-
207
- def test_lte_filter(self, document_store: DocumentStore, filterable_docs: List[Document]):
208
- document_store.write_documents(filterable_docs)
209
- result = document_store.filter_documents(filters={"meta.number": {"$lte": 2.0}})
210
- self.assert_documents_are_equal(
211
- result,
212
- [doc for doc in filterable_docs if doc.meta.get("number") is not None and doc.meta["number"] <= 2.0],
213
- )
214
-
215
- def test_lte_filter_non_numeric(self, document_store: DocumentStore, filterable_docs: List[Document]):
216
- document_store.write_documents(filterable_docs)
217
- with pytest.raises(FilterError):
218
- document_store.filter_documents(filters={"meta.page": {"$lte": "100"}})
219
-
220
- @pytest.mark.skip(reason="Dataframe filtering is not supported in Qdrant")
221
- def test_lte_filter_table(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
222
-
223
- @pytest.mark.skip(reason="Embedding filtering is not supported in Qdrant")
224
- def test_lte_filter_embedding(self, document_store: DocumentStore, filterable_docs: List[Document]): ...
225
-
226
- # LegacyFilterDocumentsSimpleLogicalTest
227
-
228
- def test_filter_simple_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
229
- document_store.write_documents(filterable_docs)
230
- filters = {
231
- "$or": {
232
- "meta.name": {"$in": ["name_0", "name_1"]},
233
- "meta.number": {"$lt": 1.0},
234
- }
235
- }
236
- result = document_store.filter_documents(filters=filters)
237
- self.assert_documents_are_equal(
238
- result,
239
- [
240
- doc
241
- for doc in filterable_docs
242
- if (doc.meta.get("number") is not None and doc.meta["number"] < 1)
243
- or doc.meta.get("name") in ["name_0", "name_1"]
244
- ],
245
- )
246
-
247
- def test_filter_simple_implicit_and_with_multi_key_dict(
248
- self, document_store: DocumentStore, filterable_docs: List[Document]
249
- ):
250
- document_store.write_documents(filterable_docs)
251
- result = document_store.filter_documents(filters={"meta.number": {"$lte": 2.0, "$gte": 0.0}})
252
- self.assert_documents_are_equal(
253
- result,
254
- [
255
- doc
256
- for doc in filterable_docs
257
- if "number" in doc.meta and doc.meta["number"] >= 0.0 and doc.meta["number"] <= 2.0
258
- ],
259
- )
260
-
261
- def test_filter_simple_explicit_and_with_list(self, document_store: DocumentStore, filterable_docs: List[Document]):
262
- document_store.write_documents(filterable_docs)
263
- result = document_store.filter_documents(filters={"meta.number": {"$and": [{"$lte": 2}, {"$gte": 0}]}})
264
- self.assert_documents_are_equal(
265
- result,
266
- [
267
- doc
268
- for doc in filterable_docs
269
- if "number" in doc.meta and doc.meta["number"] <= 2.0 and doc.meta["number"] >= 0.0
270
- ],
271
- )
272
-
273
- def test_filter_simple_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
274
- document_store.write_documents(filterable_docs)
275
- result = document_store.filter_documents(filters={"meta.number": {"$lte": 2.0, "$gte": 0}})
276
- self.assert_documents_are_equal(
277
- result,
278
- [
279
- doc
280
- for doc in filterable_docs
281
- if "number" in doc.meta and doc.meta["number"] <= 2.0 and doc.meta["number"] >= 0.0
282
- ],
283
- )
284
-
285
- # LegacyFilterDocumentsNestedLogicalTest(
286
-
287
- def test_filter_nested_implicit_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
288
- document_store.write_documents(filterable_docs)
289
- filters_simplified = {
290
- "meta.number": {"$lte": 2, "$gte": 0},
291
- "meta.name": ["name_0", "name_1"],
292
- }
293
- result = document_store.filter_documents(filters=filters_simplified)
294
- self.assert_documents_are_equal(
295
- result,
296
- [
297
- doc
298
- for doc in filterable_docs
299
- if (
300
- "number" in doc.meta
301
- and doc.meta["number"] <= 2
302
- and doc.meta["number"] >= 0
303
- and doc.meta.get("name") in ["name_0", "name_1"]
304
- )
305
- ],
306
- )
307
-
308
- def test_filter_nested_or(self, document_store: DocumentStore, filterable_docs: List[Document]):
309
- document_store.write_documents(filterable_docs)
310
- filters = {
311
- "$or": {
312
- "meta.name": {"$in": ["name_0", "name_1"]},
313
- "meta.number": {"$lt": 1.0},
314
- }
315
- }
316
- result = document_store.filter_documents(filters=filters)
317
- self.assert_documents_are_equal(
318
- result,
319
- [
320
- doc
321
- for doc in filterable_docs
322
- if (
323
- doc.meta.get("name") in ["name_0", "name_1"]
324
- or (doc.meta.get("number") is not None and doc.meta["number"] < 1)
325
- )
326
- ],
327
- )
328
-
329
- def test_filter_nested_and_or_explicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
330
- document_store.write_documents(filterable_docs)
331
- filters_simplified = {
332
- "$and": {
333
- "meta.page": {"$eq": "123"},
334
- "$or": {
335
- "meta.name": {"$in": ["name_0", "name_1"]},
336
- "meta.number": {"$lt": 1.0},
337
- },
338
- }
339
- }
340
- result = document_store.filter_documents(filters=filters_simplified)
341
- self.assert_documents_are_equal(
342
- result,
343
- [
344
- doc
345
- for doc in filterable_docs
346
- if (
347
- doc.meta.get("page") in ["123"]
348
- and (
349
- doc.meta.get("name") in ["name_0", "name_1"]
350
- or ("number" in doc.meta and doc.meta["number"] < 1)
351
- )
352
- )
353
- ],
354
- )
355
-
356
- def test_filter_nested_and_or_implicit(self, document_store: DocumentStore, filterable_docs: List[Document]):
357
- document_store.write_documents(filterable_docs)
358
- filters_simplified = {
359
- "meta.page": {"$eq": "123"},
360
- "$or": {
361
- "meta.name": {"$in": ["name_0", "name_1"]},
362
- "meta.number": {"$lt": 1.0},
363
- },
364
- }
365
- result = document_store.filter_documents(filters=filters_simplified)
366
- self.assert_documents_are_equal(
367
- result,
368
- [
369
- doc
370
- for doc in filterable_docs
371
- if (
372
- doc.meta.get("page") in ["123"]
373
- and (
374
- doc.meta.get("name") in ["name_0", "name_1"]
375
- or ("number" in doc.meta and doc.meta["number"] < 1)
376
- )
377
- )
378
- ],
379
- )
380
-
381
- def test_filter_nested_or_and(self, document_store: DocumentStore, filterable_docs: List[Document]):
382
- document_store.write_documents(filterable_docs)
383
- filters_simplified = {
384
- "$or": {
385
- "meta.number": {"$lt": 1},
386
- "$and": {
387
- "meta.name": {"$in": ["name_0", "name_1"]},
388
- "$not": {"meta.chapter": {"$eq": "intro"}},
389
- },
390
- }
391
- }
392
- result = document_store.filter_documents(filters=filters_simplified)
393
- self.assert_documents_are_equal(
394
- result,
395
- [
396
- doc
397
- for doc in filterable_docs
398
- if (
399
- (doc.meta.get("number") is not None and doc.meta["number"] < 1)
400
- or (doc.meta.get("name") in ["name_0", "name_1"] and (doc.meta.get("chapter") != "intro"))
401
- )
402
- ],
403
- )
404
-
405
- def test_filter_nested_multiple_identical_operators_same_level(
406
- self, document_store: DocumentStore, filterable_docs: List[Document]
407
- ):
408
- document_store.write_documents(filterable_docs)
409
- filters = {
410
- "$or": [
411
- {
412
- "$and": {
413
- "meta.name": {"$in": ["name_0", "name_1"]},
414
- "meta.page": "100",
415
- }
416
- },
417
- {
418
- "$and": {
419
- "meta.chapter": {"$in": ["intro", "abstract"]},
420
- "meta.page": "123",
421
- }
422
- },
423
- ]
424
- }
425
- result = document_store.filter_documents(filters=filters)
426
- self.assert_documents_are_equal(
427
- result,
428
- [
429
- doc
430
- for doc in filterable_docs
431
- if (
432
- (doc.meta.get("name") in ["name_0", "name_1"] and doc.meta.get("page") == "100")
433
- or (doc.meta.get("chapter") in ["intro", "abstract"] and doc.meta.get("page") == "123")
434
- )
435
- ],
436
- )
437
-
438
- def test_no_filter_not_empty(self, document_store: DocumentStore):
439
- docs = [Document(content="test doc")]
440
- document_store.write_documents(docs)
441
- self.assert_documents_are_equal(document_store.filter_documents(), docs)
442
- self.assert_documents_are_equal(document_store.filter_documents(filters={}), docs)