elasticsearch-haystack 0.5.0__tar.gz → 0.7.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (19)
  1. elasticsearch_haystack-0.7.0/CHANGELOG.md +80 -0
  2. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/PKG-INFO +1 -1
  3. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/pyproject.toml +24 -55
  4. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +13 -3
  5. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +13 -3
  6. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +47 -35
  7. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/tests/test_bm25_retriever.py +12 -0
  8. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/tests/test_document_store.py +9 -3
  9. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/tests/test_embedding_retriever.py +10 -0
  10. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/.gitignore +0 -0
  11. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/LICENSE +0 -0
  12. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/README.md +0 -0
  13. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/docker-compose.yml +0 -0
  14. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/pydoc/config.yml +0 -0
  15. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  16. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  17. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  18. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/tests/__init__.py +0 -0
  19. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.7.0}/tests/test_filters.py +0 -0
@@ -0,0 +1,80 @@
1
+ # Changelog
2
+
3
+ ## [unreleased]
4
+
5
+ ### 🚀 Features
6
+
7
+ - Defer the database connection to when it's needed (#766)
8
+
9
+ ## [integrations/elasticsearch-v0.5.0] - 2024-05-24
10
+
11
+ ### 🐛 Bug Fixes
12
+
13
+ - Add support for custom mapping in ElasticsearchDocumentStore (#721)
14
+
15
+ ## [integrations/elasticsearch-v0.4.0] - 2024-04-03
16
+
17
+ ### 📚 Documentation
18
+
19
+ - Docstring update (#525)
20
+ - Review Elastic (#541)
21
+ - Disable-class-def (#556)
22
+
23
+ ## [integrations/elasticsearch-v0.3.0] - 2024-02-23
24
+
25
+ ### 🐛 Bug Fixes
26
+
27
+ - Fix order of API docs (#447)
28
+
29
+ This PR will also push the docs to Readme
30
+
31
+ ### 📚 Documentation
32
+
33
+ - Update category slug (#442)
34
+
35
+ ### Elasticsearch
36
+
37
+ - Add user-agent header (#457)
38
+
39
+ ### Feat
40
+
41
+ - Add filters to run function in retrievers of elasticsearch (#440)
42
+
43
+ ### Elasticsearch
44
+
45
+ - Generate api docs (#322)
46
+
47
+ ## [integrations/elasticsearch-v0.2.0] - 2024-01-19
48
+
49
+ ## [integrations/elasticsearch-v0.1.3] - 2024-01-18
50
+
51
+ ## [integrations/elasticsearch-v0.1.2] - 2023-12-20
52
+
53
+ ### 🐛 Bug Fixes
54
+
55
+ - Fix project urls (#96)
56
+
57
+
58
+
59
+ ### 🚜 Refactor
60
+
61
+ - Use `hatch_vcs` to manage integrations versioning (#103)
62
+
63
+ ## [integrations/elasticsearch-v0.1.1] - 2023-12-05
64
+
65
+ ### 🐛 Bug Fixes
66
+
67
+ - Fix import and increase version (#77)
68
+
69
+
70
+
71
+ ## [integrations/elasticsearch-v0.1.0] - 2023-12-04
72
+
73
+ ### 🐛 Bug Fixes
74
+
75
+ - Fix license headers
76
+
77
+
78
+ ## [integrations/elasticsearch-v0.0.2] - 2023-11-29
79
+
80
+ <!-- generated by git-cliff -->
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -10,9 +10,7 @@ readme = "README.md"
10
10
  requires-python = ">=3.8"
11
11
  license = "Apache-2.0"
12
12
  keywords = []
13
- authors = [
14
- { name = "Silvano Cerza", email = "silvanocerza@gmail.com" },
15
- ]
13
+ authors = [{ name = "Silvano Cerza", email = "silvanocerza@gmail.com" }]
16
14
  classifiers = [
17
15
  "License :: OSI Approved :: Apache Software License",
18
16
  "Development Status :: 4 - Beta",
@@ -24,10 +22,7 @@ classifiers = [
24
22
  "Programming Language :: Python :: Implementation :: CPython",
25
23
  "Programming Language :: Python :: Implementation :: PyPy",
26
24
  ]
27
- dependencies = [
28
- "haystack-ai",
29
- "elasticsearch>=8,<9",
30
- ]
25
+ dependencies = ["haystack-ai", "elasticsearch>=8,<9"]
31
26
 
32
27
  [project.urls]
33
28
  Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme"
@@ -49,49 +44,28 @@ git_describe_command = 'git describe --tags --match="integrations/elasticsearch-
49
44
  dependencies = [
50
45
  "coverage[toml]>=6.5",
51
46
  "pytest",
47
+ "pytest-rerunfailures",
52
48
  "pytest-xdist",
53
49
  "haystack-pydoc-tools",
54
50
  ]
55
51
  [tool.hatch.envs.default.scripts]
56
- test = "pytest {args:tests}"
57
- test-cov = "coverage run -m pytest {args:tests}"
58
- cov-report = [
59
- "- coverage combine",
60
- "coverage report",
61
- ]
62
- cov = [
63
- "test-cov",
64
- "cov-report",
65
- ]
66
- docs = [
67
- "pydoc-markdown pydoc/config.yml"
68
- ]
52
+ test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
53
+ test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
54
+ cov-report = ["- coverage combine", "coverage report"]
55
+ cov = ["test-cov", "cov-report"]
56
+ docs = ["pydoc-markdown pydoc/config.yml"]
69
57
 
70
58
  [[tool.hatch.envs.all.matrix]]
71
59
  python = ["3.8", "3.9", "3.10", "3.11"]
72
60
 
73
61
  [tool.hatch.envs.lint]
74
62
  detached = true
75
- dependencies = [
76
- "black>=23.1.0",
77
- "mypy>=1.0.0",
78
- "ruff>=0.0.243",
79
- ]
63
+ dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
80
64
  [tool.hatch.envs.lint.scripts]
81
65
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
82
- style = [
83
- "ruff {args:.}",
84
- "black --check --diff {args:.}",
85
- ]
86
- fmt = [
87
- "black {args:.}",
88
- "ruff --fix {args:.}",
89
- "style",
90
- ]
91
- all = [
92
- "style",
93
- "typing",
94
- ]
66
+ style = ["ruff check {args:.}", "black --check --diff {args:.}"]
67
+ fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
68
+ all = ["style", "typing"]
95
69
 
96
70
  [tool.hatch.metadata]
97
71
  allow-direct-references = true
@@ -137,9 +111,15 @@ ignore = [
137
111
  # Allow boolean positional values in function calls, like `dict.get(... True)`
138
112
  "FBT003",
139
113
  # Ignore checks for possible passwords
140
- "S105", "S106", "S107",
114
+ "S105",
115
+ "S106",
116
+ "S107",
141
117
  # Ignore complexity
142
- "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
118
+ "C901",
119
+ "PLR0911",
120
+ "PLR0912",
121
+ "PLR0913",
122
+ "PLR0915",
143
123
  ]
144
124
  unfixable = [
145
125
  # Don't touch unused imports
@@ -164,25 +144,14 @@ parallel = false
164
144
 
165
145
  [tool.coverage.report]
166
146
  omit = ["*/tests/*", "*/__init__.py"]
167
- show_missing=true
168
- exclude_lines = [
169
- "no cov",
170
- "if __name__ == .__main__.:",
171
- "if TYPE_CHECKING:",
172
- ]
147
+ show_missing = true
148
+ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
173
149
 
174
150
 
175
151
  [tool.pytest.ini_options]
176
152
  minversion = "6.0"
177
- markers = [
178
- "unit: unit tests",
179
- "integration: integration tests"
180
- ]
153
+ markers = ["unit: unit tests", "integration: integration tests"]
181
154
 
182
155
  [[tool.mypy.overrides]]
183
- module = [
184
- "haystack.*",
185
- "haystack_integrations.*",
186
- "pytest.*"
187
- ]
156
+ module = ["haystack.*", "haystack_integrations.*", "pytest.*"]
188
157
  ignore_missing_imports = true
@@ -1,10 +1,12 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- from typing import Any, Dict, List, Optional
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
9
+ from haystack.document_stores.types.filter_policy import apply_filter_policy
8
10
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
9
11
 
10
12
 
@@ -48,6 +50,7 @@ class ElasticsearchBM25Retriever:
48
50
  fuzziness: str = "AUTO",
49
51
  top_k: int = 10,
50
52
  scale_score: bool = False,
53
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
51
54
  ):
52
55
  """
53
56
  Initialize ElasticsearchBM25Retriever with an instance of ElasticsearchDocumentStore.
@@ -60,6 +63,7 @@ class ElasticsearchBM25Retriever:
60
63
  for more details.
61
64
  :param top_k: Maximum number of Documents to return.
62
65
  :param scale_score: If `True` scales the `Document`s' scores between 0 and 1.
66
+ :param filter_policy: Policy to determine how filters are applied.
63
67
  :raises ValueError: If `document_store` is not an instance of `ElasticsearchDocumentStore`.
64
68
  """
65
69
 
@@ -72,6 +76,7 @@ class ElasticsearchBM25Retriever:
72
76
  self._fuzziness = fuzziness
73
77
  self._top_k = top_k
74
78
  self._scale_score = scale_score
79
+ self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
75
80
 
76
81
  def to_dict(self) -> Dict[str, Any]:
77
82
  """
@@ -86,6 +91,7 @@ class ElasticsearchBM25Retriever:
86
91
  fuzziness=self._fuzziness,
87
92
  top_k=self._top_k,
88
93
  scale_score=self._scale_score,
94
+ filter_policy=self._filter_policy.value,
89
95
  document_store=self._document_store.to_dict(),
90
96
  )
91
97
 
@@ -102,6 +108,7 @@ class ElasticsearchBM25Retriever:
102
108
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
103
109
  data["init_parameters"]["document_store"]
104
110
  )
111
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
105
112
  return default_from_dict(cls, data)
106
113
 
107
114
  @component.output_types(documents=List[Document])
@@ -110,14 +117,17 @@ class ElasticsearchBM25Retriever:
110
117
  Retrieve documents using the BM25 keyword-based algorithm.
111
118
 
112
119
  :param query: String to search in `Document`s' text.
113
- :param filters: Filters applied to the retrieved `Document`s.
120
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
121
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
122
+ details.
114
123
  :param top_k: Maximum number of `Document` to return.
115
124
  :returns: A dictionary with the following keys:
116
125
  - `documents`: List of `Document`s that match the query.
117
126
  """
127
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
118
128
  docs = self._document_store._bm25_retrieval(
119
129
  query=query,
120
- filters=filters or self._filters,
130
+ filters=filters,
121
131
  fuzziness=self._fuzziness,
122
132
  top_k=top_k or self._top_k,
123
133
  scale_score=self._scale_score,
@@ -1,10 +1,12 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- from typing import Any, Dict, List, Optional
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
9
+ from haystack.document_stores.types.filter_policy import apply_filter_policy
8
10
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
9
11
 
10
12
 
@@ -49,6 +51,7 @@ class ElasticsearchEmbeddingRetriever:
49
51
  filters: Optional[Dict[str, Any]] = None,
50
52
  top_k: int = 10,
51
53
  num_candidates: Optional[int] = None,
54
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
52
55
  ):
53
56
  """
54
57
  Create the ElasticsearchEmbeddingRetriever component.
@@ -61,6 +64,7 @@ class ElasticsearchEmbeddingRetriever:
61
64
  Increasing this value will improve search accuracy at the cost of slower search speeds.
62
65
  You can read more about it in the Elasticsearch
63
66
  [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy)
67
+ :param filter_policy: Policy to determine how filters are applied.
64
68
  :raises ValueError: If `document_store` is not an instance of ElasticsearchDocumentStore.
65
69
  """
66
70
  if not isinstance(document_store, ElasticsearchDocumentStore):
@@ -71,6 +75,7 @@ class ElasticsearchEmbeddingRetriever:
71
75
  self._filters = filters or {}
72
76
  self._top_k = top_k
73
77
  self._num_candidates = num_candidates
78
+ self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
74
79
 
75
80
  def to_dict(self) -> Dict[str, Any]:
76
81
  """
@@ -84,6 +89,7 @@ class ElasticsearchEmbeddingRetriever:
84
89
  filters=self._filters,
85
90
  top_k=self._top_k,
86
91
  num_candidates=self._num_candidates,
92
+ filter_policy=self._filter_policy.value,
87
93
  document_store=self._document_store.to_dict(),
88
94
  )
89
95
 
@@ -100,6 +106,7 @@ class ElasticsearchEmbeddingRetriever:
100
106
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
101
107
  data["init_parameters"]["document_store"]
102
108
  )
109
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
103
110
  return default_from_dict(cls, data)
104
111
 
105
112
  @component.output_types(documents=List[Document])
@@ -108,14 +115,17 @@ class ElasticsearchEmbeddingRetriever:
108
115
  Retrieve documents using a vector similarity metric.
109
116
 
110
117
  :param query_embedding: Embedding of the query.
111
- :param filters: Filters applied to the retrieved `Document`s.
118
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
119
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
120
+ details.
112
121
  :param top_k: Maximum number of `Document`s to return.
113
122
  :returns: A dictionary with the following keys:
114
123
  - `documents`: List of `Document`s most similar to the given `query_embedding`
115
124
  """
125
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
116
126
  docs = self._document_store._embedding_retrieval(
117
127
  query_embedding=query_embedding,
118
- filters=filters or self._filters,
128
+ filters=filters,
119
129
  top_k=top_k or self._top_k,
120
130
  num_candidates=self._num_candidates,
121
131
  )
@@ -93,48 +93,60 @@ class ElasticsearchDocumentStore:
93
93
  :param **kwargs: Optional arguments that `Elasticsearch` takes.
94
94
  """
95
95
  self._hosts = hosts
96
- self._client = Elasticsearch(
97
- hosts,
98
- headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
99
- **kwargs,
100
- )
96
+ self._client = None
101
97
  self._index = index
102
98
  self._embedding_similarity_function = embedding_similarity_function
103
99
  self._custom_mapping = custom_mapping
104
100
  self._kwargs = kwargs
105
101
 
106
- # Check client connection, this will raise if not connected
107
- self._client.info()
108
-
109
102
  if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
110
103
  msg = "custom_mapping must be a dictionary"
111
104
  raise ValueError(msg)
112
105
 
113
- if self._custom_mapping:
114
- mappings = self._custom_mapping
115
- else:
116
- # Configure mapping for the embedding field if none is provided
117
- mappings = {
118
- "properties": {
119
- "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
120
- "content": {"type": "text"},
121
- },
122
- "dynamic_templates": [
123
- {
124
- "strings": {
125
- "path_match": "*",
126
- "match_mapping_type": "string",
127
- "mapping": {
128
- "type": "keyword",
129
- },
106
+ @property
107
+ def client(self) -> Elasticsearch:
108
+ if self._client is None:
109
+ client = Elasticsearch(
110
+ self._hosts,
111
+ headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
112
+ **self._kwargs,
113
+ )
114
+ # Check client connection, this will raise if not connected
115
+ client.info()
116
+
117
+ if self._custom_mapping:
118
+ mappings = self._custom_mapping
119
+ else:
120
+ # Configure mapping for the embedding field if none is provided
121
+ mappings = {
122
+ "properties": {
123
+ "embedding": {
124
+ "type": "dense_vector",
125
+ "index": True,
126
+ "similarity": self._embedding_similarity_function,
127
+ },
128
+ "content": {"type": "text"},
129
+ },
130
+ "dynamic_templates": [
131
+ {
132
+ "strings": {
133
+ "path_match": "*",
134
+ "match_mapping_type": "string",
135
+ "mapping": {
136
+ "type": "keyword",
137
+ },
138
+ }
130
139
  }
131
- }
132
- ],
133
- }
140
+ ],
141
+ }
142
+
143
+ # Create the index if it doesn't exist
144
+ if not client.indices.exists(index=self._index):
145
+ client.indices.create(index=self._index, mappings=mappings)
146
+
147
+ self._client = client
134
148
 
135
- # Create the index if it doesn't exist
136
- if not self._client.indices.exists(index=index):
137
- self._client.indices.create(index=index, mappings=mappings)
149
+ return self._client
138
150
 
139
151
  def to_dict(self) -> Dict[str, Any]:
140
152
  """
@@ -172,7 +184,7 @@ class ElasticsearchDocumentStore:
172
184
  Returns how many documents are present in the document store.
173
185
  :returns: Number of documents in the document store.
174
186
  """
175
- return self._client.count(index=self._index)["count"]
187
+ return self.client.count(index=self._index)["count"]
176
188
 
177
189
  def _search_documents(self, **kwargs) -> List[Document]:
178
190
  """
@@ -187,7 +199,7 @@ class ElasticsearchDocumentStore:
187
199
  from_ = 0
188
200
  # Handle pagination
189
201
  while True:
190
- res = self._client.search(
202
+ res = self.client.search(
191
203
  index=self._index,
192
204
  from_=from_,
193
205
  **kwargs,
@@ -261,7 +273,7 @@ class ElasticsearchDocumentStore:
261
273
  )
262
274
 
263
275
  documents_written, errors = helpers.bulk(
264
- client=self._client,
276
+ client=self.client,
265
277
  actions=elasticsearch_actions,
266
278
  refresh="wait_for",
267
279
  index=self._index,
@@ -317,7 +329,7 @@ class ElasticsearchDocumentStore:
317
329
  """
318
330
 
319
331
  helpers.bulk(
320
- client=self._client,
332
+ client=self.client,
321
333
  actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
322
334
  refresh="wait_for",
323
335
  index=self._index,
@@ -3,7 +3,9 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from unittest.mock import Mock, patch
5
5
 
6
+ import pytest
6
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
7
9
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
8
10
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
9
11
 
@@ -14,8 +16,15 @@ def test_init_default():
14
16
  assert retriever._document_store == mock_store
15
17
  assert retriever._filters == {}
16
18
  assert retriever._top_k == 10
19
+ assert retriever._filter_policy == FilterPolicy.REPLACE
17
20
  assert not retriever._scale_score
18
21
 
22
+ retriever = ElasticsearchBM25Retriever(document_store=mock_store, filter_policy="replace")
23
+ assert retriever._filter_policy == FilterPolicy.REPLACE
24
+
25
+ with pytest.raises(ValueError):
26
+ ElasticsearchBM25Retriever(document_store=mock_store, filter_policy="keep")
27
+
19
28
 
20
29
  @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
21
30
  def test_to_dict(_mock_elasticsearch_client):
@@ -38,6 +47,7 @@ def test_to_dict(_mock_elasticsearch_client):
38
47
  "fuzziness": "AUTO",
39
48
  "top_k": 10,
40
49
  "scale_score": False,
50
+ "filter_policy": "replace",
41
51
  },
42
52
  }
43
53
 
@@ -55,6 +65,7 @@ def test_from_dict(_mock_elasticsearch_client):
55
65
  "fuzziness": "AUTO",
56
66
  "top_k": 10,
57
67
  "scale_score": True,
68
+ "filter_policy": "replace",
58
69
  },
59
70
  }
60
71
  retriever = ElasticsearchBM25Retriever.from_dict(data)
@@ -63,6 +74,7 @@ def test_from_dict(_mock_elasticsearch_client):
63
74
  assert retriever._fuzziness == "AUTO"
64
75
  assert retriever._top_k == 10
65
76
  assert retriever._scale_score
77
+ assert retriever._filter_policy == FilterPolicy.REPLACE
66
78
 
67
79
 
68
80
  def test_run():
@@ -15,6 +15,12 @@ from haystack.testing.document_store import DocumentStoreBaseTests
15
15
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
16
16
 
17
17
 
18
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
19
+ def test_init_is_lazy(_mock_es_client):
20
+ ElasticsearchDocumentStore(hosts="testhost")
21
+ _mock_es_client.assert_not_called()
22
+
23
+
18
24
  @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
19
25
  def test_to_dict(_mock_elasticsearch_client):
20
26
  document_store = ElasticsearchDocumentStore(hosts="some hosts")
@@ -73,7 +79,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
73
79
  hosts=hosts, index=index, embedding_similarity_function=embedding_similarity_function
74
80
  )
75
81
  yield store
76
- store._client.options(ignore_status=[400, 404]).indices.delete(index=index)
82
+ store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
77
83
 
78
84
  def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
79
85
  """
@@ -101,7 +107,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
101
107
  super().assert_documents_are_equal(received, expected)
102
108
 
103
109
  def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
104
- assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
110
+ assert document_store.client._headers["user-agent"].startswith("haystack-py-ds/")
105
111
 
106
112
  def test_write_documents(self, document_store: ElasticsearchDocumentStore):
107
113
  docs = [Document(id="1")]
@@ -308,7 +314,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
308
314
  )
309
315
  mock_elasticsearch.return_value = mock_client
310
316
 
311
- ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping)
317
+ _ = ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping).client
312
318
  mock_client.indices.create.assert_called_once_with(
313
319
  index="default",
314
320
  mappings=custom_mapping,
@@ -3,7 +3,9 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from unittest.mock import Mock, patch
5
5
 
6
+ import pytest
6
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
7
9
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
8
10
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
9
11
 
@@ -16,6 +18,12 @@ def test_init_default():
16
18
  assert retriever._top_k == 10
17
19
  assert retriever._num_candidates is None
18
20
 
21
+ retriever = ElasticsearchEmbeddingRetriever(document_store=mock_store, filter_policy="replace")
22
+ assert retriever._filter_policy == FilterPolicy.REPLACE
23
+
24
+ with pytest.raises(ValueError):
25
+ ElasticsearchEmbeddingRetriever(document_store=mock_store, filter_policy="keep")
26
+
19
27
 
20
28
  @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
21
29
  def test_to_dict(_mock_elasticsearch_client):
@@ -37,6 +45,7 @@ def test_to_dict(_mock_elasticsearch_client):
37
45
  },
38
46
  "filters": {},
39
47
  "top_k": 10,
48
+ "filter_policy": "replace",
40
49
  "num_candidates": None,
41
50
  },
42
51
  }
@@ -54,6 +63,7 @@ def test_from_dict(_mock_elasticsearch_client):
54
63
  },
55
64
  "filters": {},
56
65
  "top_k": 10,
66
+ "filter_policy": "replace",
57
67
  "num_candidates": None,
58
68
  },
59
69
  }