elasticsearch-haystack 0.7.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/CHANGELOG.md +10 -0
  2. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/PKG-INFO +1 -1
  3. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/pyproject.toml +6 -4
  4. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +5 -1
  5. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +5 -1
  6. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +2 -2
  7. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_bm25_retriever.py +25 -0
  8. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_document_store.py +1 -0
  9. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_embedding_retriever.py +24 -0
  10. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_filters.py +1 -0
  11. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/.gitignore +0 -0
  12. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/LICENSE +0 -0
  13. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/README.md +0 -0
  14. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/docker-compose.yml +0 -0
  15. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/pydoc/config.yml +0 -0
  16. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  17. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  18. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  19. {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/__init__.py +0 -0
@@ -5,6 +5,16 @@
5
5
  ### 🚀 Features
6
6
 
7
7
  - Defer the database connection to when it's needed (#766)
8
+ - Add filter_policy to elasticsearch integration (#825)
9
+
10
+ ### 🐛 Bug Fixes
11
+
12
+ - `ElasticSearch` - Fallback to default filter policy when deserializing retrievers without the init parameter (#898)
13
+
14
+ ### ⚙️ Miscellaneous Tasks
15
+
16
+ - Retry tests to reduce flakiness (#836)
17
+ - Update ruff invocation to include check parameter (#853)
8
18
 
9
19
  ## [integrations/elasticsearch-v0.5.0] - 2024-05-24
10
20
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.7.0
3
+ Version: 1.0.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -49,10 +49,12 @@ dependencies = [
49
49
  "haystack-pydoc-tools",
50
50
  ]
51
51
  [tool.hatch.envs.default.scripts]
52
- test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
53
- test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
52
+ test = "pytest {args:tests}"
53
+ test-cov = "coverage run -m pytest {args:tests}"
54
+ test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
54
55
  cov-report = ["- coverage combine", "coverage report"]
55
56
  cov = ["test-cov", "cov-report"]
57
+ cov-retry = ["test-cov-retry", "cov-report"]
56
58
  docs = ["pydoc-markdown pydoc/config.yml"]
57
59
 
58
60
  [[tool.hatch.envs.all.matrix]]
@@ -63,8 +65,8 @@ detached = true
63
65
  dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
64
66
  [tool.hatch.envs.lint.scripts]
65
67
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
66
- style = ["ruff check {args:.}", "black --check --diff {args:.}"]
67
- fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
68
+ style = ["ruff check {args:. --exclude tests/}", "black --check --diff {args:.}"]
69
+ fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/}", "style"]
68
70
  all = ["style", "typing"]
69
71
 
70
72
  [tool.hatch.metadata]
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
9
  from haystack.document_stores.types.filter_policy import apply_filter_policy
10
+
10
11
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
11
12
 
12
13
 
@@ -108,7 +109,10 @@ class ElasticsearchBM25Retriever:
108
109
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
109
110
  data["init_parameters"]["document_store"]
110
111
  )
111
- data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
112
+ # Pipelines serialized with old versions of the component might not
113
+ # have the filter_policy field.
114
+ if filter_policy := data["init_parameters"].get("filter_policy"):
115
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
112
116
  return default_from_dict(cls, data)
113
117
 
114
118
  @component.output_types(documents=List[Document])
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
9
  from haystack.document_stores.types.filter_policy import apply_filter_policy
10
+
10
11
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
11
12
 
12
13
 
@@ -106,7 +107,10 @@ class ElasticsearchEmbeddingRetriever:
106
107
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
107
108
  data["init_parameters"]["document_store"]
108
109
  )
109
- data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
110
+ # Pipelines serialized with old versions of the component might not
111
+ # have the filter_policy field.
112
+ if filter_policy := data["init_parameters"].get("filter_policy"):
113
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
110
114
  return default_from_dict(cls, data)
111
115
 
112
116
  @component.output_types(documents=List[Document])
@@ -12,7 +12,6 @@ from haystack import default_from_dict, default_to_dict
12
12
  from haystack.dataclasses import Document
13
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
14
14
  from haystack.document_stores.types import DuplicatePolicy
15
- from haystack.utils.filters import convert
16
15
  from haystack.version import __version__ as haystack_version
17
16
 
18
17
  from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
@@ -224,7 +223,8 @@ class ElasticsearchDocumentStore:
224
223
  :returns: List of `Document`s that match the filters.
225
224
  """
226
225
  if filters and "operator" not in filters and "conditions" not in filters:
227
- filters = convert(filters)
226
+ msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
227
+ raise ValueError(msg)
228
228
 
229
229
  query = {"bool": {"filter": _normalize_filters(filters)}} if filters else None
230
230
  documents = self._search_documents(query=query)
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
6
6
  import pytest
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
+
9
10
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
10
11
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
11
12
 
@@ -77,6 +78,30 @@ def test_from_dict(_mock_elasticsearch_client):
77
78
  assert retriever._filter_policy == FilterPolicy.REPLACE
78
79
 
79
80
 
81
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
82
+ def test_from_dict_no_filter_policy(_mock_elasticsearch_client):
83
+ data = {
84
+ "type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever",
85
+ "init_parameters": {
86
+ "document_store": {
87
+ "init_parameters": {"hosts": "some fake host", "index": "default"},
88
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
89
+ },
90
+ "filters": {},
91
+ "fuzziness": "AUTO",
92
+ "top_k": 10,
93
+ "scale_score": True,
94
+ },
95
+ }
96
+ retriever = ElasticsearchBM25Retriever.from_dict(data)
97
+ assert retriever._document_store
98
+ assert retriever._filters == {}
99
+ assert retriever._fuzziness == "AUTO"
100
+ assert retriever._top_k == 10
101
+ assert retriever._scale_score
102
+ assert retriever._filter_policy == FilterPolicy.REPLACE # defaults to REPLACE
103
+
104
+
80
105
  def test_run():
81
106
  mock_store = Mock(spec=ElasticsearchDocumentStore)
82
107
  mock_store._bm25_retrieval.return_value = [Document(content="Test doc")]
@@ -12,6 +12,7 @@ from haystack.dataclasses.document import Document
12
12
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
13
13
  from haystack.document_stores.types import DuplicatePolicy
14
14
  from haystack.testing.document_store import DocumentStoreBaseTests
15
+
15
16
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
16
17
 
17
18
 
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
6
6
  import pytest
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
+
9
10
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
10
11
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
11
12
 
@@ -74,6 +75,29 @@ def test_from_dict(_mock_elasticsearch_client):
74
75
  assert retriever._num_candidates is None
75
76
 
76
77
 
78
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
79
+ def test_from_dict_no_filter_policy(_mock_elasticsearch_client):
80
+ t = "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever.ElasticsearchEmbeddingRetriever"
81
+ data = {
82
+ "type": t,
83
+ "init_parameters": {
84
+ "document_store": {
85
+ "init_parameters": {"hosts": "some fake host", "index": "default"},
86
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
87
+ },
88
+ "filters": {},
89
+ "top_k": 10,
90
+ "num_candidates": None,
91
+ },
92
+ }
93
+ retriever = ElasticsearchEmbeddingRetriever.from_dict(data)
94
+ assert retriever._document_store
95
+ assert retriever._filters == {}
96
+ assert retriever._top_k == 10
97
+ assert retriever._num_candidates is None
98
+ assert retriever._filter_policy == FilterPolicy.REPLACE # defaults to REPLACE
99
+
100
+
77
101
  def test_run():
78
102
  mock_store = Mock(spec=ElasticsearchDocumentStore)
79
103
  mock_store._embedding_retrieval.return_value = [Document(content="Test doc", embedding=[0.1, 0.2])]
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import pytest
5
5
  from haystack.errors import FilterError
6
+
6
7
  from haystack_integrations.document_stores.elasticsearch.filters import _normalize_filters, _normalize_ranges
7
8
 
8
9
  filters_data = [