elasticsearch-haystack 0.7.1__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic; further details were linked from the original page.

Files changed (19)
  1. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/CHANGELOG.md +16 -1
  2. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/PKG-INFO +1 -1
  3. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/pyproject.toml +15 -9
  4. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +1 -0
  5. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +1 -0
  6. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +6 -3
  7. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/tests/test_bm25_retriever.py +1 -0
  8. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/tests/test_document_store.py +15 -0
  9. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/tests/test_embedding_retriever.py +1 -0
  10. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/tests/test_filters.py +1 -0
  11. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/.gitignore +0 -0
  12. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/LICENSE +0 -0
  13. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/README.md +0 -0
  14. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/docker-compose.yml +0 -0
  15. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/pydoc/config.yml +0 -0
  16. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  17. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  18. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  19. {elasticsearch_haystack-0.7.1 → elasticsearch_haystack-1.0.1}/tests/__init__.py +0 -0
@@ -1,10 +1,25 @@
1
1
  # Changelog
2
2
 
3
- ## [unreleased]
3
+ ## [integrations/elasticsearch-v1.0.0] - 2024-09-12
4
4
 
5
5
  ### 🚀 Features
6
6
 
7
7
  - Defer the database connection to when it's needed (#766)
8
+ - Add filter_policy to elasticsearch integration (#825)
9
+
10
+ ### 🐛 Bug Fixes
11
+
12
+ - `ElasticSearch` - Fallback to default filter policy when deserializing retrievers without the init parameter (#898)
13
+
14
+ ### 🧪 Testing
15
+
16
+ - Do not retry tests in `hatch run test` command (#954)
17
+
18
+ ### ⚙️ Miscellaneous Tasks
19
+
20
+ - Retry tests to reduce flakyness (#836)
21
+ - Update ruff invocation to include check parameter (#853)
22
+ - ElasticSearch - remove legacy filters elasticsearch (#1078)
8
23
 
9
24
  ## [integrations/elasticsearch-v0.5.0] - 2024-05-24
10
25
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.7.1
3
+ Version: 1.0.1
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -41,6 +41,7 @@ root = "../.."
41
41
  git_describe_command = 'git describe --tags --match="integrations/elasticsearch-v[0-9]*"'
42
42
 
43
43
  [tool.hatch.envs.default]
44
+ installer = "uv"
44
45
  dependencies = [
45
46
  "coverage[toml]>=6.5",
46
47
  "pytest",
@@ -49,22 +50,25 @@ dependencies = [
49
50
  "haystack-pydoc-tools",
50
51
  ]
51
52
  [tool.hatch.envs.default.scripts]
52
- test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
53
- test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
53
+ test = "pytest {args:tests}"
54
+ test-cov = "coverage run -m pytest {args:tests}"
55
+ test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
54
56
  cov-report = ["- coverage combine", "coverage report"]
55
57
  cov = ["test-cov", "cov-report"]
58
+ cov-retry = ["test-cov-retry", "cov-report"]
56
59
  docs = ["pydoc-markdown pydoc/config.yml"]
57
60
 
58
61
  [[tool.hatch.envs.all.matrix]]
59
62
  python = ["3.8", "3.9", "3.10", "3.11"]
60
63
 
61
64
  [tool.hatch.envs.lint]
65
+ installer = "uv"
62
66
  detached = true
63
- dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
67
+ dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
64
68
  [tool.hatch.envs.lint.scripts]
65
69
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
66
- style = ["ruff check {args:.}", "black --check --diff {args:.}"]
67
- fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
70
+ style = ["ruff check {args:}", "black --check --diff {args:.}"]
71
+ fmt = ["black {args:.}", "ruff check --fix {args:}", "style"]
68
72
  all = ["style", "typing"]
69
73
 
70
74
  [tool.hatch.metadata]
@@ -78,6 +82,8 @@ skip-string-normalization = true
78
82
  [tool.ruff]
79
83
  target-version = "py38"
80
84
  line-length = 120
85
+
86
+ [tool.ruff.lint]
81
87
  select = [
82
88
  "A",
83
89
  "ARG",
@@ -126,13 +132,13 @@ unfixable = [
126
132
  "F401",
127
133
  ]
128
134
 
129
- [tool.ruff.isort]
130
- known-first-party = ["src"]
135
+ [tool.ruff.lint.isort]
136
+ known-first-party = ["haystack_integrations"]
131
137
 
132
- [tool.ruff.flake8-tidy-imports]
138
+ [tool.ruff.lint.flake8-tidy-imports]
133
139
  ban-relative-imports = "parents"
134
140
 
135
- [tool.ruff.per-file-ignores]
141
+ [tool.ruff.lint.per-file-ignores]
136
142
  # Tests can use magic values, assertions, and relative imports
137
143
  "tests/**/*" = ["PLR2004", "S101", "TID252"]
138
144
 
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
9
  from haystack.document_stores.types.filter_policy import apply_filter_policy
10
+
10
11
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
11
12
 
12
13
 
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
9
  from haystack.document_stores.types.filter_policy import apply_filter_policy
10
+
10
11
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
11
12
 
12
13
 
@@ -12,7 +12,6 @@ from haystack import default_from_dict, default_to_dict
12
12
  from haystack.dataclasses import Document
13
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
14
14
  from haystack.document_stores.types import DuplicatePolicy
15
- from haystack.utils.filters import convert
16
15
  from haystack.version import __version__ as haystack_version
17
16
 
18
17
  from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
@@ -106,9 +105,12 @@ class ElasticsearchDocumentStore:
106
105
  @property
107
106
  def client(self) -> Elasticsearch:
108
107
  if self._client is None:
108
+ headers = self._kwargs.pop("headers", {})
109
+ headers["user-agent"] = f"haystack-py-ds/{haystack_version}"
110
+
109
111
  client = Elasticsearch(
110
112
  self._hosts,
111
- headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
113
+ headers=headers,
112
114
  **self._kwargs,
113
115
  )
114
116
  # Check client connection, this will raise if not connected
@@ -224,7 +226,8 @@ class ElasticsearchDocumentStore:
224
226
  :returns: List of `Document`s that match the filters.
225
227
  """
226
228
  if filters and "operator" not in filters and "conditions" not in filters:
227
- filters = convert(filters)
229
+ msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
230
+ raise ValueError(msg)
228
231
 
229
232
  query = {"bool": {"filter": _normalize_filters(filters)}} if filters else None
230
233
  documents = self._search_documents(query=query)
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
6
6
  import pytest
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
+
9
10
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
10
11
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
11
12
 
@@ -12,6 +12,7 @@ from haystack.dataclasses.document import Document
12
12
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
13
13
  from haystack.document_stores.types import DuplicatePolicy
14
14
  from haystack.testing.document_store import DocumentStoreBaseTests
15
+
15
16
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
16
17
 
17
18
 
@@ -21,6 +22,20 @@ def test_init_is_lazy(_mock_es_client):
21
22
  _mock_es_client.assert_not_called()
22
23
 
23
24
 
25
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
26
+ def test_headers_are_supported(_mock_es_client):
27
+ _ = ElasticsearchDocumentStore(hosts="testhost", headers={"header1": "value1", "header2": "value2"}).client
28
+
29
+ assert _mock_es_client.call_count == 1
30
+ _, kwargs = _mock_es_client.call_args
31
+
32
+ headers_found = kwargs["headers"]
33
+ assert headers_found["header1"] == "value1"
34
+ assert headers_found["header2"] == "value2"
35
+
36
+ assert headers_found["user-agent"].startswith("haystack-py-ds/")
37
+
38
+
24
39
  @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
25
40
  def test_to_dict(_mock_elasticsearch_client):
26
41
  document_store = ElasticsearchDocumentStore(hosts="some hosts")
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
6
6
  import pytest
7
7
  from haystack.dataclasses import Document
8
8
  from haystack.document_stores.types import FilterPolicy
9
+
9
10
  from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
10
11
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
11
12
 
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import pytest
5
5
  from haystack.errors import FilterError
6
+
6
7
  from haystack_integrations.document_stores.elasticsearch.filters import _normalize_filters, _normalize_ranges
7
8
 
8
9
  filters_data = [