elasticsearch-haystack 3.1.0__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: elasticsearch-haystack
3
- Version: 3.1.0
3
+ Version: 4.0.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -24,35 +24,19 @@ Requires-Dist: elasticsearch<9,>=8
24
24
  Requires-Dist: haystack-ai>=2.4.0
25
25
  Description-Content-Type: text/markdown
26
26
 
27
- [![test](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml)
27
+ # elasticsearch-haystack
28
28
 
29
29
  [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack)
30
30
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack)
31
31
 
32
- # Elasticsearch Document Store
32
+ - [Integration page](https://haystack.deepset.ai/integrations/elasticsearch-document-store)
33
+ - [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/elasticsearch/CHANGELOG.md)
33
34
 
34
- Document Store for Haystack 2.x, supports ElasticSearch 8.
35
+ ---
35
36
 
36
- ## Installation
37
+ ## Contributing
37
38
 
38
- ```console
39
- pip install elasticsearch-haystack
40
- ```
39
+ Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
41
40
 
42
- ## Testing
43
-
44
- To run tests first start a Docker container running ElasticSearch. We provide a utility `docker-compose.yml` for that:
45
-
46
- ```console
47
- docker-compose up
48
- ```
49
-
50
- Then run tests:
51
-
52
- ```console
53
- hatch run test:all
54
- ```
55
-
56
- ## License
57
-
58
- `elasticsearch-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
41
+ To run integration tests locally, you need a Docker container running ElasticSearch.
42
+ Use the provided `docker-compose.yml` file to start the container: `docker compose up -d`.
@@ -4,9 +4,9 @@ haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha2
4
4
  haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=-6eIHW5cU4k8-jAsUsCb15hJRalpkUhzy_dNxr5HUZo,7404
5
5
  haystack_integrations/document_stores/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
7
- haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=d_u49ySnhQzK_jGGThAYCWKPGDdVcpmCGQ-CWgCaO58,27852
7
+ haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=1KnQdBsD-QdvncxFZ1oaNTA9-vRJ7xXCDaKr5JcTVnE,31062
8
8
  haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
9
- elasticsearch_haystack-3.1.0.dist-info/METADATA,sha256=O1bfELb0DpiXMSLvZuq4upfSo-1So67b058LXqt7N4E,2261
10
- elasticsearch_haystack-3.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- elasticsearch_haystack-3.1.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
12
- elasticsearch_haystack-3.1.0.dist-info/RECORD,,
9
+ elasticsearch_haystack-4.0.0.dist-info/METADATA,sha256=Ukr9NBPT0Vwi8WhAA5agHeOQfB3_0_6RcGQDx_yJB2w,2105
10
+ elasticsearch_haystack-4.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ elasticsearch_haystack-4.0.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
12
+ elasticsearch_haystack-4.0.0.dist-info/RECORD,,
@@ -1,8 +1,9 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
+
4
5
  from collections.abc import Mapping
5
- from typing import Any, Dict, List, Literal, Optional, Union
6
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
6
7
 
7
8
  import numpy as np
8
9
  from elastic_transport import NodeConfig
@@ -10,6 +11,7 @@ from haystack import default_from_dict, default_to_dict, logging
10
11
  from haystack.dataclasses import Document
11
12
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
12
13
  from haystack.document_stores.types import DuplicatePolicy
14
+ from haystack.utils import Secret, deserialize_secrets_inplace
13
15
  from haystack.version import __version__ as haystack_version
14
16
 
15
17
  from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
@@ -38,13 +40,16 @@ class ElasticsearchDocumentStore:
38
40
 
39
41
  Usage example (Elastic Cloud):
40
42
  ```python
41
- from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
42
- document_store = ElasticsearchDocumentStore(cloud_id="YOUR_CLOUD_ID", api_key="YOUR_API_KEY")
43
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
44
+ document_store = ElasticsearchDocumentStore(
45
+ api_key_id=Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False),
46
+ api_key=Secret.from_env_var("ELASTIC_API_KEY", strict=False),
47
+ )
43
48
  ```
44
49
 
45
50
  Usage example (self-hosted Elasticsearch instance):
46
51
  ```python
47
- from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
52
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
48
53
  document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
49
54
  ```
50
55
  In the above example we connect with security disabled just to show the basic usage.
@@ -63,6 +68,8 @@ class ElasticsearchDocumentStore:
63
68
  hosts: Optional[Hosts] = None,
64
69
  custom_mapping: Optional[Dict[str, Any]] = None,
65
70
  index: str = "default",
71
+ api_key: Secret = Secret.from_env_var("ELASTIC_API_KEY", strict=False), # noqa: B008
72
+ api_key_id: Secret = Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False), # noqa: B008
66
73
  embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
67
74
  **kwargs: Any,
68
75
  ):
@@ -80,9 +87,16 @@ class ElasticsearchDocumentStore:
80
87
  For the full list of supported kwargs, see the official Elasticsearch
81
88
  [reference](https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch)
82
89
 
90
+ Authentication is provided via Secret objects, which by default are loaded from environment variables.
91
+ You can either provide both `api_key_id` and `api_key`, or just `api_key` containing a base64-encoded string
92
+ of `id:secret`. Secret instances can also be loaded from a token using the `Secret.from_token()` method.
93
+
83
94
  :param hosts: List of hosts running the Elasticsearch client.
84
95
  :param custom_mapping: Custom mapping for the index. If not provided, a default mapping will be used.
85
96
  :param index: Name of index in Elasticsearch.
97
+ :param api_key: A Secret object containing either the API key secret (used together with `api_key_id`) or a
98
+ base64-encoded string of the concatenated id and secret (separated by “:”) for authenticating with Elasticsearch.
99
+ :param api_key_id: A Secret object containing the API key ID for authenticating with Elasticsearch.
86
100
  :param embedding_similarity_function: The similarity function used to compare Documents embeddings.
87
101
  This parameter only takes effect if the index does not yet exist and is created.
88
102
  To choose the most appropriate function, look for information about your embedding model.
@@ -94,6 +108,8 @@ class ElasticsearchDocumentStore:
94
108
  self._client: Optional[Elasticsearch] = None
95
109
  self._async_client: Optional[AsyncElasticsearch] = None
96
110
  self._index = index
111
+ self._api_key = api_key
112
+ self._api_key_id = api_key_id
97
113
  self._embedding_similarity_function = embedding_similarity_function
98
114
  self._custom_mapping = custom_mapping
99
115
  self._kwargs = kwargs
@@ -111,14 +127,18 @@ class ElasticsearchDocumentStore:
111
127
  headers = self._kwargs.pop("headers", {})
112
128
  headers["user-agent"] = f"haystack-py-ds/{haystack_version}"
113
129
 
130
+ api_key = self._handle_auth()
131
+
114
132
  # Initialize both sync and async clients
115
133
  self._client = Elasticsearch(
116
134
  self._hosts,
135
+ api_key=api_key,
117
136
  headers=headers,
118
137
  **self._kwargs,
119
138
  )
120
139
  self._async_client = AsyncElasticsearch(
121
140
  self._hosts,
141
+ api_key=api_key,
122
142
  headers=headers,
123
143
  **self._kwargs,
124
144
  )
@@ -158,6 +178,49 @@ class ElasticsearchDocumentStore:
158
178
 
159
179
  self._initialized = True
160
180
 
181
+ def _handle_auth(self) -> Optional[Union[str, Tuple[str, str]]]:
182
+ """
183
+ Handles authentication for the Elasticsearch client.
184
+
185
+ There are three possible scenarios.
186
+
187
+ 1) Authentication with both api_key and api_key_id, either as Secrets or as environment variables. In this case,
188
+ use both for authentication.
189
+
190
+ 2) Authentication with only api_key, either as a Secret or as an environment variable. In this case, the api_key
191
+ must be a base64-encoded string that encodes both id and secret <id:secret>.
192
+
193
+ 3) No authentication: neither api_key nor api_key_id is provided as a Secret or defined as an
194
+ environment variable. In this case, the client will connect without authentication.
195
+
196
+ :returns:
197
+ api_key: Optional[Union[str, Tuple[str, str]]]
198
+
199
+ """
200
+
201
+ api_key: Optional[Union[str, Tuple[str, str]]] # make the type checker happy
202
+
203
+ api_key_resolved = self._api_key.resolve_value()
204
+ api_key_id_resolved = self._api_key_id.resolve_value()
205
+
206
+ # Scenario 1: both are found, use them
207
+ if api_key_id_resolved and api_key_resolved:
208
+ api_key = (api_key_id_resolved, api_key_resolved)
209
+ return api_key
210
+
211
+ # Scenario 2: only api_key is set, must be a base64-encoded string that encodes id and secret (separated by “:”)
212
+ elif api_key_resolved and not api_key_id_resolved:
213
+ return api_key_resolved
214
+
215
+ # Error: only api_key_id is found, raise an error
216
+ elif api_key_id_resolved and not api_key_resolved:
217
+ msg = "api_key_id is provided but api_key is missing."
218
+ raise ValueError(msg)
219
+
220
+ else:
221
+ # Scenario 3: neither found, no authentication
222
+ return None
223
+
161
224
  @property
162
225
  def client(self) -> Elasticsearch:
163
226
  """
@@ -191,6 +254,8 @@ class ElasticsearchDocumentStore:
191
254
  hosts=self._hosts,
192
255
  custom_mapping=self._custom_mapping,
193
256
  index=self._index,
257
+ api_key=self._api_key.to_dict(),
258
+ api_key_id=self._api_key_id.to_dict(),
194
259
  embedding_similarity_function=self._embedding_similarity_function,
195
260
  **self._kwargs,
196
261
  )
@@ -205,6 +270,7 @@ class ElasticsearchDocumentStore:
205
270
  :returns:
206
271
  Deserialized component.
207
272
  """
273
+ deserialize_secrets_inplace(data, keys=["api_key", "api_key_id"])
208
274
  return default_from_dict(cls, data)
209
275
 
210
276
  def count_documents(self) -> int: