elasticsearch-haystack 3.1.0__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-3.1.0.dist-info → elasticsearch_haystack-4.0.0.dist-info}/METADATA +9 -25
- {elasticsearch_haystack-3.1.0.dist-info → elasticsearch_haystack-4.0.0.dist-info}/RECORD +5 -5
- haystack_integrations/document_stores/elasticsearch/document_store.py +70 -4
- {elasticsearch_haystack-3.1.0.dist-info → elasticsearch_haystack-4.0.0.dist-info}/WHEEL +0 -0
- {elasticsearch_haystack-3.1.0.dist-info → elasticsearch_haystack-4.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 3.1.0
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -24,35 +24,19 @@ Requires-Dist: elasticsearch<9,>=8
|
|
|
24
24
|
Requires-Dist: haystack-ai>=2.4.0
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# elasticsearch-haystack
|
|
28
28
|
|
|
29
29
|
[PyPI - Version](https://pypi.org/project/elasticsearch-haystack)
|
|
30
30
|
[PyPI - Python Version](https://pypi.org/project/elasticsearch-haystack)
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
- [Integration page](https://haystack.deepset.ai/integrations/elasticsearch-document-store)
|
|
33
|
+
- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/elasticsearch/CHANGELOG.md)
|
|
33
34
|
|
|
34
|
-
|
|
35
|
+
---
|
|
35
36
|
|
|
36
|
-
##
|
|
37
|
+
## Contributing
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
pip install elasticsearch-haystack
|
|
40
|
-
```
|
|
39
|
+
Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
|
|
41
40
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
To run tests first start a Docker container running ElasticSearch. We provide a utility `docker-compose.yml` for that:
|
|
45
|
-
|
|
46
|
-
```console
|
|
47
|
-
docker-compose up
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
Then run tests:
|
|
51
|
-
|
|
52
|
-
```console
|
|
53
|
-
hatch run test:all
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
## License
|
|
57
|
-
|
|
58
|
-
`elasticsearch-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
|
|
41
|
+
To run integration tests locally, you need a Docker container running ElasticSearch.
|
|
42
|
+
Use the provided `docker-compose.yml` file to start the container: `docker compose up -d`.
|
|
@@ -4,9 +4,9 @@ haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha2
|
|
|
4
4
|
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=-6eIHW5cU4k8-jAsUsCb15hJRalpkUhzy_dNxr5HUZo,7404
|
|
5
5
|
haystack_integrations/document_stores/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
7
|
-
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=
|
|
7
|
+
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=1KnQdBsD-QdvncxFZ1oaNTA9-vRJ7xXCDaKr5JcTVnE,31062
|
|
8
8
|
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
|
|
9
|
-
elasticsearch_haystack-
|
|
10
|
-
elasticsearch_haystack-
|
|
11
|
-
elasticsearch_haystack-
|
|
12
|
-
elasticsearch_haystack-
|
|
9
|
+
elasticsearch_haystack-4.0.0.dist-info/METADATA,sha256=Ukr9NBPT0Vwi8WhAA5agHeOQfB3_0_6RcGQDx_yJB2w,2105
|
|
10
|
+
elasticsearch_haystack-4.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
11
|
+
elasticsearch_haystack-4.0.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
12
|
+
elasticsearch_haystack-4.0.0.dist-info/RECORD,,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
4
5
|
from collections.abc import Mapping
|
|
5
|
-
from typing import Any, Dict, List, Literal, Optional, Union
|
|
6
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
from elastic_transport import NodeConfig
|
|
@@ -10,6 +11,7 @@ from haystack import default_from_dict, default_to_dict, logging
|
|
|
10
11
|
from haystack.dataclasses import Document
|
|
11
12
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
12
13
|
from haystack.document_stores.types import DuplicatePolicy
|
|
14
|
+
from haystack.utils import Secret, deserialize_secrets_inplace
|
|
13
15
|
from haystack.version import __version__ as haystack_version
|
|
14
16
|
|
|
15
17
|
from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
|
|
@@ -38,13 +40,16 @@ class ElasticsearchDocumentStore:
|
|
|
38
40
|
|
|
39
41
|
Usage example (Elastic Cloud):
|
|
40
42
|
```python
|
|
41
|
-
from
|
|
42
|
-
document_store = ElasticsearchDocumentStore(
|
|
43
|
+
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
44
|
+
document_store = ElasticsearchDocumentStore(
|
|
45
|
+
api_key_id=Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False),
|
|
46
|
+
api_key=Secret.from_env_var("ELASTIC_API_KEY", strict=False),
|
|
47
|
+
)
|
|
43
48
|
```
|
|
44
49
|
|
|
45
50
|
Usage example (self-hosted Elasticsearch instance):
|
|
46
51
|
```python
|
|
47
|
-
from
|
|
52
|
+
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
48
53
|
document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
|
|
49
54
|
```
|
|
50
55
|
In the above example we connect with security disabled just to show the basic usage.
|
|
@@ -63,6 +68,8 @@ class ElasticsearchDocumentStore:
|
|
|
63
68
|
hosts: Optional[Hosts] = None,
|
|
64
69
|
custom_mapping: Optional[Dict[str, Any]] = None,
|
|
65
70
|
index: str = "default",
|
|
71
|
+
api_key: Secret = Secret.from_env_var("ELASTIC_API_KEY", strict=False), # noqa: B008
|
|
72
|
+
api_key_id: Secret = Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False), # noqa: B008
|
|
66
73
|
embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
|
|
67
74
|
**kwargs: Any,
|
|
68
75
|
):
|
|
@@ -80,9 +87,16 @@ class ElasticsearchDocumentStore:
|
|
|
80
87
|
For the full list of supported kwargs, see the official Elasticsearch
|
|
81
88
|
[reference](https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch)
|
|
82
89
|
|
|
90
|
+
Authentication is provided via Secret objects, which by default are loaded from environment variables.
|
|
91
|
+
You can either provide both `api_key_id` and `api_key`, or just `api_key` containing a base64-encoded string
|
|
92
|
+
of `id:secret`. Secret instances can also be loaded from a token using the `Secret.from_token()` method.
|
|
93
|
+
|
|
83
94
|
:param hosts: List of hosts running the Elasticsearch client.
|
|
84
95
|
:param custom_mapping: Custom mapping for the index. If not provided, a default mapping will be used.
|
|
85
96
|
:param index: Name of index in Elasticsearch.
|
|
97
|
+
:param api_key: A Secret object containing either the API key (used together with `api_key_id`) or a
|
|
98
|
+
base64-encoded `id:secret` string, used to authenticate with Elasticsearch.
|
|
99
|
+
:param api_key_id: A Secret object containing the API key ID for authenticating with Elasticsearch.
|
|
86
100
|
:param embedding_similarity_function: The similarity function used to compare Documents embeddings.
|
|
87
101
|
This parameter only takes effect if the index does not yet exist and is created.
|
|
88
102
|
To choose the most appropriate function, look for information about your embedding model.
|
|
@@ -94,6 +108,8 @@ class ElasticsearchDocumentStore:
|
|
|
94
108
|
self._client: Optional[Elasticsearch] = None
|
|
95
109
|
self._async_client: Optional[AsyncElasticsearch] = None
|
|
96
110
|
self._index = index
|
|
111
|
+
self._api_key = api_key
|
|
112
|
+
self._api_key_id = api_key_id
|
|
97
113
|
self._embedding_similarity_function = embedding_similarity_function
|
|
98
114
|
self._custom_mapping = custom_mapping
|
|
99
115
|
self._kwargs = kwargs
|
|
@@ -111,14 +127,18 @@ class ElasticsearchDocumentStore:
|
|
|
111
127
|
headers = self._kwargs.pop("headers", {})
|
|
112
128
|
headers["user-agent"] = f"haystack-py-ds/{haystack_version}"
|
|
113
129
|
|
|
130
|
+
api_key = self._handle_auth()
|
|
131
|
+
|
|
114
132
|
# Initialize both sync and async clients
|
|
115
133
|
self._client = Elasticsearch(
|
|
116
134
|
self._hosts,
|
|
135
|
+
api_key=api_key,
|
|
117
136
|
headers=headers,
|
|
118
137
|
**self._kwargs,
|
|
119
138
|
)
|
|
120
139
|
self._async_client = AsyncElasticsearch(
|
|
121
140
|
self._hosts,
|
|
141
|
+
api_key=api_key,
|
|
122
142
|
headers=headers,
|
|
123
143
|
**self._kwargs,
|
|
124
144
|
)
|
|
@@ -158,6 +178,49 @@ class ElasticsearchDocumentStore:
|
|
|
158
178
|
|
|
159
179
|
self._initialized = True
|
|
160
180
|
|
|
181
|
+
def _handle_auth(self) -> Optional[Union[str, Tuple[str, str]]]:
|
|
182
|
+
"""
|
|
183
|
+
Handles authentication for the Elasticsearch client.
|
|
184
|
+
|
|
185
|
+
There are three possible scenarios.
|
|
186
|
+
|
|
187
|
+
1) Authentication with both api_key and api_key_id, either as Secrets or as environment variables. In this case,
|
|
188
|
+
use both for authentication.
|
|
189
|
+
|
|
190
|
+
2) Authentication with only api_key, either as a Secret or as an environment variable. In this case, the api_key
|
|
191
|
+
must be a base64-encoded string that encodes both id and secret <id:secret>.
|
|
192
|
+
|
|
193
|
+
3) No authentication: neither api_key nor api_key_id is provided as a Secret or defined as an
|
|
194
|
+
environment variable. In this case, the client will connect without authentication.
|
|
195
|
+
|
|
196
|
+
:returns:
|
|
197
|
+
api_key: Optional[Union[str, Tuple[str, str]]]
|
|
198
|
+
|
|
199
|
+
"""
|
|
200
|
+
|
|
201
|
+
api_key: Optional[Union[str, Tuple[str, str]]] # make the type checker happy
|
|
202
|
+
|
|
203
|
+
api_key_resolved = self._api_key.resolve_value()
|
|
204
|
+
api_key_id_resolved = self._api_key_id.resolve_value()
|
|
205
|
+
|
|
206
|
+
# Scenario 1: both are found, use them
|
|
207
|
+
if api_key_id_resolved and api_key_resolved:
|
|
208
|
+
api_key = (api_key_id_resolved, api_key_resolved)
|
|
209
|
+
return api_key
|
|
210
|
+
|
|
211
|
+
# Scenario 2: only api_key is set, must be a base64-encoded string that encodes id and secret (separated by “:”)
|
|
212
|
+
elif api_key_resolved and not api_key_id_resolved:
|
|
213
|
+
return api_key_resolved
|
|
214
|
+
|
|
215
|
+
# Error: only api_key_id is found, raise an error
|
|
216
|
+
elif api_key_id_resolved and not api_key_resolved:
|
|
217
|
+
msg = "api_key_id is provided but api_key is missing."
|
|
218
|
+
raise ValueError(msg)
|
|
219
|
+
|
|
220
|
+
else:
|
|
221
|
+
# Scenario 3: neither found, no authentication
|
|
222
|
+
return None
|
|
223
|
+
|
|
161
224
|
@property
|
|
162
225
|
def client(self) -> Elasticsearch:
|
|
163
226
|
"""
|
|
@@ -191,6 +254,8 @@ class ElasticsearchDocumentStore:
|
|
|
191
254
|
hosts=self._hosts,
|
|
192
255
|
custom_mapping=self._custom_mapping,
|
|
193
256
|
index=self._index,
|
|
257
|
+
api_key=self._api_key.to_dict(),
|
|
258
|
+
api_key_id=self._api_key_id.to_dict(),
|
|
194
259
|
embedding_similarity_function=self._embedding_similarity_function,
|
|
195
260
|
**self._kwargs,
|
|
196
261
|
)
|
|
@@ -205,6 +270,7 @@ class ElasticsearchDocumentStore:
|
|
|
205
270
|
:returns:
|
|
206
271
|
Deserialized component.
|
|
207
272
|
"""
|
|
273
|
+
deserialize_secrets_inplace(data, keys=["api_key", "api_key_id"])
|
|
208
274
|
return default_from_dict(cls, data)
|
|
209
275
|
|
|
210
276
|
def count_documents(self) -> int:
|
|
File without changes
|
{elasticsearch_haystack-3.1.0.dist-info → elasticsearch_haystack-4.0.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|