elasticsearch-haystack 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/PKG-INFO +2 -1
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/pydoc/config.yml +1 -1
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/pyproject.toml +1 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +28 -17
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_bm25_retriever.py +1 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_document_store.py +34 -1
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_embedding_retriever.py +1 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/.gitignore +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/LICENSE +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/README.md +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/docker-compose.yml +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/__init__.py +0 -0
- {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_filters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -9,6 +9,7 @@ Author-email: Silvano Cerza <silvanocerza@gmail.com>
|
|
|
9
9
|
License-Expression: Apache-2.0
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.8
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -17,7 +17,7 @@ processors:
|
|
|
17
17
|
- type: smart
|
|
18
18
|
- type: crossref
|
|
19
19
|
renderer:
|
|
20
|
-
type: haystack_pydoc_tools.renderers.
|
|
20
|
+
type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
|
|
21
21
|
excerpt: Elasticsearch integration for Haystack
|
|
22
22
|
category_slug: integrations-api
|
|
23
23
|
title: Elasticsearch
|
|
@@ -14,6 +14,7 @@ authors = [
|
|
|
14
14
|
{ name = "Silvano Cerza", email = "silvanocerza@gmail.com" },
|
|
15
15
|
]
|
|
16
16
|
classifiers = [
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
17
18
|
"Development Status :: 4 - Beta",
|
|
18
19
|
"Programming Language :: Python",
|
|
19
20
|
"Programming Language :: Python :: 3.8",
|
|
@@ -63,6 +63,7 @@ class ElasticsearchDocumentStore:
|
|
|
63
63
|
self,
|
|
64
64
|
*,
|
|
65
65
|
hosts: Optional[Hosts] = None,
|
|
66
|
+
custom_mapping: Optional[Dict[str, Any]] = None,
|
|
66
67
|
index: str = "default",
|
|
67
68
|
embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
|
|
68
69
|
**kwargs,
|
|
@@ -82,6 +83,7 @@ class ElasticsearchDocumentStore:
|
|
|
82
83
|
[reference](https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch)
|
|
83
84
|
|
|
84
85
|
:param hosts: List of hosts running the Elasticsearch client.
|
|
86
|
+
:param custom_mapping: Custom mapping for the index. If not provided, a default mapping will be used.
|
|
85
87
|
:param index: Name of index in Elasticsearch.
|
|
86
88
|
:param embedding_similarity_function: The similarity function used to compare Documents embeddings.
|
|
87
89
|
This parameter only takes effect if the index does not yet exist and is created.
|
|
@@ -98,29 +100,37 @@ class ElasticsearchDocumentStore:
|
|
|
98
100
|
)
|
|
99
101
|
self._index = index
|
|
100
102
|
self._embedding_similarity_function = embedding_similarity_function
|
|
103
|
+
self._custom_mapping = custom_mapping
|
|
101
104
|
self._kwargs = kwargs
|
|
102
105
|
|
|
103
106
|
# Check client connection, this will raise if not connected
|
|
104
107
|
self._client.info()
|
|
105
108
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
109
|
+
if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
|
|
110
|
+
msg = "custom_mapping must be a dictionary"
|
|
111
|
+
raise ValueError(msg)
|
|
112
|
+
|
|
113
|
+
if self._custom_mapping:
|
|
114
|
+
mappings = self._custom_mapping
|
|
115
|
+
else:
|
|
116
|
+
# Configure mapping for the embedding field if none is provided
|
|
117
|
+
mappings = {
|
|
118
|
+
"properties": {
|
|
119
|
+
"embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
|
|
120
|
+
"content": {"type": "text"},
|
|
121
|
+
},
|
|
122
|
+
"dynamic_templates": [
|
|
123
|
+
{
|
|
124
|
+
"strings": {
|
|
125
|
+
"path_match": "*",
|
|
126
|
+
"match_mapping_type": "string",
|
|
127
|
+
"mapping": {
|
|
128
|
+
"type": "keyword",
|
|
129
|
+
},
|
|
130
|
+
}
|
|
120
131
|
}
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
}
|
|
132
|
+
],
|
|
133
|
+
}
|
|
124
134
|
|
|
125
135
|
# Create the index if it doesn't exist
|
|
126
136
|
if not self._client.indices.exists(index=index):
|
|
@@ -139,6 +149,7 @@ class ElasticsearchDocumentStore:
|
|
|
139
149
|
return default_to_dict(
|
|
140
150
|
self,
|
|
141
151
|
hosts=self._hosts,
|
|
152
|
+
custom_mapping=self._custom_mapping,
|
|
142
153
|
index=self._index,
|
|
143
154
|
embedding_similarity_function=self._embedding_similarity_function,
|
|
144
155
|
**self._kwargs,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import random
|
|
6
6
|
from typing import List
|
|
7
|
-
from unittest.mock import patch
|
|
7
|
+
from unittest.mock import Mock, patch
|
|
8
8
|
|
|
9
9
|
import pytest
|
|
10
10
|
from elasticsearch.exceptions import BadRequestError # type: ignore[import-not-found]
|
|
@@ -23,6 +23,7 @@ def test_to_dict(_mock_elasticsearch_client):
|
|
|
23
23
|
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
|
|
24
24
|
"init_parameters": {
|
|
25
25
|
"hosts": "some hosts",
|
|
26
|
+
"custom_mapping": None,
|
|
26
27
|
"index": "default",
|
|
27
28
|
"embedding_similarity_function": "cosine",
|
|
28
29
|
},
|
|
@@ -35,6 +36,7 @@ def test_from_dict(_mock_elasticsearch_client):
|
|
|
35
36
|
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
|
|
36
37
|
"init_parameters": {
|
|
37
38
|
"hosts": "some hosts",
|
|
39
|
+
"custom_mapping": None,
|
|
38
40
|
"index": "default",
|
|
39
41
|
"embedding_similarity_function": "cosine",
|
|
40
42
|
},
|
|
@@ -42,6 +44,7 @@ def test_from_dict(_mock_elasticsearch_client):
|
|
|
42
44
|
document_store = ElasticsearchDocumentStore.from_dict(data)
|
|
43
45
|
assert document_store._hosts == "some hosts"
|
|
44
46
|
assert document_store._index == "default"
|
|
47
|
+
assert document_store._custom_mapping is None
|
|
45
48
|
assert document_store._embedding_similarity_function == "cosine"
|
|
46
49
|
|
|
47
50
|
|
|
@@ -280,3 +283,33 @@ class TestDocumentStore(DocumentStoreBaseTests):
|
|
|
280
283
|
|
|
281
284
|
with pytest.raises(DocumentStoreError):
|
|
282
285
|
document_store.write_documents(docs)
|
|
286
|
+
|
|
287
|
+
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
|
|
288
|
+
def test_init_with_custom_mapping(self, mock_elasticsearch):
|
|
289
|
+
custom_mapping = {
|
|
290
|
+
"properties": {
|
|
291
|
+
"embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"},
|
|
292
|
+
"content": {"type": "text"},
|
|
293
|
+
},
|
|
294
|
+
"dynamic_templates": [
|
|
295
|
+
{
|
|
296
|
+
"strings": {
|
|
297
|
+
"path_match": "*",
|
|
298
|
+
"match_mapping_type": "string",
|
|
299
|
+
"mapping": {
|
|
300
|
+
"type": "keyword",
|
|
301
|
+
},
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
],
|
|
305
|
+
}
|
|
306
|
+
mock_client = Mock(
|
|
307
|
+
indices=Mock(create=Mock(), exists=Mock(return_value=False)),
|
|
308
|
+
)
|
|
309
|
+
mock_elasticsearch.return_value = mock_client
|
|
310
|
+
|
|
311
|
+
ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping)
|
|
312
|
+
mock_client.indices.create.assert_called_once_with(
|
|
313
|
+
index="default",
|
|
314
|
+
mappings=custom_mapping,
|
|
315
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|