elasticsearch-haystack 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (18)
  1. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/PKG-INFO +2 -1
  2. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/pydoc/config.yml +1 -1
  3. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/pyproject.toml +1 -0
  4. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +28 -17
  5. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_bm25_retriever.py +1 -0
  6. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_document_store.py +34 -1
  7. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_embedding_retriever.py +1 -0
  8. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/.gitignore +0 -0
  9. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/LICENSE +0 -0
  10. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/README.md +0 -0
  11. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/docker-compose.yml +0 -0
  12. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  13. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
  14. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
  15. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  16. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  17. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/__init__.py +0 -0
  18. {elasticsearch_haystack-0.4.0 → elasticsearch_haystack-0.5.0}/tests/test_filters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -9,6 +9,7 @@ Author-email: Silvano Cerza <silvanocerza@gmail.com>
9
9
  License-Expression: Apache-2.0
10
10
  License-File: LICENSE
11
11
  Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: Apache Software License
12
13
  Classifier: Programming Language :: Python
13
14
  Classifier: Programming Language :: Python :: 3.8
14
15
  Classifier: Programming Language :: Python :: 3.9
@@ -17,7 +17,7 @@ processors:
17
17
  - type: smart
18
18
  - type: crossref
19
19
  renderer:
20
- type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
20
+ type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer
21
21
  excerpt: Elasticsearch integration for Haystack
22
22
  category_slug: integrations-api
23
23
  title: Elasticsearch
@@ -14,6 +14,7 @@ authors = [
14
14
  { name = "Silvano Cerza", email = "silvanocerza@gmail.com" },
15
15
  ]
16
16
  classifiers = [
17
+ "License :: OSI Approved :: Apache Software License",
17
18
  "Development Status :: 4 - Beta",
18
19
  "Programming Language :: Python",
19
20
  "Programming Language :: Python :: 3.8",
@@ -63,6 +63,7 @@ class ElasticsearchDocumentStore:
63
63
  self,
64
64
  *,
65
65
  hosts: Optional[Hosts] = None,
66
+ custom_mapping: Optional[Dict[str, Any]] = None,
66
67
  index: str = "default",
67
68
  embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
68
69
  **kwargs,
@@ -82,6 +83,7 @@ class ElasticsearchDocumentStore:
82
83
  [reference](https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch)
83
84
 
84
85
  :param hosts: List of hosts running the Elasticsearch client.
86
+ :param custom_mapping: Custom mapping for the index. If not provided, a default mapping will be used.
85
87
  :param index: Name of index in Elasticsearch.
86
88
  :param embedding_similarity_function: The similarity function used to compare Documents embeddings.
87
89
  This parameter only takes effect if the index does not yet exist and is created.
@@ -98,29 +100,37 @@ class ElasticsearchDocumentStore:
98
100
  )
99
101
  self._index = index
100
102
  self._embedding_similarity_function = embedding_similarity_function
103
+ self._custom_mapping = custom_mapping
101
104
  self._kwargs = kwargs
102
105
 
103
106
  # Check client connection, this will raise if not connected
104
107
  self._client.info()
105
108
 
106
- # configure mapping for the embedding field
107
- mappings = {
108
- "properties": {
109
- "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
110
- "content": {"type": "text"},
111
- },
112
- "dynamic_templates": [
113
- {
114
- "strings": {
115
- "path_match": "*",
116
- "match_mapping_type": "string",
117
- "mapping": {
118
- "type": "keyword",
119
- },
109
+ if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
110
+ msg = "custom_mapping must be a dictionary"
111
+ raise ValueError(msg)
112
+
113
+ if self._custom_mapping:
114
+ mappings = self._custom_mapping
115
+ else:
116
+ # Configure mapping for the embedding field if none is provided
117
+ mappings = {
118
+ "properties": {
119
+ "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
120
+ "content": {"type": "text"},
121
+ },
122
+ "dynamic_templates": [
123
+ {
124
+ "strings": {
125
+ "path_match": "*",
126
+ "match_mapping_type": "string",
127
+ "mapping": {
128
+ "type": "keyword",
129
+ },
130
+ }
120
131
  }
121
- }
122
- ],
123
- }
132
+ ],
133
+ }
124
134
 
125
135
  # Create the index if it doesn't exist
126
136
  if not self._client.indices.exists(index=index):
@@ -139,6 +149,7 @@ class ElasticsearchDocumentStore:
139
149
  return default_to_dict(
140
150
  self,
141
151
  hosts=self._hosts,
152
+ custom_mapping=self._custom_mapping,
142
153
  index=self._index,
143
154
  embedding_similarity_function=self._embedding_similarity_function,
144
155
  **self._kwargs,
@@ -28,6 +28,7 @@ def test_to_dict(_mock_elasticsearch_client):
28
28
  "document_store": {
29
29
  "init_parameters": {
30
30
  "hosts": "some fake host",
31
+ "custom_mapping": None,
31
32
  "index": "default",
32
33
  "embedding_similarity_function": "cosine",
33
34
  },
@@ -4,7 +4,7 @@
4
4
 
5
5
  import random
6
6
  from typing import List
7
- from unittest.mock import patch
7
+ from unittest.mock import Mock, patch
8
8
 
9
9
  import pytest
10
10
  from elasticsearch.exceptions import BadRequestError # type: ignore[import-not-found]
@@ -23,6 +23,7 @@ def test_to_dict(_mock_elasticsearch_client):
23
23
  "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
24
24
  "init_parameters": {
25
25
  "hosts": "some hosts",
26
+ "custom_mapping": None,
26
27
  "index": "default",
27
28
  "embedding_similarity_function": "cosine",
28
29
  },
@@ -35,6 +36,7 @@ def test_from_dict(_mock_elasticsearch_client):
35
36
  "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
36
37
  "init_parameters": {
37
38
  "hosts": "some hosts",
39
+ "custom_mapping": None,
38
40
  "index": "default",
39
41
  "embedding_similarity_function": "cosine",
40
42
  },
@@ -42,6 +44,7 @@ def test_from_dict(_mock_elasticsearch_client):
42
44
  document_store = ElasticsearchDocumentStore.from_dict(data)
43
45
  assert document_store._hosts == "some hosts"
44
46
  assert document_store._index == "default"
47
+ assert document_store._custom_mapping is None
45
48
  assert document_store._embedding_similarity_function == "cosine"
46
49
 
47
50
 
@@ -280,3 +283,33 @@ class TestDocumentStore(DocumentStoreBaseTests):
280
283
 
281
284
  with pytest.raises(DocumentStoreError):
282
285
  document_store.write_documents(docs)
286
+
287
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
288
+ def test_init_with_custom_mapping(self, mock_elasticsearch):
289
+ custom_mapping = {
290
+ "properties": {
291
+ "embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"},
292
+ "content": {"type": "text"},
293
+ },
294
+ "dynamic_templates": [
295
+ {
296
+ "strings": {
297
+ "path_match": "*",
298
+ "match_mapping_type": "string",
299
+ "mapping": {
300
+ "type": "keyword",
301
+ },
302
+ }
303
+ }
304
+ ],
305
+ }
306
+ mock_client = Mock(
307
+ indices=Mock(create=Mock(), exists=Mock(return_value=False)),
308
+ )
309
+ mock_elasticsearch.return_value = mock_client
310
+
311
+ ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping)
312
+ mock_client.indices.create.assert_called_once_with(
313
+ index="default",
314
+ mappings=custom_mapping,
315
+ )
@@ -29,6 +29,7 @@ def test_to_dict(_mock_elasticsearch_client):
29
29
  "document_store": {
30
30
  "init_parameters": {
31
31
  "hosts": "some fake host",
32
+ "custom_mapping": None,
32
33
  "index": "default",
33
34
  "embedding_similarity_function": "cosine",
34
35
  },