elasticsearch-haystack 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/PKG-INFO +1 -1
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +47 -35
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_document_store.py +9 -3
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/.gitignore +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/LICENSE +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/README.md +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/docker-compose.yml +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/pydoc/config.yml +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/pyproject.toml +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/__init__.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_bm25_retriever.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_embedding_retriever.py +0 -0
- {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_filters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -93,48 +93,60 @@ class ElasticsearchDocumentStore:
|
|
|
93
93
|
:param **kwargs: Optional arguments that `Elasticsearch` takes.
|
|
94
94
|
"""
|
|
95
95
|
self._hosts = hosts
|
|
96
|
-
self._client = Elasticsearch(
|
|
97
|
-
hosts,
|
|
98
|
-
headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
|
|
99
|
-
**kwargs,
|
|
100
|
-
)
|
|
96
|
+
self._client = None
|
|
101
97
|
self._index = index
|
|
102
98
|
self._embedding_similarity_function = embedding_similarity_function
|
|
103
99
|
self._custom_mapping = custom_mapping
|
|
104
100
|
self._kwargs = kwargs
|
|
105
101
|
|
|
106
|
-
# Check client connection, this will raise if not connected
|
|
107
|
-
self._client.info()
|
|
108
|
-
|
|
109
102
|
if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
|
|
110
103
|
msg = "custom_mapping must be a dictionary"
|
|
111
104
|
raise ValueError(msg)
|
|
112
105
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
"
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
106
|
+
@property
|
|
107
|
+
def client(self) -> Elasticsearch:
|
|
108
|
+
if self._client is None:
|
|
109
|
+
client = Elasticsearch(
|
|
110
|
+
self._hosts,
|
|
111
|
+
headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
|
|
112
|
+
**self._kwargs,
|
|
113
|
+
)
|
|
114
|
+
# Check client connection, this will raise if not connected
|
|
115
|
+
client.info()
|
|
116
|
+
|
|
117
|
+
if self._custom_mapping:
|
|
118
|
+
mappings = self._custom_mapping
|
|
119
|
+
else:
|
|
120
|
+
# Configure mapping for the embedding field if none is provided
|
|
121
|
+
mappings = {
|
|
122
|
+
"properties": {
|
|
123
|
+
"embedding": {
|
|
124
|
+
"type": "dense_vector",
|
|
125
|
+
"index": True,
|
|
126
|
+
"similarity": self._embedding_similarity_function,
|
|
127
|
+
},
|
|
128
|
+
"content": {"type": "text"},
|
|
129
|
+
},
|
|
130
|
+
"dynamic_templates": [
|
|
131
|
+
{
|
|
132
|
+
"strings": {
|
|
133
|
+
"path_match": "*",
|
|
134
|
+
"match_mapping_type": "string",
|
|
135
|
+
"mapping": {
|
|
136
|
+
"type": "keyword",
|
|
137
|
+
},
|
|
138
|
+
}
|
|
130
139
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
140
|
+
],
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
# Create the index if it doesn't exist
|
|
144
|
+
if not client.indices.exists(index=self._index):
|
|
145
|
+
client.indices.create(index=self._index, mappings=mappings)
|
|
146
|
+
|
|
147
|
+
self._client = client
|
|
134
148
|
|
|
135
|
-
|
|
136
|
-
if not self._client.indices.exists(index=index):
|
|
137
|
-
self._client.indices.create(index=index, mappings=mappings)
|
|
149
|
+
return self._client
|
|
138
150
|
|
|
139
151
|
def to_dict(self) -> Dict[str, Any]:
|
|
140
152
|
"""
|
|
@@ -172,7 +184,7 @@ class ElasticsearchDocumentStore:
|
|
|
172
184
|
Returns how many documents are present in the document store.
|
|
173
185
|
:returns: Number of documents in the document store.
|
|
174
186
|
"""
|
|
175
|
-
return self._client.count(index=self._index)["count"]
|
|
187
|
+
return self.client.count(index=self._index)["count"]
|
|
176
188
|
|
|
177
189
|
def _search_documents(self, **kwargs) -> List[Document]:
|
|
178
190
|
"""
|
|
@@ -187,7 +199,7 @@ class ElasticsearchDocumentStore:
|
|
|
187
199
|
from_ = 0
|
|
188
200
|
# Handle pagination
|
|
189
201
|
while True:
|
|
190
|
-
res = self._client.search(
|
|
202
|
+
res = self.client.search(
|
|
191
203
|
index=self._index,
|
|
192
204
|
from_=from_,
|
|
193
205
|
**kwargs,
|
|
@@ -261,7 +273,7 @@ class ElasticsearchDocumentStore:
|
|
|
261
273
|
)
|
|
262
274
|
|
|
263
275
|
documents_written, errors = helpers.bulk(
|
|
264
|
-
client=self._client,
|
|
276
|
+
client=self.client,
|
|
265
277
|
actions=elasticsearch_actions,
|
|
266
278
|
refresh="wait_for",
|
|
267
279
|
index=self._index,
|
|
@@ -317,7 +329,7 @@ class ElasticsearchDocumentStore:
|
|
|
317
329
|
"""
|
|
318
330
|
|
|
319
331
|
helpers.bulk(
|
|
320
|
-
client=self._client,
|
|
332
|
+
client=self.client,
|
|
321
333
|
actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
|
|
322
334
|
refresh="wait_for",
|
|
323
335
|
index=self._index,
|
|
@@ -15,6 +15,12 @@ from haystack.testing.document_store import DocumentStoreBaseTests
|
|
|
15
15
|
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
|
|
19
|
+
def test_init_is_lazy(_mock_es_client):
|
|
20
|
+
ElasticsearchDocumentStore(hosts="testhost")
|
|
21
|
+
_mock_es_client.assert_not_called()
|
|
22
|
+
|
|
23
|
+
|
|
18
24
|
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
|
|
19
25
|
def test_to_dict(_mock_elasticsearch_client):
|
|
20
26
|
document_store = ElasticsearchDocumentStore(hosts="some hosts")
|
|
@@ -73,7 +79,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
|
|
|
73
79
|
hosts=hosts, index=index, embedding_similarity_function=embedding_similarity_function
|
|
74
80
|
)
|
|
75
81
|
yield store
|
|
76
|
-
store._client.options(ignore_status=[400, 404]).indices.delete(index=index)
|
|
82
|
+
store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
|
|
77
83
|
|
|
78
84
|
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
|
|
79
85
|
"""
|
|
@@ -101,7 +107,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
|
|
|
101
107
|
super().assert_documents_are_equal(received, expected)
|
|
102
108
|
|
|
103
109
|
def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
|
|
104
|
-
assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
|
|
110
|
+
assert document_store.client._headers["user-agent"].startswith("haystack-py-ds/")
|
|
105
111
|
|
|
106
112
|
def test_write_documents(self, document_store: ElasticsearchDocumentStore):
|
|
107
113
|
docs = [Document(id="1")]
|
|
@@ -308,7 +314,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
|
|
|
308
314
|
)
|
|
309
315
|
mock_elasticsearch.return_value = mock_client
|
|
310
316
|
|
|
311
|
-
ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping)
|
|
317
|
+
_ = ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping).client
|
|
312
318
|
mock_client.indices.create.assert_called_once_with(
|
|
313
319
|
index="default",
|
|
314
320
|
mappings=custom_mapping,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_embedding_retriever.py
RENAMED
|
File without changes
|
|
File without changes
|