elasticsearch-haystack 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (18)
  1. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/PKG-INFO +1 -1
  2. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +47 -35
  3. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_document_store.py +9 -3
  4. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/.gitignore +0 -0
  5. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/LICENSE +0 -0
  6. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/README.md +0 -0
  7. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/docker-compose.yml +0 -0
  8. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/pydoc/config.yml +0 -0
  9. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/pyproject.toml +0 -0
  10. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  11. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
  12. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
  13. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  14. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  15. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/__init__.py +0 -0
  16. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_bm25_retriever.py +0 -0
  17. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_embedding_retriever.py +0 -0
  18. {elasticsearch_haystack-0.5.0 → elasticsearch_haystack-0.6.0}/tests/test_filters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -93,48 +93,60 @@ class ElasticsearchDocumentStore:
93
93
  :param **kwargs: Optional arguments that `Elasticsearch` takes.
94
94
  """
95
95
  self._hosts = hosts
96
- self._client = Elasticsearch(
97
- hosts,
98
- headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
99
- **kwargs,
100
- )
96
+ self._client = None
101
97
  self._index = index
102
98
  self._embedding_similarity_function = embedding_similarity_function
103
99
  self._custom_mapping = custom_mapping
104
100
  self._kwargs = kwargs
105
101
 
106
- # Check client connection, this will raise if not connected
107
- self._client.info()
108
-
109
102
  if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
110
103
  msg = "custom_mapping must be a dictionary"
111
104
  raise ValueError(msg)
112
105
 
113
- if self._custom_mapping:
114
- mappings = self._custom_mapping
115
- else:
116
- # Configure mapping for the embedding field if none is provided
117
- mappings = {
118
- "properties": {
119
- "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
120
- "content": {"type": "text"},
121
- },
122
- "dynamic_templates": [
123
- {
124
- "strings": {
125
- "path_match": "*",
126
- "match_mapping_type": "string",
127
- "mapping": {
128
- "type": "keyword",
129
- },
106
+ @property
107
+ def client(self) -> Elasticsearch:
108
+ if self._client is None:
109
+ client = Elasticsearch(
110
+ self._hosts,
111
+ headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
112
+ **self._kwargs,
113
+ )
114
+ # Check client connection, this will raise if not connected
115
+ client.info()
116
+
117
+ if self._custom_mapping:
118
+ mappings = self._custom_mapping
119
+ else:
120
+ # Configure mapping for the embedding field if none is provided
121
+ mappings = {
122
+ "properties": {
123
+ "embedding": {
124
+ "type": "dense_vector",
125
+ "index": True,
126
+ "similarity": self._embedding_similarity_function,
127
+ },
128
+ "content": {"type": "text"},
129
+ },
130
+ "dynamic_templates": [
131
+ {
132
+ "strings": {
133
+ "path_match": "*",
134
+ "match_mapping_type": "string",
135
+ "mapping": {
136
+ "type": "keyword",
137
+ },
138
+ }
130
139
  }
131
- }
132
- ],
133
- }
140
+ ],
141
+ }
142
+
143
+ # Create the index if it doesn't exist
144
+ if not client.indices.exists(index=self._index):
145
+ client.indices.create(index=self._index, mappings=mappings)
146
+
147
+ self._client = client
134
148
 
135
- # Create the index if it doesn't exist
136
- if not self._client.indices.exists(index=index):
137
- self._client.indices.create(index=index, mappings=mappings)
149
+ return self._client
138
150
 
139
151
  def to_dict(self) -> Dict[str, Any]:
140
152
  """
@@ -172,7 +184,7 @@ class ElasticsearchDocumentStore:
172
184
  Returns how many documents are present in the document store.
173
185
  :returns: Number of documents in the document store.
174
186
  """
175
- return self._client.count(index=self._index)["count"]
187
+ return self.client.count(index=self._index)["count"]
176
188
 
177
189
  def _search_documents(self, **kwargs) -> List[Document]:
178
190
  """
@@ -187,7 +199,7 @@ class ElasticsearchDocumentStore:
187
199
  from_ = 0
188
200
  # Handle pagination
189
201
  while True:
190
- res = self._client.search(
202
+ res = self.client.search(
191
203
  index=self._index,
192
204
  from_=from_,
193
205
  **kwargs,
@@ -261,7 +273,7 @@ class ElasticsearchDocumentStore:
261
273
  )
262
274
 
263
275
  documents_written, errors = helpers.bulk(
264
- client=self._client,
276
+ client=self.client,
265
277
  actions=elasticsearch_actions,
266
278
  refresh="wait_for",
267
279
  index=self._index,
@@ -317,7 +329,7 @@ class ElasticsearchDocumentStore:
317
329
  """
318
330
 
319
331
  helpers.bulk(
320
- client=self._client,
332
+ client=self.client,
321
333
  actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
322
334
  refresh="wait_for",
323
335
  index=self._index,
@@ -15,6 +15,12 @@ from haystack.testing.document_store import DocumentStoreBaseTests
15
15
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
16
16
 
17
17
 
18
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
19
+ def test_init_is_lazy(_mock_es_client):
20
+ ElasticsearchDocumentStore(hosts="testhost")
21
+ _mock_es_client.assert_not_called()
22
+
23
+
18
24
  @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
19
25
  def test_to_dict(_mock_elasticsearch_client):
20
26
  document_store = ElasticsearchDocumentStore(hosts="some hosts")
@@ -73,7 +79,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
73
79
  hosts=hosts, index=index, embedding_similarity_function=embedding_similarity_function
74
80
  )
75
81
  yield store
76
- store._client.options(ignore_status=[400, 404]).indices.delete(index=index)
82
+ store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
77
83
 
78
84
  def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
79
85
  """
@@ -101,7 +107,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
101
107
  super().assert_documents_are_equal(received, expected)
102
108
 
103
109
  def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
104
- assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
110
+ assert document_store.client._headers["user-agent"].startswith("haystack-py-ds/")
105
111
 
106
112
  def test_write_documents(self, document_store: ElasticsearchDocumentStore):
107
113
  docs = [Document(id="1")]
@@ -308,7 +314,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
308
314
  )
309
315
  mock_elasticsearch.return_value = mock_client
310
316
 
311
- ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping)
317
+ _ = ElasticsearchDocumentStore(hosts="some hosts", custom_mapping=custom_mapping).client
312
318
  mock_client.indices.create.assert_called_once_with(
313
319
  index="default",
314
320
  mappings=custom_mapping,