elasticsearch-haystack 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-0.5.0.dist-info → elasticsearch_haystack-0.6.0.dist-info}/METADATA +1 -1
- {elasticsearch_haystack-0.5.0.dist-info → elasticsearch_haystack-0.6.0.dist-info}/RECORD +5 -5
- haystack_integrations/document_stores/elasticsearch/document_store.py +47 -35
- {elasticsearch_haystack-0.5.0.dist-info → elasticsearch_haystack-0.6.0.dist-info}/WHEEL +0 -0
- {elasticsearch_haystack-0.5.0.dist-info → elasticsearch_haystack-0.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -2,9 +2,9 @@ haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJ
|
|
|
2
2
|
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=fFx554MTcUHnQZa2SgC0PzIR85YVbqAdMNOiXKkVSu8,4849
|
|
3
3
|
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=RcIbSMELiKIJsD-8F_u76J33YRt5bLr6lHnoX-hVQ1M,4990
|
|
4
4
|
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
5
|
-
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=
|
|
5
|
+
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=H5aqriF7rFYYpqALqAhvBSL41jzGtOxa-vSIPcLgXGw,18719
|
|
6
6
|
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
|
|
7
|
-
elasticsearch_haystack-0.
|
|
8
|
-
elasticsearch_haystack-0.
|
|
9
|
-
elasticsearch_haystack-0.
|
|
10
|
-
elasticsearch_haystack-0.5.0.dist-info/RECORD,,
|
|
7
|
+
elasticsearch_haystack-0.6.0.dist-info/METADATA,sha256=KwdNHi8bOY4M646WWQrkGJRVHzQWQDi16sWFX4idbCQ,2168
|
|
8
|
+
elasticsearch_haystack-0.6.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
9
|
+
elasticsearch_haystack-0.6.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
+
elasticsearch_haystack-0.6.0.dist-info/RECORD,,
|
|
@@ -93,48 +93,60 @@ class ElasticsearchDocumentStore:
|
|
|
93
93
|
:param **kwargs: Optional arguments that `Elasticsearch` takes.
|
|
94
94
|
"""
|
|
95
95
|
self._hosts = hosts
|
|
96
|
-
self._client = Elasticsearch(
|
|
97
|
-
hosts,
|
|
98
|
-
headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
|
|
99
|
-
**kwargs,
|
|
100
|
-
)
|
|
96
|
+
self._client = None
|
|
101
97
|
self._index = index
|
|
102
98
|
self._embedding_similarity_function = embedding_similarity_function
|
|
103
99
|
self._custom_mapping = custom_mapping
|
|
104
100
|
self._kwargs = kwargs
|
|
105
101
|
|
|
106
|
-
# Check client connection, this will raise if not connected
|
|
107
|
-
self._client.info()
|
|
108
|
-
|
|
109
102
|
if self._custom_mapping and not isinstance(self._custom_mapping, Dict):
|
|
110
103
|
msg = "custom_mapping must be a dictionary"
|
|
111
104
|
raise ValueError(msg)
|
|
112
105
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
"
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
106
|
+
@property
|
|
107
|
+
def client(self) -> Elasticsearch:
|
|
108
|
+
if self._client is None:
|
|
109
|
+
client = Elasticsearch(
|
|
110
|
+
self._hosts,
|
|
111
|
+
headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
|
|
112
|
+
**self._kwargs,
|
|
113
|
+
)
|
|
114
|
+
# Check client connection, this will raise if not connected
|
|
115
|
+
client.info()
|
|
116
|
+
|
|
117
|
+
if self._custom_mapping:
|
|
118
|
+
mappings = self._custom_mapping
|
|
119
|
+
else:
|
|
120
|
+
# Configure mapping for the embedding field if none is provided
|
|
121
|
+
mappings = {
|
|
122
|
+
"properties": {
|
|
123
|
+
"embedding": {
|
|
124
|
+
"type": "dense_vector",
|
|
125
|
+
"index": True,
|
|
126
|
+
"similarity": self._embedding_similarity_function,
|
|
127
|
+
},
|
|
128
|
+
"content": {"type": "text"},
|
|
129
|
+
},
|
|
130
|
+
"dynamic_templates": [
|
|
131
|
+
{
|
|
132
|
+
"strings": {
|
|
133
|
+
"path_match": "*",
|
|
134
|
+
"match_mapping_type": "string",
|
|
135
|
+
"mapping": {
|
|
136
|
+
"type": "keyword",
|
|
137
|
+
},
|
|
138
|
+
}
|
|
130
139
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
140
|
+
],
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
# Create the index if it doesn't exist
|
|
144
|
+
if not client.indices.exists(index=self._index):
|
|
145
|
+
client.indices.create(index=self._index, mappings=mappings)
|
|
146
|
+
|
|
147
|
+
self._client = client
|
|
134
148
|
|
|
135
|
-
|
|
136
|
-
if not self._client.indices.exists(index=index):
|
|
137
|
-
self._client.indices.create(index=index, mappings=mappings)
|
|
149
|
+
return self._client
|
|
138
150
|
|
|
139
151
|
def to_dict(self) -> Dict[str, Any]:
|
|
140
152
|
"""
|
|
@@ -172,7 +184,7 @@ class ElasticsearchDocumentStore:
|
|
|
172
184
|
Returns how many documents are present in the document store.
|
|
173
185
|
:returns: Number of documents in the document store.
|
|
174
186
|
"""
|
|
175
|
-
return self._client.count(index=self._index)["count"]
|
|
187
|
+
return self.client.count(index=self._index)["count"]
|
|
176
188
|
|
|
177
189
|
def _search_documents(self, **kwargs) -> List[Document]:
|
|
178
190
|
"""
|
|
@@ -187,7 +199,7 @@ class ElasticsearchDocumentStore:
|
|
|
187
199
|
from_ = 0
|
|
188
200
|
# Handle pagination
|
|
189
201
|
while True:
|
|
190
|
-
res = self._client.search(
|
|
202
|
+
res = self.client.search(
|
|
191
203
|
index=self._index,
|
|
192
204
|
from_=from_,
|
|
193
205
|
**kwargs,
|
|
@@ -261,7 +273,7 @@ class ElasticsearchDocumentStore:
|
|
|
261
273
|
)
|
|
262
274
|
|
|
263
275
|
documents_written, errors = helpers.bulk(
|
|
264
|
-
client=self._client,
|
|
276
|
+
client=self.client,
|
|
265
277
|
actions=elasticsearch_actions,
|
|
266
278
|
refresh="wait_for",
|
|
267
279
|
index=self._index,
|
|
@@ -317,7 +329,7 @@ class ElasticsearchDocumentStore:
|
|
|
317
329
|
"""
|
|
318
330
|
|
|
319
331
|
helpers.bulk(
|
|
320
|
-
client=self._client,
|
|
332
|
+
client=self.client,
|
|
321
333
|
actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
|
|
322
334
|
refresh="wait_for",
|
|
323
335
|
index=self._index,
|
|
File without changes
|
{elasticsearch_haystack-0.5.0.dist-info → elasticsearch_haystack-0.6.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|