elasticsearch-haystack 3.0.1__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-3.0.1.dist-info → elasticsearch_haystack-3.1.0.dist-info}/METADATA +2 -2
- elasticsearch_haystack-3.1.0.dist-info/RECORD +12 -0
- haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +6 -2
- haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +4 -2
- haystack_integrations/components/retrievers/py.typed +0 -0
- haystack_integrations/document_stores/elasticsearch/document_store.py +19 -11
- haystack_integrations/document_stores/py.typed +0 -0
- elasticsearch_haystack-3.0.1.dist-info/RECORD +0 -10
- {elasticsearch_haystack-3.0.1.dist-info → elasticsearch_haystack-3.1.0.dist-info}/WHEEL +0 -0
- {elasticsearch_haystack-3.0.1.dist-info → elasticsearch_haystack-3.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 3.0.1
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -50,7 +50,7 @@ docker-compose up
|
|
|
50
50
|
Then run tests:
|
|
51
51
|
|
|
52
52
|
```console
|
|
53
|
-
hatch run test
|
|
53
|
+
hatch run test:all
|
|
54
54
|
```
|
|
55
55
|
|
|
56
56
|
## License
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
haystack_integrations/components/retrievers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJBsYjz_T4kK-M-auAHVUnYIcgUqqwwQe_hsF0_IG4,307
|
|
3
|
+
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=HsR42EolOBEIuLNwQ_8FeSmrHMJ6WscYulcoXSHoaYQ,7098
|
|
4
|
+
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=-6eIHW5cU4k8-jAsUsCb15hJRalpkUhzy_dNxr5HUZo,7404
|
|
5
|
+
haystack_integrations/document_stores/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
7
|
+
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=d_u49ySnhQzK_jGGThAYCWKPGDdVcpmCGQ-CWgCaO58,27852
|
|
8
|
+
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
|
|
9
|
+
elasticsearch_haystack-3.1.0.dist-info/METADATA,sha256=O1bfELb0DpiXMSLvZuq4upfSo-1So67b058LXqt7N4E,2261
|
|
10
|
+
elasticsearch_haystack-3.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
11
|
+
elasticsearch_haystack-3.1.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
12
|
+
elasticsearch_haystack-3.1.0.dist-info/RECORD,,
|
|
@@ -116,7 +116,9 @@ class ElasticsearchBM25Retriever:
|
|
|
116
116
|
return default_from_dict(cls, data)
|
|
117
117
|
|
|
118
118
|
@component.output_types(documents=List[Document])
|
|
119
|
-
def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
|
|
119
|
+
def run(
|
|
120
|
+
self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
|
|
121
|
+
) -> Dict[str, List[Document]]:
|
|
120
122
|
"""
|
|
121
123
|
Retrieve documents using the BM25 keyword-based algorithm.
|
|
122
124
|
|
|
@@ -139,7 +141,9 @@ class ElasticsearchBM25Retriever:
|
|
|
139
141
|
return {"documents": docs}
|
|
140
142
|
|
|
141
143
|
@component.output_types(documents=List[Document])
|
|
142
|
-
async def run_async(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
|
|
144
|
+
async def run_async(
|
|
145
|
+
self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
|
|
146
|
+
) -> Dict[str, List[Document]]:
|
|
143
147
|
"""
|
|
144
148
|
Asynchronously retrieve documents using the BM25 keyword-based algorithm.
|
|
145
149
|
|
|
@@ -114,7 +114,9 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
114
114
|
return default_from_dict(cls, data)
|
|
115
115
|
|
|
116
116
|
@component.output_types(documents=List[Document])
|
|
117
|
-
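def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):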
|
|
117
|
+
def run(
|
|
118
|
+
self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
|
|
119
|
+
) -> Dict[str, List[Document]]:
|
|
118
120
|
"""
|
|
119
121
|
Retrieve documents using a vector similarity metric.
|
|
120
122
|
|
|
@@ -139,7 +141,7 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
139
141
|
@component.output_types(documents=List[Document])
|
|
140
142
|
async def run_async(
|
|
141
143
|
self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
|
|
142
|
-
):
|
|
144
|
+
) -> Dict[str, List[Document]]:
|
|
143
145
|
"""
|
|
144
146
|
Asynchronously retrieve documents using a vector similarity metric.
|
|
145
147
|
|
|
File without changes
|
|
@@ -5,16 +5,14 @@ from collections.abc import Mapping
|
|
|
5
5
|
from typing import Any, Dict, List, Literal, Optional, Union
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
|
-
|
|
9
|
-
# There are no import stubs for elastic_transport and elasticsearch so mypy fails
|
|
10
|
-
from elastic_transport import NodeConfig # type: ignore[import-not-found]
|
|
8
|
+
from elastic_transport import NodeConfig
|
|
11
9
|
from haystack import default_from_dict, default_to_dict, logging
|
|
12
10
|
from haystack.dataclasses import Document
|
|
13
11
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
14
12
|
from haystack.document_stores.types import DuplicatePolicy
|
|
15
13
|
from haystack.version import __version__ as haystack_version
|
|
16
14
|
|
|
17
|
-
from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers  # type: ignore[import-not-found]
|
|
15
|
+
from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
|
|
18
16
|
|
|
19
17
|
from .filters import _normalize_filters
|
|
20
18
|
|
|
@@ -66,7 +64,7 @@ class ElasticsearchDocumentStore:
|
|
|
66
64
|
custom_mapping: Optional[Dict[str, Any]] = None,
|
|
67
65
|
index: str = "default",
|
|
68
66
|
embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
|
|
69
|
-
**kwargs,
|
|
67
|
+
**kwargs: Any,
|
|
70
68
|
):
|
|
71
69
|
"""
|
|
72
70
|
Creates a new ElasticsearchDocumentStore instance.
|
|
@@ -93,8 +91,8 @@ class ElasticsearchDocumentStore:
|
|
|
93
91
|
:param **kwargs: Optional arguments that `Elasticsearch` takes.
|
|
94
92
|
"""
|
|
95
93
|
self._hosts = hosts
|
|
96
|
-
self._client = None
|
|
97
|
-
self._async_client = None
|
|
94
|
+
self._client: Optional[Elasticsearch] = None
|
|
95
|
+
self._async_client: Optional[AsyncElasticsearch] = None
|
|
98
96
|
self._index = index
|
|
99
97
|
self._embedding_similarity_function = embedding_similarity_function
|
|
100
98
|
self._custom_mapping = custom_mapping
|
|
@@ -166,6 +164,7 @@ class ElasticsearchDocumentStore:
|
|
|
166
164
|
Returns the synchronous Elasticsearch client, initializing it if necessary.
|
|
167
165
|
"""
|
|
168
166
|
self._ensure_initialized()
|
|
167
|
+
assert self._client is not None # noqa: S101
|
|
169
168
|
return self._client
|
|
170
169
|
|
|
171
170
|
@property
|
|
@@ -174,6 +173,7 @@ class ElasticsearchDocumentStore:
|
|
|
174
173
|
Returns the asynchronous Elasticsearch client, initializing it if necessary.
|
|
175
174
|
"""
|
|
176
175
|
self._ensure_initialized()
|
|
176
|
+
assert self._async_client is not None # noqa: S101
|
|
177
177
|
return self._async_client
|
|
178
178
|
|
|
179
179
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -226,7 +226,7 @@ class ElasticsearchDocumentStore:
|
|
|
226
226
|
result = await self._async_client.count(index=self._index) # type: ignore
|
|
227
227
|
return result["count"]
|
|
228
228
|
|
|
229
|
-
def _search_documents(self, **kwargs) -> List[Document]:
|
|
229
|
+
def _search_documents(self, **kwargs: Any) -> List[Document]:
|
|
230
230
|
"""
|
|
231
231
|
Calls the Elasticsearch client's search method and handles pagination.
|
|
232
232
|
"""
|
|
@@ -253,7 +253,7 @@ class ElasticsearchDocumentStore:
|
|
|
253
253
|
break
|
|
254
254
|
return documents
|
|
255
255
|
|
|
256
|
-
async def _search_documents_async(self, **kwargs) -> List[Document]:
|
|
256
|
+
async def _search_documents_async(self, **kwargs: Any) -> List[Document]:
|
|
257
257
|
"""
|
|
258
258
|
Asynchronously calls the Elasticsearch client's search method and handles pagination.
|
|
259
259
|
"""
|
|
@@ -379,9 +379,12 @@ class ElasticsearchDocumentStore:
|
|
|
379
379
|
refresh="wait_for",
|
|
380
380
|
index=self._index,
|
|
381
381
|
raise_on_error=False,
|
|
382
|
+
stats_only=False,
|
|
382
383
|
)
|
|
383
384
|
|
|
384
385
|
if errors:
|
|
386
|
+
# with stats_only=False, errors is guaranteed to be a list of dicts
|
|
387
|
+
assert isinstance(errors, list) # noqa: S101
|
|
385
388
|
duplicate_errors_ids = []
|
|
386
389
|
other_errors = []
|
|
387
390
|
for e in errors:
|
|
@@ -451,13 +454,16 @@ class ElasticsearchDocumentStore:
|
|
|
451
454
|
|
|
452
455
|
try:
|
|
453
456
|
success, failed = await helpers.async_bulk(
|
|
454
|
-
client=self._async_client,
|
|
457
|
+
client=self.async_client,
|
|
455
458
|
actions=actions,
|
|
456
459
|
index=self._index,
|
|
457
460
|
refresh=True,
|
|
458
461
|
raise_on_error=False,
|
|
462
|
+
stats_only=False,
|
|
459
463
|
)
|
|
460
464
|
if failed:
|
|
465
|
+
# with stats_only=False, failed is guaranteed to be a list of dicts
|
|
466
|
+
assert isinstance(failed, list) # noqa: S101
|
|
461
467
|
if policy == DuplicatePolicy.FAIL:
|
|
462
468
|
for error in failed:
|
|
463
469
|
if "create" in error and error["create"]["status"] == DOC_ALREADY_EXISTS:
|
|
@@ -494,7 +500,7 @@ class ElasticsearchDocumentStore:
|
|
|
494
500
|
|
|
495
501
|
try:
|
|
496
502
|
await helpers.async_bulk(
|
|
497
|
-
client=self._async_client,
|
|
503
|
+
client=self.async_client,
|
|
498
504
|
actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
|
|
499
505
|
index=self._index,
|
|
500
506
|
refresh=True,
|
|
@@ -551,6 +557,8 @@ class ElasticsearchDocumentStore:
|
|
|
551
557
|
|
|
552
558
|
if scale_score:
|
|
553
559
|
for doc in documents:
|
|
560
|
+
if doc.score is None:
|
|
561
|
+
continue
|
|
554
562
|
doc.score = float(1 / (1 + np.exp(-np.asarray(doc.score / BM25_SCALING_FACTOR))))
|
|
555
563
|
|
|
556
564
|
return documents
|
|
File without changes
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJBsYjz_T4kK-M-auAHVUnYIcgUqqwwQe_hsF0_IG4,307
|
|
2
|
-
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=ISHc6elYXoDXDvC62_3bMMCk_Dv67jvZIgQBCZ1ZHdw,7012
|
|
3
|
-
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=jHDLMeecpf-DhvbRM1AAq2kIJn7xMNTR9vkm-FhHH7k,7332
|
|
4
|
-
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
5
|
-
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=pZ0pPyOCPTCKNYD4q5YbLrslSGTIbVPj60U18-BImX8,27406
|
|
6
|
-
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
|
|
7
|
-
elasticsearch_haystack-3.0.1.dist-info/METADATA,sha256=defq5KselqQwwMLUa3jfrmXjjVxH15cLSn7iKlDKgJk,2257
|
|
8
|
-
elasticsearch_haystack-3.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
-
elasticsearch_haystack-3.0.1.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
-
elasticsearch_haystack-3.0.1.dist-info/RECORD,,
|
|
File without changes
|
{elasticsearch_haystack-3.0.1.dist-info → elasticsearch_haystack-3.1.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|