elasticsearch-haystack 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-0.1.3.dist-info → elasticsearch_haystack-0.3.0.dist-info}/METADATA +1 -1
- elasticsearch_haystack-0.3.0.dist-info/RECORD +10 -0
- {elasticsearch_haystack-0.1.3.dist-info → elasticsearch_haystack-0.3.0.dist-info}/WHEEL +1 -1
- haystack_integrations/components/retrievers/elasticsearch/__init__.py +7 -0
- {elasticsearch_haystack → haystack_integrations/components/retrievers/elasticsearch}/bm25_retriever.py +6 -6
- {elasticsearch_haystack → haystack_integrations/components/retrievers/elasticsearch}/embedding_retriever.py +4 -4
- {elasticsearch_haystack → haystack_integrations/document_stores/elasticsearch}/__init__.py +1 -1
- {elasticsearch_haystack → haystack_integrations/document_stores/elasticsearch}/document_store.py +9 -3
- elasticsearch_haystack-0.1.3.dist-info/RECORD +0 -9
- {elasticsearch_haystack-0.1.3.dist-info → elasticsearch_haystack-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch_haystack → haystack_integrations/document_stores/elasticsearch}/filters.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJBsYjz_T4kK-M-auAHVUnYIcgUqqwwQe_hsF0_IG4,307
|
|
2
|
+
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=qu67WxyTjh1DbEEqg1_IcOkNl0BHtedLFjQVDC0bONE,4398
|
|
3
|
+
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=6HhfAzOjec9R2PnjWT2hLcIWVEe6-bZuWhULu18gJCE,3513
|
|
4
|
+
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
5
|
+
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=qFe9BmXaT6vNOF5EYZ8PNhC6Z1bZVzBoAZ5J2agwcZI,15586
|
|
6
|
+
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
|
|
7
|
+
elasticsearch_haystack-0.3.0.dist-info/METADATA,sha256=P5KpaBuMowYSFu5pf88YxHfyZI0oHJZ-4Z5-tdj7AwE,2105
|
|
8
|
+
elasticsearch_haystack-0.3.0.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
|
|
9
|
+
elasticsearch_haystack-0.3.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
+
elasticsearch_haystack-0.3.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
from .bm25_retriever import ElasticsearchBM25Retriever
|
|
5
|
+
from .embedding_retriever import ElasticsearchEmbeddingRetriever
|
|
6
|
+
|
|
7
|
+
__all__ = ["ElasticsearchBM25Retriever", "ElasticsearchEmbeddingRetriever"]
|
|
@@ -5,8 +5,7 @@ from typing import Any, Dict, List, Optional
|
|
|
5
5
|
|
|
6
6
|
from haystack import component, default_from_dict, default_to_dict
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
|
-
|
|
9
|
-
from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
|
|
8
|
+
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
@component
|
|
@@ -19,8 +18,8 @@ class ElasticsearchBM25Retriever:
|
|
|
19
18
|
Usage example:
|
|
20
19
|
```python
|
|
21
20
|
from haystack import Document
|
|
22
|
-
from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
|
|
23
|
-
from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever
|
|
21
|
+
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
22
|
+
from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
|
|
24
23
|
|
|
25
24
|
document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
|
|
26
25
|
retriever = ElasticsearchBM25Retriever(document_store=document_store)
|
|
@@ -90,17 +89,18 @@ class ElasticsearchBM25Retriever:
|
|
|
90
89
|
return default_from_dict(cls, data)
|
|
91
90
|
|
|
92
91
|
@component.output_types(documents=List[Document])
|
|
93
|
-
def run(self, query: str, top_k: Optional[int] = None):
|
|
92
|
+
def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
|
|
94
93
|
"""
|
|
95
94
|
Retrieve documents using the BM25 keyword-based algorithm.
|
|
96
95
|
|
|
97
96
|
:param query: String to search in Documents' text.
|
|
97
|
+
:param filters: Filters applied to the retrieved Documents.
|
|
98
98
|
:param top_k: Maximum number of Documents to return.
|
|
99
99
|
:return: List of Documents that match the query.
|
|
100
100
|
"""
|
|
101
101
|
docs = self._document_store._bm25_retrieval(
|
|
102
102
|
query=query,
|
|
103
|
-
filters=self._filters,
|
|
103
|
+
filters=filters or self._filters,
|
|
104
104
|
fuzziness=self._fuzziness,
|
|
105
105
|
top_k=top_k or self._top_k,
|
|
106
106
|
scale_score=self._scale_score,
|
|
@@ -5,8 +5,7 @@ from typing import Any, Dict, List, Optional
|
|
|
5
5
|
|
|
6
6
|
from haystack import component, default_from_dict, default_to_dict
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
|
-
|
|
9
|
-
from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
|
|
8
|
+
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
@component
|
|
@@ -64,17 +63,18 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
64
63
|
return default_from_dict(cls, data)
|
|
65
64
|
|
|
66
65
|
@component.output_types(documents=List[Document])
|
|
67
|
-
def run(self, query_embedding: List[float], top_k: Optional[int] = None):
|
|
66
|
+
def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
|
|
68
67
|
"""
|
|
69
68
|
Retrieve documents using a vector similarity metric.
|
|
70
69
|
|
|
71
70
|
:param query_embedding: Embedding of the query.
|
|
71
|
+
:param filters: Filters applied to the retrieved Documents.
|
|
72
72
|
:param top_k: Maximum number of Documents to return.
|
|
73
73
|
:return: List of Documents similar to `query_embedding`.
|
|
74
74
|
"""
|
|
75
75
|
docs = self._document_store._embedding_retrieval(
|
|
76
76
|
query_embedding=query_embedding,
|
|
77
|
-
filters=self._filters,
|
|
77
|
+
filters=filters or self._filters,
|
|
78
78
|
top_k=top_k or self._top_k,
|
|
79
79
|
num_candidates=self._num_candidates,
|
|
80
80
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
|
|
4
|
+
from .document_store import ElasticsearchDocumentStore
|
|
5
5
|
|
|
6
6
|
__all__ = ["ElasticsearchDocumentStore"]
|
{elasticsearch_haystack → haystack_integrations/document_stores/elasticsearch}/document_store.py
RENAMED
|
@@ -8,14 +8,16 @@ import numpy as np
|
|
|
8
8
|
|
|
9
9
|
# There are no import stubs for elastic_transport and elasticsearch so mypy fails
|
|
10
10
|
from elastic_transport import NodeConfig # type: ignore[import-not-found]
|
|
11
|
-
from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
|
|
12
11
|
from haystack import default_from_dict, default_to_dict
|
|
13
12
|
from haystack.dataclasses import Document
|
|
14
13
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
15
14
|
from haystack.document_stores.types import DuplicatePolicy
|
|
16
15
|
from haystack.utils.filters import convert
|
|
16
|
+
from haystack.version import __version__ as haystack_version
|
|
17
|
+
|
|
18
|
+
from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
|
|
17
19
|
|
|
18
|
-
from elasticsearch_haystack.filters import _normalize_filters
|
|
20
|
+
from .filters import _normalize_filters
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
@@ -89,7 +91,11 @@ class ElasticsearchDocumentStore:
|
|
|
89
91
|
:param **kwargs: Optional arguments that ``Elasticsearch`` takes.
|
|
90
92
|
"""
|
|
91
93
|
self._hosts = hosts
|
|
92
|
-
self._client = Elasticsearch(hosts, **kwargs)
|
|
94
|
+
self._client = Elasticsearch(
|
|
95
|
+
hosts,
|
|
96
|
+
headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
|
|
97
|
+
**kwargs,
|
|
98
|
+
)
|
|
93
99
|
self._index = index
|
|
94
100
|
self._embedding_similarity_function = embedding_similarity_function
|
|
95
101
|
self._kwargs = kwargs
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
elasticsearch_haystack/__init__.py,sha256=g4FvBDxgCM32ZF9JwRne61b3yhs5-KA9qLta-YtMCQE,229
|
|
2
|
-
elasticsearch_haystack/bm25_retriever.py,sha256=eL_vUy8SYl5_LJytbgCbQH-jCmYjLEcw9YJnvV-IHcE,4215
|
|
3
|
-
elasticsearch_haystack/document_store.py,sha256=rHCQ3mSYOJDqXN4ghiL0hqIXzpBPySL2xwffX1cLfmA,15437
|
|
4
|
-
elasticsearch_haystack/embedding_retriever.py,sha256=C4su5erlQINxNqnPxnshUkvnwT1J1rs94jHTPA2VU68,3364
|
|
5
|
-
elasticsearch_haystack/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
|
|
6
|
-
elasticsearch_haystack-0.1.3.dist-info/METADATA,sha256=gRoQqQJcXhG7Emrx4yg7cR84PRlB3sTHttAT40PAJuc,2105
|
|
7
|
-
elasticsearch_haystack-0.1.3.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
|
|
8
|
-
elasticsearch_haystack-0.1.3.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
9
|
-
elasticsearch_haystack-0.1.3.dist-info/RECORD,,
|
{elasticsearch_haystack-0.1.3.dist-info → elasticsearch_haystack-0.3.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|