elasticsearch-haystack 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-0.6.0.dist-info → elasticsearch_haystack-0.7.1.dist-info}/METADATA +1 -1
- {elasticsearch_haystack-0.6.0.dist-info → elasticsearch_haystack-0.7.1.dist-info}/RECORD +6 -6
- {elasticsearch_haystack-0.6.0.dist-info → elasticsearch_haystack-0.7.1.dist-info}/WHEEL +1 -1
- haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +16 -3
- haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +16 -3
- {elasticsearch_haystack-0.6.0.dist-info → elasticsearch_haystack-0.7.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJBsYjz_T4kK-M-auAHVUnYIcgUqqwwQe_hsF0_IG4,307
|
|
2
|
-
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=
|
|
3
|
-
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=
|
|
2
|
+
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=4kCb10S073IHXnBxP4gVU2bxeYlqQbK6FhV8aTg3wVs,5850
|
|
3
|
+
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=9at7bGJt4ZLSwD4kmTKYGLc7bHAn8zWshx3-sZgDU0g,5991
|
|
4
4
|
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
5
5
|
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=H5aqriF7rFYYpqALqAhvBSL41jzGtOxa-vSIPcLgXGw,18719
|
|
6
6
|
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
|
|
7
|
-
elasticsearch_haystack-0.
|
|
8
|
-
elasticsearch_haystack-0.
|
|
9
|
-
elasticsearch_haystack-0.6.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
-
elasticsearch_haystack-0.6.0.dist-info/RECORD,,
|
|
7
|
+
elasticsearch_haystack-0.7.1.dist-info/METADATA,sha256=G6FdW5Hm8aNTFKon36o6RoigWXUNY2IrozKQUXksTg8,2168
|
|
8
|
+
elasticsearch_haystack-0.7.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
9
|
+
elasticsearch_haystack-0.7.1.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
+
elasticsearch_haystack-0.7.1.dist-info/RECORD,,
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
from haystack import component, default_from_dict, default_to_dict
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
|
+
from haystack.document_stores.types import FilterPolicy
|
|
9
|
+
from haystack.document_stores.types.filter_policy import apply_filter_policy
|
|
8
10
|
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
9
11
|
|
|
10
12
|
|
|
@@ -48,6 +50,7 @@ class ElasticsearchBM25Retriever:
|
|
|
48
50
|
fuzziness: str = "AUTO",
|
|
49
51
|
top_k: int = 10,
|
|
50
52
|
scale_score: bool = False,
|
|
53
|
+
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
|
|
51
54
|
):
|
|
52
55
|
"""
|
|
53
56
|
Initialize ElasticsearchBM25Retriever with an instance ElasticsearchDocumentStore.
|
|
@@ -60,6 +63,7 @@ class ElasticsearchBM25Retriever:
|
|
|
60
63
|
for more details.
|
|
61
64
|
:param top_k: Maximum number of Documents to return.
|
|
62
65
|
:param scale_score: If `True` scales the Document`s scores between 0 and 1.
|
|
66
|
+
:param filter_policy: Policy to determine how filters are applied.
|
|
63
67
|
:raises ValueError: If `document_store` is not an instance of `ElasticsearchDocumentStore`.
|
|
64
68
|
"""
|
|
65
69
|
|
|
@@ -72,6 +76,7 @@ class ElasticsearchBM25Retriever:
|
|
|
72
76
|
self._fuzziness = fuzziness
|
|
73
77
|
self._top_k = top_k
|
|
74
78
|
self._scale_score = scale_score
|
|
79
|
+
self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
|
|
75
80
|
|
|
76
81
|
def to_dict(self) -> Dict[str, Any]:
|
|
77
82
|
"""
|
|
@@ -86,6 +91,7 @@ class ElasticsearchBM25Retriever:
|
|
|
86
91
|
fuzziness=self._fuzziness,
|
|
87
92
|
top_k=self._top_k,
|
|
88
93
|
scale_score=self._scale_score,
|
|
94
|
+
filter_policy=self._filter_policy.value,
|
|
89
95
|
document_store=self._document_store.to_dict(),
|
|
90
96
|
)
|
|
91
97
|
|
|
@@ -102,6 +108,10 @@ class ElasticsearchBM25Retriever:
|
|
|
102
108
|
data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
|
|
103
109
|
data["init_parameters"]["document_store"]
|
|
104
110
|
)
|
|
111
|
+
# Pipelines serialized with old versions of the component might not
|
|
112
|
+
# have the filter_policy field.
|
|
113
|
+
if filter_policy := data["init_parameters"].get("filter_policy"):
|
|
114
|
+
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
105
115
|
return default_from_dict(cls, data)
|
|
106
116
|
|
|
107
117
|
@component.output_types(documents=List[Document])
|
|
@@ -110,14 +120,17 @@ class ElasticsearchBM25Retriever:
|
|
|
110
120
|
Retrieve documents using the BM25 keyword-based algorithm.
|
|
111
121
|
|
|
112
122
|
:param query: String to search in `Document`s' text.
|
|
113
|
-
:param filters: Filters applied to the retrieved
|
|
123
|
+
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
124
|
+
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
125
|
+
details.
|
|
114
126
|
:param top_k: Maximum number of `Document` to return.
|
|
115
127
|
:returns: A dictionary with the following keys:
|
|
116
128
|
- `documents`: List of `Document`s that match the query.
|
|
117
129
|
"""
|
|
130
|
+
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
|
|
118
131
|
docs = self._document_store._bm25_retrieval(
|
|
119
132
|
query=query,
|
|
120
|
-
filters=filters
|
|
133
|
+
filters=filters,
|
|
121
134
|
fuzziness=self._fuzziness,
|
|
122
135
|
top_k=top_k or self._top_k,
|
|
123
136
|
scale_score=self._scale_score,
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
from haystack import component, default_from_dict, default_to_dict
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
|
+
from haystack.document_stores.types import FilterPolicy
|
|
9
|
+
from haystack.document_stores.types.filter_policy import apply_filter_policy
|
|
8
10
|
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
9
11
|
|
|
10
12
|
|
|
@@ -49,6 +51,7 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
49
51
|
filters: Optional[Dict[str, Any]] = None,
|
|
50
52
|
top_k: int = 10,
|
|
51
53
|
num_candidates: Optional[int] = None,
|
|
54
|
+
filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
|
|
52
55
|
):
|
|
53
56
|
"""
|
|
54
57
|
Create the ElasticsearchEmbeddingRetriever component.
|
|
@@ -61,6 +64,7 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
61
64
|
Increasing this value will improve search accuracy at the cost of slower search speeds.
|
|
62
65
|
You can read more about it in the Elasticsearch
|
|
63
66
|
[documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy)
|
|
67
|
+
:param filter_policy: Policy to determine how filters are applied.
|
|
64
68
|
:raises ValueError: If `document_store` is not an instance of ElasticsearchDocumentStore.
|
|
65
69
|
"""
|
|
66
70
|
if not isinstance(document_store, ElasticsearchDocumentStore):
|
|
@@ -71,6 +75,7 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
71
75
|
self._filters = filters or {}
|
|
72
76
|
self._top_k = top_k
|
|
73
77
|
self._num_candidates = num_candidates
|
|
78
|
+
self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
|
|
74
79
|
|
|
75
80
|
def to_dict(self) -> Dict[str, Any]:
|
|
76
81
|
"""
|
|
@@ -84,6 +89,7 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
84
89
|
filters=self._filters,
|
|
85
90
|
top_k=self._top_k,
|
|
86
91
|
num_candidates=self._num_candidates,
|
|
92
|
+
filter_policy=self._filter_policy.value,
|
|
87
93
|
document_store=self._document_store.to_dict(),
|
|
88
94
|
)
|
|
89
95
|
|
|
@@ -100,6 +106,10 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
100
106
|
data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
|
|
101
107
|
data["init_parameters"]["document_store"]
|
|
102
108
|
)
|
|
109
|
+
# Pipelines serialized with old versions of the component might not
|
|
110
|
+
# have the filter_policy field.
|
|
111
|
+
if filter_policy := data["init_parameters"].get("filter_policy"):
|
|
112
|
+
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
103
113
|
return default_from_dict(cls, data)
|
|
104
114
|
|
|
105
115
|
@component.output_types(documents=List[Document])
|
|
@@ -108,14 +118,17 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
108
118
|
Retrieve documents using a vector similarity metric.
|
|
109
119
|
|
|
110
120
|
:param query_embedding: Embedding of the query.
|
|
111
|
-
:param filters: Filters applied to the retrieved
|
|
121
|
+
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
122
|
+
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
123
|
+
details.
|
|
112
124
|
:param top_k: Maximum number of `Document`s to return.
|
|
113
125
|
:returns: A dictionary with the following keys:
|
|
114
126
|
- `documents`: List of `Document`s most similar to the given `query_embedding`
|
|
115
127
|
"""
|
|
128
|
+
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
|
|
116
129
|
docs = self._document_store._embedding_retrieval(
|
|
117
130
|
query_embedding=query_embedding,
|
|
118
|
-
filters=filters
|
|
131
|
+
filters=filters,
|
|
119
132
|
top_k=top_k or self._top_k,
|
|
120
133
|
num_candidates=self._num_candidates,
|
|
121
134
|
)
|
{elasticsearch_haystack-0.6.0.dist-info → elasticsearch_haystack-0.7.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|