elasticsearch-haystack 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: elasticsearch-haystack
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -1,10 +1,10 @@
1
1
  haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJBsYjz_T4kK-M-auAHVUnYIcgUqqwwQe_hsF0_IG4,307
2
- haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=fFx554MTcUHnQZa2SgC0PzIR85YVbqAdMNOiXKkVSu8,4849
3
- haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=RcIbSMELiKIJsD-8F_u76J33YRt5bLr6lHnoX-hVQ1M,4990
2
+ haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=ZjTrHctsxjcfbtTrSHiNMO0s3nnZLfXjkEvMbO5Aud4,5683
3
+ haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=-9Cc5Y9UXoxL24FTFXCyCO0ZHUGjYLSdP2BediuQrMQ,5824
4
4
  haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
5
5
  haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=H5aqriF7rFYYpqALqAhvBSL41jzGtOxa-vSIPcLgXGw,18719
6
6
  haystack_integrations/document_stores/elasticsearch/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
7
- elasticsearch_haystack-0.6.0.dist-info/METADATA,sha256=KwdNHi8bOY4M646WWQrkGJRVHzQWQDi16sWFX4idbCQ,2168
8
- elasticsearch_haystack-0.6.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
9
- elasticsearch_haystack-0.6.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
10
- elasticsearch_haystack-0.6.0.dist-info/RECORD,,
7
+ elasticsearch_haystack-0.7.0.dist-info/METADATA,sha256=3oIMm0FLRYdRmzpVmNDz7mbNXpmbJS1N-b_PAUA57OQ,2168
8
+ elasticsearch_haystack-0.7.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ elasticsearch_haystack-0.7.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
10
+ elasticsearch_haystack-0.7.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,10 +1,12 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- from typing import Any, Dict, List, Optional
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
9
+ from haystack.document_stores.types.filter_policy import apply_filter_policy
8
10
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
9
11
 
10
12
 
@@ -48,6 +50,7 @@ class ElasticsearchBM25Retriever:
48
50
  fuzziness: str = "AUTO",
49
51
  top_k: int = 10,
50
52
  scale_score: bool = False,
53
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
51
54
  ):
52
55
  """
53
56
  Initialize ElasticsearchBM25Retriever with an instance ElasticsearchDocumentStore.
@@ -60,6 +63,7 @@ class ElasticsearchBM25Retriever:
60
63
  for more details.
61
64
  :param top_k: Maximum number of Documents to return.
62
65
  :param scale_score: If `True` scales the Document`s scores between 0 and 1.
66
+ :param filter_policy: Policy to determine how filters are applied.
63
67
  :raises ValueError: If `document_store` is not an instance of `ElasticsearchDocumentStore`.
64
68
  """
65
69
 
@@ -72,6 +76,7 @@ class ElasticsearchBM25Retriever:
72
76
  self._fuzziness = fuzziness
73
77
  self._top_k = top_k
74
78
  self._scale_score = scale_score
79
+ self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
75
80
 
76
81
  def to_dict(self) -> Dict[str, Any]:
77
82
  """
@@ -86,6 +91,7 @@ class ElasticsearchBM25Retriever:
86
91
  fuzziness=self._fuzziness,
87
92
  top_k=self._top_k,
88
93
  scale_score=self._scale_score,
94
+ filter_policy=self._filter_policy.value,
89
95
  document_store=self._document_store.to_dict(),
90
96
  )
91
97
 
@@ -102,6 +108,7 @@ class ElasticsearchBM25Retriever:
102
108
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
103
109
  data["init_parameters"]["document_store"]
104
110
  )
111
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
105
112
  return default_from_dict(cls, data)
106
113
 
107
114
  @component.output_types(documents=List[Document])
@@ -110,14 +117,17 @@ class ElasticsearchBM25Retriever:
110
117
  Retrieve documents using the BM25 keyword-based algorithm.
111
118
 
112
119
  :param query: String to search in `Document`s' text.
113
- :param filters: Filters applied to the retrieved `Document`s.
120
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
121
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
122
+ details.
114
123
  :param top_k: Maximum number of `Document` to return.
115
124
  :returns: A dictionary with the following keys:
116
125
  - `documents`: List of `Document`s that match the query.
117
126
  """
127
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
118
128
  docs = self._document_store._bm25_retrieval(
119
129
  query=query,
120
- filters=filters or self._filters,
130
+ filters=filters,
121
131
  fuzziness=self._fuzziness,
122
132
  top_k=top_k or self._top_k,
123
133
  scale_score=self._scale_score,
@@ -1,10 +1,12 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- from typing import Any, Dict, List, Optional
4
+ from typing import Any, Dict, List, Optional, Union
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
+ from haystack.document_stores.types import FilterPolicy
9
+ from haystack.document_stores.types.filter_policy import apply_filter_policy
8
10
  from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
9
11
 
10
12
 
@@ -49,6 +51,7 @@ class ElasticsearchEmbeddingRetriever:
49
51
  filters: Optional[Dict[str, Any]] = None,
50
52
  top_k: int = 10,
51
53
  num_candidates: Optional[int] = None,
54
+ filter_policy: Union[str, FilterPolicy] = FilterPolicy.REPLACE,
52
55
  ):
53
56
  """
54
57
  Create the ElasticsearchEmbeddingRetriever component.
@@ -61,6 +64,7 @@ class ElasticsearchEmbeddingRetriever:
61
64
  Increasing this value will improve search accuracy at the cost of slower search speeds.
62
65
  You can read more about it in the Elasticsearch
63
66
  [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy)
67
+ :param filter_policy: Policy to determine how filters are applied.
64
68
  :raises ValueError: If `document_store` is not an instance of ElasticsearchDocumentStore.
65
69
  """
66
70
  if not isinstance(document_store, ElasticsearchDocumentStore):
@@ -71,6 +75,7 @@ class ElasticsearchEmbeddingRetriever:
71
75
  self._filters = filters or {}
72
76
  self._top_k = top_k
73
77
  self._num_candidates = num_candidates
78
+ self._filter_policy = FilterPolicy.from_str(filter_policy) if isinstance(filter_policy, str) else filter_policy
74
79
 
75
80
  def to_dict(self) -> Dict[str, Any]:
76
81
  """
@@ -84,6 +89,7 @@ class ElasticsearchEmbeddingRetriever:
84
89
  filters=self._filters,
85
90
  top_k=self._top_k,
86
91
  num_candidates=self._num_candidates,
92
+ filter_policy=self._filter_policy.value,
87
93
  document_store=self._document_store.to_dict(),
88
94
  )
89
95
 
@@ -100,6 +106,7 @@ class ElasticsearchEmbeddingRetriever:
100
106
  data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
101
107
  data["init_parameters"]["document_store"]
102
108
  )
109
+ data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(data["init_parameters"]["filter_policy"])
103
110
  return default_from_dict(cls, data)
104
111
 
105
112
  @component.output_types(documents=List[Document])
@@ -108,14 +115,17 @@ class ElasticsearchEmbeddingRetriever:
108
115
  Retrieve documents using a vector similarity metric.
109
116
 
110
117
  :param query_embedding: Embedding of the query.
111
- :param filters: Filters applied to the retrieved `Document`s.
118
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
119
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
120
+ details.
112
121
  :param top_k: Maximum number of `Document`s to return.
113
122
  :returns: A dictionary with the following keys:
114
123
  - `documents`: List of `Document`s most similar to the given `query_embedding`
115
124
  """
125
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
116
126
  docs = self._document_store._embedding_retrieval(
117
127
  query_embedding=query_embedding,
118
- filters=filters or self._filters,
128
+ filters=filters,
119
129
  top_k=top_k or self._top_k,
120
130
  num_candidates=self._num_candidates,
121
131
  )