elasticsearch-haystack 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: elasticsearch-haystack
3
- Version: 1.0.0
3
+ Version: 2.0.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -2,9 +2,9 @@ haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJ
2
2
  haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=XA6UiNFb59CMM5LSoPmNDe3IzZ7ty7HViSaU2ZT4--w,5851
3
3
  haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=ZL9kHi6tCzks1_GXoOIRVLcN4BWnaMqN6t-JcwdTfao,5992
4
4
  haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
5
- haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=-F6Ij1tZ6WfUyep49X6t0q40NIcEmwf1XhYY2BgFkZg,18788
6
- haystack_integrations/document_stores/elasticsearch/filters.py,sha256=L1tN7YCIDuNdhGrBQdPoqXFk37x__2-K038xZ6PRdNQ,9923
7
- elasticsearch_haystack-1.0.0.dist-info/METADATA,sha256=L4XxIMl6z2IQkNQ-7Pe3cXbzcsv5gmljEnZsTMolwPo,2168
8
- elasticsearch_haystack-1.0.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
- elasticsearch_haystack-1.0.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
10
- elasticsearch_haystack-1.0.0.dist-info/RECORD,,
5
+ haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=lNHnzVm30dHdZr6jJtRHY212r5fN7a2w_PEUCdoseA4,19817
6
+ haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
7
+ elasticsearch_haystack-2.0.0.dist-info/METADATA,sha256=wTRyUYeJy0jvMOa9t0JtWHv105JE2awalXH7pmVSmyI,2168
8
+ elasticsearch_haystack-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ elasticsearch_haystack-2.0.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
10
+ elasticsearch_haystack-2.0.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -105,9 +105,12 @@ class ElasticsearchDocumentStore:
105
105
  @property
106
106
  def client(self) -> Elasticsearch:
107
107
  if self._client is None:
108
+ headers = self._kwargs.pop("headers", {})
109
+ headers["user-agent"] = f"haystack-py-ds/{haystack_version}"
110
+
108
111
  client = Elasticsearch(
109
112
  self._hosts,
110
- headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
113
+ headers=headers,
111
114
  **self._kwargs,
112
115
  )
113
116
  # Check client connection, this will raise if not connected
@@ -255,6 +258,15 @@ class ElasticsearchDocumentStore:
255
258
  elasticsearch_actions = []
256
259
  for doc in documents:
257
260
  doc_dict = doc.to_dict()
261
+ if "dataframe" in doc_dict:
262
+ dataframe = doc_dict.pop("dataframe")
263
+ if dataframe:
264
+ logger.warning(
265
+ "Document %s has the `dataframe` field set,"
266
+ "ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
267
+ "The `dataframe` field will soon be removed from Haystack Document.",
268
+ doc.id,
269
+ )
258
270
  if "sparse_embedding" in doc_dict:
259
271
  sparse_embedding = doc_dict.pop("sparse_embedding", None)
260
272
  if sparse_embedding:
@@ -319,6 +331,15 @@ class ElasticsearchDocumentStore:
319
331
  data["metadata"]["highlighted"] = hit["highlight"]
320
332
  data["score"] = hit["_score"]
321
333
 
334
+ if "dataframe" in data:
335
+ dataframe = data.pop("dataframe")
336
+ if dataframe:
337
+ logger.warning(
338
+ "Document %s has the `dataframe` field set,"
339
+ "ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
340
+ "The `dataframe` field will soon be removed from Haystack Document.",
341
+ data["id"],
342
+ )
322
343
  return Document.from_dict(data)
323
344
 
324
345
  def delete_documents(self, document_ids: List[str]) -> None:
@@ -5,7 +5,6 @@ from datetime import datetime
5
5
  from typing import Any, Dict, List
6
6
 
7
7
  from haystack.errors import FilterError
8
- from pandas import DataFrame
9
8
 
10
9
 
11
10
  def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]:
@@ -57,7 +56,7 @@ def _equal(field: str, value: Any) -> Dict[str, Any]:
57
56
  }
58
57
  }
59
58
  }
60
- if field in ["text", "dataframe"]:
59
+ if field == "text":
61
60
  # We want to fully match the text field.
62
61
  return {"match": {field: {"query": value, "minimum_should_match": "100%"}}}
63
62
  return {"term": {field: value}}
@@ -69,7 +68,7 @@ def _not_equal(field: str, value: Any) -> Dict[str, Any]:
69
68
 
70
69
  if isinstance(value, list):
71
70
  return {"bool": {"must_not": {"terms": {field: value}}}}
72
- if field in ["text", "dataframe"]:
71
+ if field == "text":
73
72
  # We want to fully match the text field.
74
73
  return {"bool": {"must_not": {"match": {field: {"query": value, "minimum_should_match": "100%"}}}}}
75
74
 
@@ -92,7 +91,7 @@ def _greater_than(field: str, value: Any) -> Dict[str, Any]:
92
91
  "Strings are only comparable if they are ISO formatted dates."
93
92
  )
94
93
  raise FilterError(msg) from exc
95
- if type(value) in [list, DataFrame]:
94
+ if isinstance(value, list):
96
95
  msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
97
96
  raise FilterError(msg)
98
97
  return {"range": {field: {"gt": value}}}
@@ -114,7 +113,7 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]:
114
113
  "Strings are only comparable if they are ISO formatted dates."
115
114
  )
116
115
  raise FilterError(msg) from exc
117
- if type(value) in [list, DataFrame]:
116
+ if isinstance(value, list):
118
117
  msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
119
118
  raise FilterError(msg)
120
119
  return {"range": {field: {"gte": value}}}
@@ -136,7 +135,7 @@ def _less_than(field: str, value: Any) -> Dict[str, Any]:
136
135
  "Strings are only comparable if they are ISO formatted dates."
137
136
  )
138
137
  raise FilterError(msg) from exc
139
- if type(value) in [list, DataFrame]:
138
+ if isinstance(value, list):
140
139
  msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
141
140
  raise FilterError(msg)
142
141
  return {"range": {field: {"lt": value}}}
@@ -158,7 +157,7 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]:
158
157
  "Strings are only comparable if they are ISO formatted dates."
159
158
  )
160
159
  raise FilterError(msg) from exc
161
- if type(value) in [list, DataFrame]:
160
+ if isinstance(value, list):
162
161
  msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
163
162
  raise FilterError(msg)
164
163
  return {"range": {field: {"lte": value}}}
@@ -212,8 +211,6 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
212
211
  raise FilterError(msg)
213
212
  operator: str = condition["operator"]
214
213
  value: Any = condition["value"]
215
- if isinstance(value, DataFrame):
216
- value = value.to_json()
217
214
 
218
215
  return COMPARISON_OPERATORS[operator](field, value)
219
216