elasticsearch-haystack 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-1.0.1.dist-info → elasticsearch_haystack-2.0.0.dist-info}/METADATA +2 -2
- {elasticsearch_haystack-1.0.1.dist-info → elasticsearch_haystack-2.0.0.dist-info}/RECORD +6 -6
- {elasticsearch_haystack-1.0.1.dist-info → elasticsearch_haystack-2.0.0.dist-info}/WHEEL +1 -1
- haystack_integrations/document_stores/elasticsearch/document_store.py +18 -0
- haystack_integrations/document_stores/elasticsearch/filters.py +6 -9
- {elasticsearch_haystack-1.0.1.dist-info → elasticsearch_haystack-2.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -2,9 +2,9 @@ haystack_integrations/components/retrievers/elasticsearch/__init__.py,sha256=cSJ
|
|
|
2
2
|
haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py,sha256=XA6UiNFb59CMM5LSoPmNDe3IzZ7ty7HViSaU2ZT4--w,5851
|
|
3
3
|
haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py,sha256=ZL9kHi6tCzks1_GXoOIRVLcN4BWnaMqN6t-JcwdTfao,5992
|
|
4
4
|
haystack_integrations/document_stores/elasticsearch/__init__.py,sha256=YTfu94dtVUBogbJFr1aJrKuaI6-Bw9VuHfPoyU7M8os,207
|
|
5
|
-
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=
|
|
6
|
-
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=
|
|
7
|
-
elasticsearch_haystack-
|
|
8
|
-
elasticsearch_haystack-
|
|
9
|
-
elasticsearch_haystack-
|
|
10
|
-
elasticsearch_haystack-
|
|
5
|
+
haystack_integrations/document_stores/elasticsearch/document_store.py,sha256=lNHnzVm30dHdZr6jJtRHY212r5fN7a2w_PEUCdoseA4,19817
|
|
6
|
+
haystack_integrations/document_stores/elasticsearch/filters.py,sha256=Umip-PP4uFjuWeB1JWkKhaKClQ0VpiykoDlDu99wIV0,9759
|
|
7
|
+
elasticsearch_haystack-2.0.0.dist-info/METADATA,sha256=wTRyUYeJy0jvMOa9t0JtWHv105JE2awalXH7pmVSmyI,2168
|
|
8
|
+
elasticsearch_haystack-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
+
elasticsearch_haystack-2.0.0.dist-info/licenses/LICENSE,sha256=_M2kulivnaiTHiW-5CRlZrPmH47tt04pBgAgeDvfYi4,11342
|
|
10
|
+
elasticsearch_haystack-2.0.0.dist-info/RECORD,,
|
|
@@ -258,6 +258,15 @@ class ElasticsearchDocumentStore:
|
|
|
258
258
|
elasticsearch_actions = []
|
|
259
259
|
for doc in documents:
|
|
260
260
|
doc_dict = doc.to_dict()
|
|
261
|
+
if "dataframe" in doc_dict:
|
|
262
|
+
dataframe = doc_dict.pop("dataframe")
|
|
263
|
+
if dataframe:
|
|
264
|
+
logger.warning(
|
|
265
|
+
"Document %s has the `dataframe` field set,"
|
|
266
|
+
"ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
267
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
268
|
+
doc.id,
|
|
269
|
+
)
|
|
261
270
|
if "sparse_embedding" in doc_dict:
|
|
262
271
|
sparse_embedding = doc_dict.pop("sparse_embedding", None)
|
|
263
272
|
if sparse_embedding:
|
|
@@ -322,6 +331,15 @@ class ElasticsearchDocumentStore:
|
|
|
322
331
|
data["metadata"]["highlighted"] = hit["highlight"]
|
|
323
332
|
data["score"] = hit["_score"]
|
|
324
333
|
|
|
334
|
+
if "dataframe" in data:
|
|
335
|
+
dataframe = data.pop("dataframe")
|
|
336
|
+
if dataframe:
|
|
337
|
+
logger.warning(
|
|
338
|
+
"Document %s has the `dataframe` field set,"
|
|
339
|
+
"ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
340
|
+
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
341
|
+
data["id"],
|
|
342
|
+
)
|
|
325
343
|
return Document.from_dict(data)
|
|
326
344
|
|
|
327
345
|
def delete_documents(self, document_ids: List[str]) -> None:
|
|
@@ -5,7 +5,6 @@ from datetime import datetime
|
|
|
5
5
|
from typing import Any, Dict, List
|
|
6
6
|
|
|
7
7
|
from haystack.errors import FilterError
|
|
8
|
-
from pandas import DataFrame
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
def _normalize_filters(filters: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -57,7 +56,7 @@ def _equal(field: str, value: Any) -> Dict[str, Any]:
|
|
|
57
56
|
}
|
|
58
57
|
}
|
|
59
58
|
}
|
|
60
|
-
if field
|
|
59
|
+
if field == "text":
|
|
61
60
|
# We want to fully match the text field.
|
|
62
61
|
return {"match": {field: {"query": value, "minimum_should_match": "100%"}}}
|
|
63
62
|
return {"term": {field: value}}
|
|
@@ -69,7 +68,7 @@ def _not_equal(field: str, value: Any) -> Dict[str, Any]:
|
|
|
69
68
|
|
|
70
69
|
if isinstance(value, list):
|
|
71
70
|
return {"bool": {"must_not": {"terms": {field: value}}}}
|
|
72
|
-
if field
|
|
71
|
+
if field == "text":
|
|
73
72
|
# We want to fully match the text field.
|
|
74
73
|
return {"bool": {"must_not": {"match": {field: {"query": value, "minimum_should_match": "100%"}}}}}
|
|
75
74
|
|
|
@@ -92,7 +91,7 @@ def _greater_than(field: str, value: Any) -> Dict[str, Any]:
|
|
|
92
91
|
"Strings are only comparable if they are ISO formatted dates."
|
|
93
92
|
)
|
|
94
93
|
raise FilterError(msg) from exc
|
|
95
|
-
if
|
|
94
|
+
if isinstance(value, list):
|
|
96
95
|
msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
|
|
97
96
|
raise FilterError(msg)
|
|
98
97
|
return {"range": {field: {"gt": value}}}
|
|
@@ -114,7 +113,7 @@ def _greater_than_equal(field: str, value: Any) -> Dict[str, Any]:
|
|
|
114
113
|
"Strings are only comparable if they are ISO formatted dates."
|
|
115
114
|
)
|
|
116
115
|
raise FilterError(msg) from exc
|
|
117
|
-
if
|
|
116
|
+
if isinstance(value, list):
|
|
118
117
|
msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
|
|
119
118
|
raise FilterError(msg)
|
|
120
119
|
return {"range": {field: {"gte": value}}}
|
|
@@ -136,7 +135,7 @@ def _less_than(field: str, value: Any) -> Dict[str, Any]:
|
|
|
136
135
|
"Strings are only comparable if they are ISO formatted dates."
|
|
137
136
|
)
|
|
138
137
|
raise FilterError(msg) from exc
|
|
139
|
-
if
|
|
138
|
+
if isinstance(value, list):
|
|
140
139
|
msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
|
|
141
140
|
raise FilterError(msg)
|
|
142
141
|
return {"range": {field: {"lt": value}}}
|
|
@@ -158,7 +157,7 @@ def _less_than_equal(field: str, value: Any) -> Dict[str, Any]:
|
|
|
158
157
|
"Strings are only comparable if they are ISO formatted dates."
|
|
159
158
|
)
|
|
160
159
|
raise FilterError(msg) from exc
|
|
161
|
-
if
|
|
160
|
+
if isinstance(value, list):
|
|
162
161
|
msg = f"Filter value can't be of type {type(value)} using operators '>', '>=', '<', '<='"
|
|
163
162
|
raise FilterError(msg)
|
|
164
163
|
return {"range": {field: {"lte": value}}}
|
|
@@ -212,8 +211,6 @@ def _parse_comparison_condition(condition: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
212
211
|
raise FilterError(msg)
|
|
213
212
|
operator: str = condition["operator"]
|
|
214
213
|
value: Any = condition["value"]
|
|
215
|
-
if isinstance(value, DataFrame):
|
|
216
|
-
value = value.to_json()
|
|
217
214
|
|
|
218
215
|
return COMPARISON_OPERATORS[operator](field, value)
|
|
219
216
|
|
{elasticsearch_haystack-1.0.1.dist-info → elasticsearch_haystack-2.0.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|