elasticsearch-haystack 2.1.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/CHANGELOG.md +14 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/PKG-INFO +3 -2
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/pyproject.toml +5 -1
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +7 -33
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_document_store.py +0 -35
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/.gitignore +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/LICENSE +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/README.md +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/docker-compose.yml +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/pydoc/config.yml +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/__init__.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_bm25_retriever.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_embedding_retriever.py +0 -0
- {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_filters.py +0 -0
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/elasticsearch-v2.1.0] - 2025-02-26
|
|
4
|
+
|
|
5
|
+
### 🚀 Features
|
|
6
|
+
|
|
7
|
+
- Adding async support to ElasticSearch retrievers and document store (#1429)
|
|
8
|
+
|
|
9
|
+
### 🧹 Chores
|
|
10
|
+
|
|
11
|
+
- Remove Python 3.8 support (#1421)
|
|
12
|
+
|
|
13
|
+
### 🌀 Miscellaneous
|
|
14
|
+
|
|
15
|
+
- Docs: update changelog for integrations/elasticsearch (#1400)
|
|
16
|
+
|
|
3
17
|
## [integrations/elasticsearch-v2.0.0] - 2025-02-14
|
|
4
18
|
|
|
5
19
|
### 🧹 Chores
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -17,8 +17,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
17
17
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
18
18
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
19
19
|
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: aiohttp
|
|
20
21
|
Requires-Dist: elasticsearch<9,>=8
|
|
21
|
-
Requires-Dist: haystack-ai
|
|
22
|
+
Requires-Dist: haystack-ai>=2.11.0
|
|
22
23
|
Description-Content-Type: text/markdown
|
|
23
24
|
|
|
24
25
|
[](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml)
|
|
@@ -21,7 +21,11 @@ classifiers = [
|
|
|
21
21
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
22
22
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
23
23
|
]
|
|
24
|
-
dependencies = ["haystack-ai", "elasticsearch>=8,<9"]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"haystack-ai>=2.11.0",
|
|
26
|
+
"elasticsearch>=8,<9",
|
|
27
|
+
"aiohttp" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
|
|
28
|
+
]
|
|
25
29
|
|
|
26
30
|
[project.urls]
|
|
27
31
|
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme"
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import logging
|
|
5
4
|
from collections.abc import Mapping
|
|
6
5
|
from typing import Any, Dict, List, Literal, Optional, Union
|
|
7
6
|
|
|
@@ -9,7 +8,7 @@ import numpy as np
|
|
|
9
8
|
|
|
10
9
|
# There are no import stubs for elastic_transport and elasticsearch so mypy fails
|
|
11
10
|
from elastic_transport import NodeConfig # type: ignore[import-not-found]
|
|
12
|
-
from haystack import default_from_dict, default_to_dict
|
|
11
|
+
from haystack import default_from_dict, default_to_dict, logging
|
|
13
12
|
from haystack.dataclasses import Document
|
|
14
13
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
15
14
|
from haystack.document_stores.types import DuplicatePolicy
|
|
@@ -36,7 +35,7 @@ DOC_ALREADY_EXISTS = 409
|
|
|
36
35
|
|
|
37
36
|
class ElasticsearchDocumentStore:
|
|
38
37
|
"""
|
|
39
|
-
ElasticsearchDocumentStore
|
|
38
|
+
An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own
|
|
40
39
|
Elasticsearch cluster.
|
|
41
40
|
|
|
42
41
|
Usage example (Elastic Cloud):
|
|
@@ -329,15 +328,6 @@ class ElasticsearchDocumentStore:
|
|
|
329
328
|
data["metadata"]["highlighted"] = hit["highlight"]
|
|
330
329
|
data["score"] = hit["_score"]
|
|
331
330
|
|
|
332
|
-
if "dataframe" in data:
|
|
333
|
-
dataframe = data.pop("dataframe")
|
|
334
|
-
if dataframe:
|
|
335
|
-
logger.warning(
|
|
336
|
-
"Document %s has the `dataframe` field set,"
|
|
337
|
-
"ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
338
|
-
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
339
|
-
data["id"],
|
|
340
|
-
)
|
|
341
331
|
return Document.from_dict(data)
|
|
342
332
|
|
|
343
333
|
def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
|
|
@@ -365,23 +355,15 @@ class ElasticsearchDocumentStore:
|
|
|
365
355
|
elasticsearch_actions = []
|
|
366
356
|
for doc in documents:
|
|
367
357
|
doc_dict = doc.to_dict()
|
|
368
|
-
|
|
369
|
-
dataframe = doc_dict.pop("dataframe")
|
|
370
|
-
if dataframe:
|
|
371
|
-
logger.warning(
|
|
372
|
-
"Document %s has the `dataframe` field set,"
|
|
373
|
-
"ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
374
|
-
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
375
|
-
doc.id,
|
|
376
|
-
)
|
|
358
|
+
|
|
377
359
|
if "sparse_embedding" in doc_dict:
|
|
378
360
|
sparse_embedding = doc_dict.pop("sparse_embedding", None)
|
|
379
361
|
if sparse_embedding:
|
|
380
362
|
logger.warning(
|
|
381
|
-
"Document %s has the `sparse_embedding` field set,"
|
|
363
|
+
"Document {doc_id} has the `sparse_embedding` field set,"
|
|
382
364
|
"but storing sparse embeddings in Elasticsearch is not currently supported."
|
|
383
365
|
"The `sparse_embedding` field will be ignored.",
|
|
384
|
-
doc.id,
|
|
366
|
+
doc_id=doc.id,
|
|
385
367
|
)
|
|
386
368
|
elasticsearch_actions.append(
|
|
387
369
|
{
|
|
@@ -449,23 +431,15 @@ class ElasticsearchDocumentStore:
|
|
|
449
431
|
actions = []
|
|
450
432
|
for doc in documents:
|
|
451
433
|
doc_dict = doc.to_dict()
|
|
452
|
-
if "dataframe" in doc_dict:
|
|
453
|
-
dataframe = doc_dict.pop("dataframe")
|
|
454
|
-
if dataframe:
|
|
455
|
-
logger.warning(
|
|
456
|
-
"Document {id} has the `dataframe` field set,"
|
|
457
|
-
"ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
|
|
458
|
-
"The `dataframe` field will soon be removed from Haystack Document.",
|
|
459
|
-
)
|
|
460
434
|
|
|
461
435
|
if "sparse_embedding" in doc_dict:
|
|
462
436
|
sparse_embedding = doc_dict.pop("sparse_embedding", None)
|
|
463
437
|
if sparse_embedding:
|
|
464
438
|
logger.warning(
|
|
465
|
-
"Document %s has the `sparse_embedding` field set,"
|
|
439
|
+
"Document {doc_id} has the `sparse_embedding` field set,"
|
|
466
440
|
"but storing sparse embeddings in Elasticsearch is not currently supported."
|
|
467
441
|
"The `sparse_embedding` field will be ignored.",
|
|
468
|
-
doc.id,
|
|
442
|
+
doc_id=doc.id,
|
|
469
443
|
)
|
|
470
444
|
|
|
471
445
|
action = {
|
|
@@ -13,7 +13,6 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
|
|
|
13
13
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
14
14
|
from haystack.document_stores.types import DuplicatePolicy
|
|
15
15
|
from haystack.testing.document_store import DocumentStoreBaseTests
|
|
16
|
-
from pandas import DataFrame
|
|
17
16
|
|
|
18
17
|
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
19
18
|
|
|
@@ -135,27 +134,6 @@ class TestDocumentStore(DocumentStoreBaseTests):
|
|
|
135
134
|
with pytest.raises(DuplicateDocumentError):
|
|
136
135
|
document_store.write_documents(docs, DuplicatePolicy.FAIL)
|
|
137
136
|
|
|
138
|
-
def test_write_documents_dataframe_ignored(self, document_store: ElasticsearchDocumentStore):
|
|
139
|
-
doc = Document(id="1", content="test")
|
|
140
|
-
doc.dataframe = DataFrame({"a": [1, 2, 3]})
|
|
141
|
-
document_store.write_documents([doc])
|
|
142
|
-
res = document_store.filter_documents()
|
|
143
|
-
assert len(res) == 1
|
|
144
|
-
assert res[0].id == "1"
|
|
145
|
-
assert res[0].content == "test"
|
|
146
|
-
assert not hasattr(res[0], "dataframe") or res[0].dataframe is None
|
|
147
|
-
|
|
148
|
-
def test_deserialize_document_dataframe_ignored(self, document_store: ElasticsearchDocumentStore):
|
|
149
|
-
hit = {
|
|
150
|
-
"_source": {"id": "1", "content": "test", "dataframe": {"a": [1, 2, 3]}},
|
|
151
|
-
"_score": 1.0,
|
|
152
|
-
}
|
|
153
|
-
doc = document_store._deserialize_document(hit)
|
|
154
|
-
assert doc.id == "1"
|
|
155
|
-
assert doc.content == "test"
|
|
156
|
-
assert doc.score == 1.0
|
|
157
|
-
assert not hasattr(doc, "dataframe") or doc.dataframe is None
|
|
158
|
-
|
|
159
137
|
def test_bm25_retrieval(self, document_store: ElasticsearchDocumentStore):
|
|
160
138
|
document_store.write_documents(
|
|
161
139
|
[
|
|
@@ -499,19 +477,6 @@ class TestElasticsearchDocumentStoreAsync:
|
|
|
499
477
|
with pytest.raises(ValueError, match="param 'documents' must contain a list of objects of type Document"):
|
|
500
478
|
await document_store.write_documents_async(invalid_docs)
|
|
501
479
|
|
|
502
|
-
@pytest.mark.asyncio
|
|
503
|
-
async def test_write_documents_async_with_dataframe_warning(self, document_store, caplog):
|
|
504
|
-
"""Test write_documents with document containing dataframe field"""
|
|
505
|
-
doc = Document(id="1", content="test", dataframe=DataFrame({"col": [1, 2, 3]}))
|
|
506
|
-
|
|
507
|
-
await document_store.write_documents_async([doc])
|
|
508
|
-
assert "ElasticsearchDocumentStore no longer supports dataframes" in caplog.text
|
|
509
|
-
|
|
510
|
-
results = await document_store.filter_documents_async()
|
|
511
|
-
assert len(results) == 1
|
|
512
|
-
assert results[0].id == "1"
|
|
513
|
-
assert not hasattr(results[0], "dataframe") or results[0].dataframe is None
|
|
514
|
-
|
|
515
480
|
@pytest.mark.asyncio
|
|
516
481
|
async def test_write_documents_async_with_sparse_embedding_warning(self, document_store, caplog):
|
|
517
482
|
"""Test write_documents with document containing sparse_embedding field"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_embedding_retriever.py
RENAMED
|
File without changes
|
|
File without changes
|