elasticsearch-haystack 2.1.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (19)
  1. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/CHANGELOG.md +14 -0
  2. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/PKG-INFO +3 -2
  3. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/pyproject.toml +5 -1
  4. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +7 -33
  5. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_document_store.py +0 -35
  6. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/.gitignore +0 -0
  7. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/LICENSE +0 -0
  8. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/README.md +0 -0
  9. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/docker-compose.yml +0 -0
  10. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/pydoc/config.yml +0 -0
  11. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  12. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +0 -0
  13. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +0 -0
  14. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  15. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  16. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/__init__.py +0 -0
  17. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_bm25_retriever.py +0 -0
  18. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_embedding_retriever.py +0 -0
  19. {elasticsearch_haystack-2.1.0 → elasticsearch_haystack-3.0.0}/tests/test_filters.py +0 -0
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/elasticsearch-v2.1.0] - 2025-02-26
4
+
5
+ ### 🚀 Features
6
+
7
+ - Adding async support to ElasticSearch retrievers and document store (#1429)
8
+
9
+ ### 🧹 Chores
10
+
11
+ - Remove Python 3.8 support (#1421)
12
+
13
+ ### 🌀 Miscellaneous
14
+
15
+ - Docs: update changelog for integrations/elasticsearch (#1400)
16
+
3
17
  ## [integrations/elasticsearch-v2.0.0] - 2025-02-14
4
18
 
5
19
  ### 🧹 Chores
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: elasticsearch-haystack
3
- Version: 2.1.0
3
+ Version: 3.0.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -17,8 +17,9 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
18
18
  Classifier: Programming Language :: Python :: Implementation :: PyPy
19
19
  Requires-Python: >=3.9
20
+ Requires-Dist: aiohttp
20
21
  Requires-Dist: elasticsearch<9,>=8
21
- Requires-Dist: haystack-ai
22
+ Requires-Dist: haystack-ai>=2.11.0
22
23
  Description-Content-Type: text/markdown
23
24
 
24
25
  [![test](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml)
@@ -21,7 +21,11 @@ classifiers = [
21
21
  "Programming Language :: Python :: Implementation :: CPython",
22
22
  "Programming Language :: Python :: Implementation :: PyPy",
23
23
  ]
24
- dependencies = ["haystack-ai", "elasticsearch>=8,<9"]
24
+ dependencies = [
25
+ "haystack-ai>=2.11.0",
26
+ "elasticsearch>=8,<9",
27
+ "aiohttp" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
28
+ ]
25
29
 
26
30
  [project.urls]
27
31
  Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme"
@@ -1,7 +1,6 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- import logging
5
4
  from collections.abc import Mapping
6
5
  from typing import Any, Dict, List, Literal, Optional, Union
7
6
 
@@ -9,7 +8,7 @@ import numpy as np
9
8
 
10
9
  # There are no import stubs for elastic_transport and elasticsearch so mypy fails
11
10
  from elastic_transport import NodeConfig # type: ignore[import-not-found]
12
- from haystack import default_from_dict, default_to_dict
11
+ from haystack import default_from_dict, default_to_dict, logging
13
12
  from haystack.dataclasses import Document
14
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
15
14
  from haystack.document_stores.types import DuplicatePolicy
@@ -36,7 +35,7 @@ DOC_ALREADY_EXISTS = 409
36
35
 
37
36
  class ElasticsearchDocumentStore:
38
37
  """
39
- ElasticsearchDocumentStore is a Document Store for Elasticsearch. It can be used with Elastic Cloud or your own
38
+ An ElasticsearchDocumentStore instance that works with Elastic Cloud or your own
40
39
  Elasticsearch cluster.
41
40
 
42
41
  Usage example (Elastic Cloud):
@@ -329,15 +328,6 @@ class ElasticsearchDocumentStore:
329
328
  data["metadata"]["highlighted"] = hit["highlight"]
330
329
  data["score"] = hit["_score"]
331
330
 
332
- if "dataframe" in data:
333
- dataframe = data.pop("dataframe")
334
- if dataframe:
335
- logger.warning(
336
- "Document %s has the `dataframe` field set,"
337
- "ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
338
- "The `dataframe` field will soon be removed from Haystack Document.",
339
- data["id"],
340
- )
341
331
  return Document.from_dict(data)
342
332
 
343
333
  def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
@@ -365,23 +355,15 @@ class ElasticsearchDocumentStore:
365
355
  elasticsearch_actions = []
366
356
  for doc in documents:
367
357
  doc_dict = doc.to_dict()
368
- if "dataframe" in doc_dict:
369
- dataframe = doc_dict.pop("dataframe")
370
- if dataframe:
371
- logger.warning(
372
- "Document %s has the `dataframe` field set,"
373
- "ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
374
- "The `dataframe` field will soon be removed from Haystack Document.",
375
- doc.id,
376
- )
358
+
377
359
  if "sparse_embedding" in doc_dict:
378
360
  sparse_embedding = doc_dict.pop("sparse_embedding", None)
379
361
  if sparse_embedding:
380
362
  logger.warning(
381
- "Document %s has the `sparse_embedding` field set,"
363
+ "Document {doc_id} has the `sparse_embedding` field set,"
382
364
  "but storing sparse embeddings in Elasticsearch is not currently supported."
383
365
  "The `sparse_embedding` field will be ignored.",
384
- doc.id,
366
+ doc_id=doc.id,
385
367
  )
386
368
  elasticsearch_actions.append(
387
369
  {
@@ -449,23 +431,15 @@ class ElasticsearchDocumentStore:
449
431
  actions = []
450
432
  for doc in documents:
451
433
  doc_dict = doc.to_dict()
452
- if "dataframe" in doc_dict:
453
- dataframe = doc_dict.pop("dataframe")
454
- if dataframe:
455
- logger.warning(
456
- "Document {id} has the `dataframe` field set,"
457
- "ElasticsearchDocumentStore no longer supports dataframes and this field will be ignored. "
458
- "The `dataframe` field will soon be removed from Haystack Document.",
459
- )
460
434
 
461
435
  if "sparse_embedding" in doc_dict:
462
436
  sparse_embedding = doc_dict.pop("sparse_embedding", None)
463
437
  if sparse_embedding:
464
438
  logger.warning(
465
- "Document %s has the `sparse_embedding` field set,"
439
+ "Document {doc_id} has the `sparse_embedding` field set,"
466
440
  "but storing sparse embeddings in Elasticsearch is not currently supported."
467
441
  "The `sparse_embedding` field will be ignored.",
468
- doc.id,
442
+ doc_id=doc.id,
469
443
  )
470
444
 
471
445
  action = {
@@ -13,7 +13,6 @@ from haystack.dataclasses.sparse_embedding import SparseEmbedding
13
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
14
14
  from haystack.document_stores.types import DuplicatePolicy
15
15
  from haystack.testing.document_store import DocumentStoreBaseTests
16
- from pandas import DataFrame
17
16
 
18
17
  from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
19
18
 
@@ -135,27 +134,6 @@ class TestDocumentStore(DocumentStoreBaseTests):
135
134
  with pytest.raises(DuplicateDocumentError):
136
135
  document_store.write_documents(docs, DuplicatePolicy.FAIL)
137
136
 
138
- def test_write_documents_dataframe_ignored(self, document_store: ElasticsearchDocumentStore):
139
- doc = Document(id="1", content="test")
140
- doc.dataframe = DataFrame({"a": [1, 2, 3]})
141
- document_store.write_documents([doc])
142
- res = document_store.filter_documents()
143
- assert len(res) == 1
144
- assert res[0].id == "1"
145
- assert res[0].content == "test"
146
- assert not hasattr(res[0], "dataframe") or res[0].dataframe is None
147
-
148
- def test_deserialize_document_dataframe_ignored(self, document_store: ElasticsearchDocumentStore):
149
- hit = {
150
- "_source": {"id": "1", "content": "test", "dataframe": {"a": [1, 2, 3]}},
151
- "_score": 1.0,
152
- }
153
- doc = document_store._deserialize_document(hit)
154
- assert doc.id == "1"
155
- assert doc.content == "test"
156
- assert doc.score == 1.0
157
- assert not hasattr(doc, "dataframe") or doc.dataframe is None
158
-
159
137
  def test_bm25_retrieval(self, document_store: ElasticsearchDocumentStore):
160
138
  document_store.write_documents(
161
139
  [
@@ -499,19 +477,6 @@ class TestElasticsearchDocumentStoreAsync:
499
477
  with pytest.raises(ValueError, match="param 'documents' must contain a list of objects of type Document"):
500
478
  await document_store.write_documents_async(invalid_docs)
501
479
 
502
- @pytest.mark.asyncio
503
- async def test_write_documents_async_with_dataframe_warning(self, document_store, caplog):
504
- """Test write_documents with document containing dataframe field"""
505
- doc = Document(id="1", content="test", dataframe=DataFrame({"col": [1, 2, 3]}))
506
-
507
- await document_store.write_documents_async([doc])
508
- assert "ElasticsearchDocumentStore no longer supports dataframes" in caplog.text
509
-
510
- results = await document_store.filter_documents_async()
511
- assert len(results) == 1
512
- assert results[0].id == "1"
513
- assert not hasattr(results[0], "dataframe") or results[0].dataframe is None
514
-
515
480
  @pytest.mark.asyncio
516
481
  async def test_write_documents_async_with_sparse_embedding_warning(self, document_store, caplog):
517
482
  """Test write_documents with document containing sparse_embedding field"""