elasticsearch-haystack 3.0.1__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (21)
  1. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/CHANGELOG.md +12 -3
  2. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/PKG-INFO +2 -2
  3. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/README.md +1 -1
  4. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/pyproject.toml +27 -26
  5. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +6 -2
  6. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +4 -2
  7. elasticsearch_haystack-3.1.0/src/haystack_integrations/components/retrievers/py.typed +0 -0
  8. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +19 -11
  9. elasticsearch_haystack-3.1.0/src/haystack_integrations/document_stores/py.typed +0 -0
  10. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/tests/test_document_store.py +0 -2
  11. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/.gitignore +0 -0
  12. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/LICENSE +0 -0
  13. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/docker-compose.yml +0 -0
  14. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/pydoc/config.yml +0 -0
  15. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  16. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  17. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  18. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/tests/__init__.py +0 -0
  19. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/tests/test_bm25_retriever.py +0 -0
  20. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/tests/test_embedding_retriever.py +0 -0
  21. {elasticsearch_haystack-3.0.1 → elasticsearch_haystack-3.1.0}/tests/test_filters.py +0 -0
@@ -1,7 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/elasticsearch-v3.0.1] - 2025-05-27
4
+
5
+
6
+ ### ⚙️ CI
7
+
8
+ - Review testing workflows (#1541)
9
+
10
+ ### 🌀 Miscellaneous
11
+
12
+ - Pinning lower versions of haystack and `aiohttp` for `ElasticSearch` (#1827)
13
+
3
14
  ## [integrations/elasticsearch-v3.0.0] - 2025-03-11
4
15
 
16
+
5
17
  ### 🧹 Chores
6
18
 
7
19
  - Use Haystack logging across integrations (#1484)
@@ -21,9 +33,6 @@
21
33
 
22
34
  - Remove Python 3.8 support (#1421)
23
35
 
24
- ### 🌀 Miscellaneous
25
-
26
- - Docs: update changelog for integrations/elasticsearch (#1400)
27
36
 
28
37
  ## [integrations/elasticsearch-v2.0.0] - 2025-02-14
29
38
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: elasticsearch-haystack
3
- Version: 3.0.1
3
+ Version: 3.1.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -50,7 +50,7 @@ docker-compose up
50
50
  Then run tests:
51
51
 
52
52
  ```console
53
- hatch run test
53
+ hatch run test:all
54
54
  ```
55
55
 
56
56
  ## License
@@ -24,7 +24,7 @@ docker-compose up
24
24
  Then run tests:
25
25
 
26
26
  ```console
27
- hatch run test
27
+ hatch run test:all
28
28
  ```
29
29
 
30
30
  ## License
@@ -47,33 +47,37 @@ git_describe_command = 'git describe --tags --match="integrations/elasticsearch-
47
47
 
48
48
  [tool.hatch.envs.default]
49
49
  installer = "uv"
50
- dependencies = [
51
- "coverage[toml]>=6.5",
52
- "pytest",
53
- "pytest-asyncio",
54
- "pytest-rerunfailures",
55
- "pytest-xdist",
56
- "haystack-pydoc-tools",
57
- ]
50
+ dependencies = ["haystack-pydoc-tools", "ruff"]
51
+
58
52
  [tool.hatch.envs.default.scripts]
59
- test = "pytest {args:tests}"
60
- test-cov = "coverage run -m pytest {args:tests}"
61
- test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
62
- cov-report = ["- coverage combine", "coverage report"]
63
- cov = ["test-cov", "cov-report"]
64
- cov-retry = ["test-cov-retry", "cov-report"]
65
53
  docs = ["pydoc-markdown pydoc/config.yml"]
54
+ fmt = "ruff check --fix {args} && ruff format {args}"
55
+ fmt-check = "ruff check {args} && ruff format --check {args}"
66
56
 
67
- [tool.hatch.envs.lint]
68
- installer = "uv"
69
- detached = true
70
- dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
57
+ [tool.hatch.envs.test]
58
+ dependencies = [
59
+ "pytest",
60
+ "pytest-asyncio",
61
+ "pytest-cov",
62
+ "pytest-rerunfailures",
63
+ "mypy",
64
+ "pip"
65
+ ]
66
+
67
+ [tool.hatch.envs.test.scripts]
68
+ unit = 'pytest -m "not integration" {args:tests}'
69
+ integration = 'pytest -m "integration" {args:tests}'
70
+ all = 'pytest {args:tests}'
71
+ cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
72
+
73
+ types = """mypy -p haystack_integrations.document_stores.elasticsearch \
74
+ -p haystack_integrations.components.retrievers.elasticsearch {args}"""
71
75
 
72
- [tool.hatch.envs.lint.scripts]
73
- typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
74
- style = ["ruff check {args:}", "black --check --diff {args:.}"]
75
- fmt = ["black {args:.}", "ruff check --fix {args:}", "style"]
76
- all = ["style", "typing"]
76
+ [tool.mypy]
77
+ install_types = true
78
+ non_interactive = true
79
+ check_untyped_defs = true
80
+ disallow_incomplete_defs = true
77
81
 
78
82
  [tool.hatch.metadata]
79
83
  allow-direct-references = true
@@ -164,6 +168,3 @@ markers = ["unit: unit tests", "integration: integration tests"]
164
168
  asyncio_mode = "auto"
165
169
  asyncio_default_fixture_loop_scope = "class"
166
170
 
167
- [[tool.mypy.overrides]]
168
- module = ["haystack.*", "haystack_integrations.*", "numpy.*", "pytest.*"]
169
- ignore_missing_imports = true
@@ -116,7 +116,9 @@ class ElasticsearchBM25Retriever:
116
116
  return default_from_dict(cls, data)
117
117
 
118
118
  @component.output_types(documents=List[Document])
119
- def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
119
+ def run(
120
+ self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
121
+ ) -> Dict[str, List[Document]]:
120
122
  """
121
123
  Retrieve documents using the BM25 keyword-based algorithm.
122
124
 
@@ -139,7 +141,9 @@ class ElasticsearchBM25Retriever:
139
141
  return {"documents": docs}
140
142
 
141
143
  @component.output_types(documents=List[Document])
142
- async def run_async(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
144
+ async def run_async(
145
+ self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
146
+ ) -> Dict[str, List[Document]]:
143
147
  """
144
148
  Asynchronously retrieve documents using the BM25 keyword-based algorithm.
145
149
 
@@ -114,7 +114,9 @@ class ElasticsearchEmbeddingRetriever:
114
114
  return default_from_dict(cls, data)
115
115
 
116
116
  @component.output_types(documents=List[Document])
117
- def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
117
+ def run(
118
+ self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
119
+ ) -> Dict[str, List[Document]]:
118
120
  """
119
121
  Retrieve documents using a vector similarity metric.
120
122
 
@@ -139,7 +141,7 @@ class ElasticsearchEmbeddingRetriever:
139
141
  @component.output_types(documents=List[Document])
140
142
  async def run_async(
141
143
  self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
142
- ):
144
+ ) -> Dict[str, List[Document]]:
143
145
  """
144
146
  Asynchronously retrieve documents using a vector similarity metric.
145
147
 
@@ -5,16 +5,14 @@ from collections.abc import Mapping
5
5
  from typing import Any, Dict, List, Literal, Optional, Union
6
6
 
7
7
  import numpy as np
8
-
9
- # There are no import stubs for elastic_transport and elasticsearch so mypy fails
10
- from elastic_transport import NodeConfig # type: ignore[import-not-found]
8
+ from elastic_transport import NodeConfig
11
9
  from haystack import default_from_dict, default_to_dict, logging
12
10
  from haystack.dataclasses import Document
13
11
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
14
12
  from haystack.document_stores.types import DuplicatePolicy
15
13
  from haystack.version import __version__ as haystack_version
16
14
 
17
- from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers # type: ignore[import-not-found]
15
+ from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
18
16
 
19
17
  from .filters import _normalize_filters
20
18
 
@@ -66,7 +64,7 @@ class ElasticsearchDocumentStore:
66
64
  custom_mapping: Optional[Dict[str, Any]] = None,
67
65
  index: str = "default",
68
66
  embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine",
69
- **kwargs,
67
+ **kwargs: Any,
70
68
  ):
71
69
  """
72
70
  Creates a new ElasticsearchDocumentStore instance.
@@ -93,8 +91,8 @@ class ElasticsearchDocumentStore:
93
91
  :param **kwargs: Optional arguments that `Elasticsearch` takes.
94
92
  """
95
93
  self._hosts = hosts
96
- self._client = None
97
- self._async_client = None
94
+ self._client: Optional[Elasticsearch] = None
95
+ self._async_client: Optional[AsyncElasticsearch] = None
98
96
  self._index = index
99
97
  self._embedding_similarity_function = embedding_similarity_function
100
98
  self._custom_mapping = custom_mapping
@@ -166,6 +164,7 @@ class ElasticsearchDocumentStore:
166
164
  Returns the synchronous Elasticsearch client, initializing it if necessary.
167
165
  """
168
166
  self._ensure_initialized()
167
+ assert self._client is not None # noqa: S101
169
168
  return self._client
170
169
 
171
170
  @property
@@ -174,6 +173,7 @@ class ElasticsearchDocumentStore:
174
173
  Returns the asynchronous Elasticsearch client, initializing it if necessary.
175
174
  """
176
175
  self._ensure_initialized()
176
+ assert self._async_client is not None # noqa: S101
177
177
  return self._async_client
178
178
 
179
179
  def to_dict(self) -> Dict[str, Any]:
@@ -226,7 +226,7 @@ class ElasticsearchDocumentStore:
226
226
  result = await self._async_client.count(index=self._index) # type: ignore
227
227
  return result["count"]
228
228
 
229
- def _search_documents(self, **kwargs) -> List[Document]:
229
+ def _search_documents(self, **kwargs: Any) -> List[Document]:
230
230
  """
231
231
  Calls the Elasticsearch client's search method and handles pagination.
232
232
  """
@@ -253,7 +253,7 @@ class ElasticsearchDocumentStore:
253
253
  break
254
254
  return documents
255
255
 
256
- async def _search_documents_async(self, **kwargs) -> List[Document]:
256
+ async def _search_documents_async(self, **kwargs: Any) -> List[Document]:
257
257
  """
258
258
  Asynchronously calls the Elasticsearch client's search method and handles pagination.
259
259
  """
@@ -379,9 +379,12 @@ class ElasticsearchDocumentStore:
379
379
  refresh="wait_for",
380
380
  index=self._index,
381
381
  raise_on_error=False,
382
+ stats_only=False,
382
383
  )
383
384
 
384
385
  if errors:
386
+ # with stats_only=False, errors is guaranteed to be a list of dicts
387
+ assert isinstance(errors, list) # noqa: S101
385
388
  duplicate_errors_ids = []
386
389
  other_errors = []
387
390
  for e in errors:
@@ -451,13 +454,16 @@ class ElasticsearchDocumentStore:
451
454
 
452
455
  try:
453
456
  success, failed = await helpers.async_bulk(
454
- client=self._async_client,
457
+ client=self.async_client,
455
458
  actions=actions,
456
459
  index=self._index,
457
460
  refresh=True,
458
461
  raise_on_error=False,
462
+ stats_only=False,
459
463
  )
460
464
  if failed:
465
+ # with stats_only=False, failed is guaranteed to be a list of dicts
466
+ assert isinstance(failed, list) # noqa: S101
461
467
  if policy == DuplicatePolicy.FAIL:
462
468
  for error in failed:
463
469
  if "create" in error and error["create"]["status"] == DOC_ALREADY_EXISTS:
@@ -494,7 +500,7 @@ class ElasticsearchDocumentStore:
494
500
 
495
501
  try:
496
502
  await helpers.async_bulk(
497
- client=self._async_client,
503
+ client=self.async_client,
498
504
  actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
499
505
  index=self._index,
500
506
  refresh=True,
@@ -551,6 +557,8 @@ class ElasticsearchDocumentStore:
551
557
 
552
558
  if scale_score:
553
559
  for doc in documents:
560
+ if doc.score is None:
561
+ continue
554
562
  doc.score = float(1 / (1 + np.exp(-np.asarray(doc.score / BM25_SCALING_FACTOR))))
555
563
 
556
564
  return documents
@@ -342,7 +342,6 @@ class TestDocumentStore(DocumentStoreBaseTests):
342
342
 
343
343
  @pytest.mark.integration
344
344
  class TestElasticsearchDocumentStoreAsync:
345
-
346
345
  @pytest.fixture
347
346
  async def document_store(self, request):
348
347
  """
@@ -410,7 +409,6 @@ class TestElasticsearchDocumentStoreAsync:
410
409
 
411
410
  @pytest.mark.asyncio
412
411
  async def test_embedding_retrieval_async(self, document_store):
413
-
414
412
  # init document store
415
413
  docs = [
416
414
  Document(content="Most similar document", embedding=[1.0, 1.0, 1.0, 1.0]),