elasticsearch-haystack 0.1.3__tar.gz → 0.3.0__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (18)
  1. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/PKG-INFO +1 -1
  2. elasticsearch_haystack-0.3.0/pydoc/config.yml +31 -0
  3. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/pyproject.toml +14 -9
  4. elasticsearch_haystack-0.3.0/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +7 -0
  5. {elasticsearch_haystack-0.1.3/src/elasticsearch_haystack → elasticsearch_haystack-0.3.0/src/haystack_integrations/components/retrievers/elasticsearch}/bm25_retriever.py +6 -6
  6. {elasticsearch_haystack-0.1.3/src/elasticsearch_haystack → elasticsearch_haystack-0.3.0/src/haystack_integrations/components/retrievers/elasticsearch}/embedding_retriever.py +4 -4
  7. {elasticsearch_haystack-0.1.3/src/elasticsearch_haystack → elasticsearch_haystack-0.3.0/src/haystack_integrations/document_stores/elasticsearch}/__init__.py +1 -1
  8. {elasticsearch_haystack-0.1.3/src/elasticsearch_haystack → elasticsearch_haystack-0.3.0/src/haystack_integrations/document_stores/elasticsearch}/document_store.py +9 -3
  9. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/tests/test_bm25_retriever.py +8 -9
  10. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/tests/test_document_store.py +9 -6
  11. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/tests/test_embedding_retriever.py +10 -9
  12. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/tests/test_filters.py +1 -2
  13. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/.gitignore +0 -0
  14. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/LICENSE +0 -0
  15. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/README.md +0 -0
  16. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/docker-compose.yml +0 -0
  17. {elasticsearch_haystack-0.1.3/src/elasticsearch_haystack → elasticsearch_haystack-0.3.0/src/haystack_integrations/document_stores/elasticsearch}/filters.py +0 -0
  18. {elasticsearch_haystack-0.1.3 → elasticsearch_haystack-0.3.0}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: elasticsearch-haystack
3
- Version: 0.1.3
3
+ Version: 0.3.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -0,0 +1,31 @@
1
+ loaders:
2
+ - type: haystack_pydoc_tools.loaders.CustomPythonLoader
3
+ search_path: [../src]
4
+ modules: [
5
+ "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever",
6
+ "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever",
7
+ "haystack_integrations.document_stores.elasticsearch.document_store",
8
+ "haystack_integrations.document_stores.elasticsearch.filters",
9
+ ]
10
+ ignore_when_discovered: ["__init__"]
11
+ processors:
12
+ - type: filter
13
+ expression:
14
+ documented_only: true
15
+ do_not_filter_modules: false
16
+ skip_empty_modules: true
17
+ - type: smart
18
+ - type: crossref
19
+ renderer:
20
+ type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
21
+ excerpt: Elasticsearch integration for Haystack
22
+ category_slug: integrations-api
23
+ title: Elasticsearch
24
+ slug: integrations-elasticsearch
25
+ order: 70
26
+ markdown:
27
+ descriptive_class_title: false
28
+ descriptive_module_title: true
29
+ add_method_class_prefix: true
30
+ add_member_class_prefix: false
31
+ filename: _readme_elasticsearch.md
@@ -33,6 +33,9 @@ Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/m
33
33
  Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
34
34
  Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch"
35
35
 
36
+ [tool.hatch.build.targets.wheel]
37
+ packages = ["src/haystack_integrations"]
38
+
36
39
  [tool.hatch.version]
37
40
  source = "vcs"
38
41
  tag-pattern = 'integrations\/elasticsearch-v(?P<version>.*)'
@@ -46,6 +49,7 @@ dependencies = [
46
49
  "coverage[toml]>=6.5",
47
50
  "pytest",
48
51
  "pytest-xdist",
52
+ "haystack-pydoc-tools",
49
53
  ]
50
54
  [tool.hatch.envs.default.scripts]
51
55
  test = "pytest {args:tests}"
@@ -58,6 +62,9 @@ cov = [
58
62
  "test-cov",
59
63
  "cov-report",
60
64
  ]
65
+ docs = [
66
+ "pydoc-markdown pydoc/config.yml"
67
+ ]
61
68
 
62
69
  [[tool.hatch.envs.all.matrix]]
63
70
  python = ["3.8", "3.9", "3.10", "3.11"]
@@ -70,7 +77,7 @@ dependencies = [
70
77
  "ruff>=0.0.243",
71
78
  ]
72
79
  [tool.hatch.envs.lint.scripts]
73
- typing = "mypy --install-types --non-interactive {args:src/elasticsearch_haystack tests}"
80
+ typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
74
81
  style = [
75
82
  "ruff {args:.}",
76
83
  "black --check --diff {args:.}",
@@ -139,26 +146,23 @@ unfixable = [
139
146
  ]
140
147
 
141
148
  [tool.ruff.isort]
142
- known-first-party = ["elasticsearch_haystack"]
149
+ known-first-party = ["src"]
143
150
 
144
151
  [tool.ruff.flake8-tidy-imports]
145
- ban-relative-imports = "all"
152
+ ban-relative-imports = "parents"
146
153
 
147
154
  [tool.ruff.per-file-ignores]
148
155
  # Tests can use magic values, assertions, and relative imports
149
156
  "tests/**/*" = ["PLR2004", "S101", "TID252"]
150
157
 
151
158
  [tool.coverage.run]
152
- source_pkgs = ["elasticsearch_haystack", "tests"]
159
+ source_pkgs = ["src", "tests"]
153
160
  branch = true
154
161
  parallel = true
155
- omit = [
156
- "src/elasticsearch_haystack/__about__.py",
157
- ]
158
162
 
159
163
  [tool.coverage.paths]
160
- elasticsearch_haystack = ["src/elasticsearch_haystack", "*/elasticsearch-haystack/src/elasticsearch_haystack"]
161
- tests = ["tests", "*/elasticsearch-haystack/tests"]
164
+ elasticsearch_haystack = ["src/haystack_integrations", "*/elasticsearch/src/haystack_integrations"]
165
+ tests = ["tests", "*/elasticsearch/src/tests"]
162
166
 
163
167
  [tool.coverage.report]
164
168
  exclude_lines = [
@@ -177,6 +181,7 @@ markers = [
177
181
  [[tool.mypy.overrides]]
178
182
  module = [
179
183
  "haystack.*",
184
+ "haystack_integrations.*",
180
185
  "pytest.*"
181
186
  ]
182
187
  ignore_missing_imports = true
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ from .bm25_retriever import ElasticsearchBM25Retriever
5
+ from .embedding_retriever import ElasticsearchEmbeddingRetriever
6
+
7
+ __all__ = ["ElasticsearchBM25Retriever", "ElasticsearchEmbeddingRetriever"]
@@ -5,8 +5,7 @@ from typing import Any, Dict, List, Optional
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
-
9
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
8
+ from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
10
9
 
11
10
 
12
11
  @component
@@ -19,8 +18,8 @@ class ElasticsearchBM25Retriever:
19
18
  Usage example:
20
19
  ```python
21
20
  from haystack import Document
22
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
23
- from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever
21
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
22
+ from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
24
23
 
25
24
  document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
26
25
  retriever = ElasticsearchBM25Retriever(document_store=document_store)
@@ -90,17 +89,18 @@ class ElasticsearchBM25Retriever:
90
89
  return default_from_dict(cls, data)
91
90
 
92
91
  @component.output_types(documents=List[Document])
93
- def run(self, query: str, top_k: Optional[int] = None):
92
+ def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
94
93
  """
95
94
  Retrieve documents using the BM25 keyword-based algorithm.
96
95
 
97
96
  :param query: String to search in Documents' text.
97
+ :param filters: Filters applied to the retrieved Documents.
98
98
  :param top_k: Maximum number of Documents to return.
99
99
  :return: List of Documents that match the query.
100
100
  """
101
101
  docs = self._document_store._bm25_retrieval(
102
102
  query=query,
103
- filters=self._filters,
103
+ filters=filters or self._filters,
104
104
  fuzziness=self._fuzziness,
105
105
  top_k=top_k or self._top_k,
106
106
  scale_score=self._scale_score,
@@ -5,8 +5,7 @@ from typing import Any, Dict, List, Optional
5
5
 
6
6
  from haystack import component, default_from_dict, default_to_dict
7
7
  from haystack.dataclasses import Document
8
-
9
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
8
+ from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
10
9
 
11
10
 
12
11
  @component
@@ -64,17 +63,18 @@ class ElasticsearchEmbeddingRetriever:
64
63
  return default_from_dict(cls, data)
65
64
 
66
65
  @component.output_types(documents=List[Document])
67
- def run(self, query_embedding: List[float], top_k: Optional[int] = None):
66
+ def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
68
67
  """
69
68
  Retrieve documents using a vector similarity metric.
70
69
 
71
70
  :param query_embedding: Embedding of the query.
71
+ :param filters: Filters applied to the retrieved Documents.
72
72
  :param top_k: Maximum number of Documents to return.
73
73
  :return: List of Documents similar to `query_embedding`.
74
74
  """
75
75
  docs = self._document_store._embedding_retrieval(
76
76
  query_embedding=query_embedding,
77
- filters=self._filters,
77
+ filters=filters or self._filters,
78
78
  top_k=top_k or self._top_k,
79
79
  num_candidates=self._num_candidates,
80
80
  )
@@ -1,6 +1,6 @@
1
1
  # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
2
  #
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
4
+ from .document_store import ElasticsearchDocumentStore
5
5
 
6
6
  __all__ = ["ElasticsearchDocumentStore"]
@@ -8,14 +8,16 @@ import numpy as np
8
8
 
9
9
  # There are no import stubs for elastic_transport and elasticsearch so mypy fails
10
10
  from elastic_transport import NodeConfig # type: ignore[import-not-found]
11
- from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
12
11
  from haystack import default_from_dict, default_to_dict
13
12
  from haystack.dataclasses import Document
14
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
15
14
  from haystack.document_stores.types import DuplicatePolicy
16
15
  from haystack.utils.filters import convert
16
+ from haystack.version import __version__ as haystack_version
17
+
18
+ from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
17
19
 
18
- from elasticsearch_haystack.filters import _normalize_filters
20
+ from .filters import _normalize_filters
19
21
 
20
22
  logger = logging.getLogger(__name__)
21
23
 
@@ -89,7 +91,11 @@ class ElasticsearchDocumentStore:
89
91
  :param **kwargs: Optional arguments that ``Elasticsearch`` takes.
90
92
  """
91
93
  self._hosts = hosts
92
- self._client = Elasticsearch(hosts, **kwargs)
94
+ self._client = Elasticsearch(
95
+ hosts,
96
+ headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
97
+ **kwargs,
98
+ )
93
99
  self._index = index
94
100
  self._embedding_similarity_function = embedding_similarity_function
95
101
  self._kwargs = kwargs
@@ -4,9 +4,8 @@
4
4
  from unittest.mock import Mock, patch
5
5
 
6
6
  from haystack.dataclasses import Document
7
-
8
- from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever
9
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
7
+ from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
8
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
10
9
 
11
10
 
12
11
  def test_init_default():
@@ -18,13 +17,13 @@ def test_init_default():
18
17
  assert not retriever._scale_score
19
18
 
20
19
 
21
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
20
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
22
21
  def test_to_dict(_mock_elasticsearch_client):
23
22
  document_store = ElasticsearchDocumentStore(hosts="some fake host")
24
23
  retriever = ElasticsearchBM25Retriever(document_store=document_store)
25
24
  res = retriever.to_dict()
26
25
  assert res == {
27
- "type": "elasticsearch_haystack.bm25_retriever.ElasticsearchBM25Retriever",
26
+ "type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever",
28
27
  "init_parameters": {
29
28
  "document_store": {
30
29
  "init_parameters": {
@@ -32,7 +31,7 @@ def test_to_dict(_mock_elasticsearch_client):
32
31
  "index": "default",
33
32
  "embedding_similarity_function": "cosine",
34
33
  },
35
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
34
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
36
35
  },
37
36
  "filters": {},
38
37
  "fuzziness": "AUTO",
@@ -42,14 +41,14 @@ def test_to_dict(_mock_elasticsearch_client):
42
41
  }
43
42
 
44
43
 
45
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
44
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
46
45
  def test_from_dict(_mock_elasticsearch_client):
47
46
  data = {
48
- "type": "elasticsearch_haystack.bm25_retriever.ElasticsearchBM25Retriever",
47
+ "type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever",
49
48
  "init_parameters": {
50
49
  "document_store": {
51
50
  "init_parameters": {"hosts": "some fake host", "index": "default"},
52
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
51
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
53
52
  },
54
53
  "filters": {},
55
54
  "fuzziness": "AUTO",
@@ -12,10 +12,10 @@ from haystack.dataclasses.document import Document
12
12
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
13
13
  from haystack.document_stores.types import DuplicatePolicy
14
14
  from haystack.testing.document_store import DocumentStoreBaseTests
15
-
16
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
15
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
17
16
 
18
17
 
18
+ @pytest.mark.integration
19
19
  class TestDocumentStore(DocumentStoreBaseTests):
20
20
  """
21
21
  Common test cases will be provided by `DocumentStoreBaseTests` but
@@ -67,12 +67,12 @@ class TestDocumentStore(DocumentStoreBaseTests):
67
67
 
68
68
  super().assert_documents_are_equal(received, expected)
69
69
 
70
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
70
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
71
71
  def test_to_dict(self, _mock_elasticsearch_client):
72
72
  document_store = ElasticsearchDocumentStore(hosts="some hosts")
73
73
  res = document_store.to_dict()
74
74
  assert res == {
75
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
75
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
76
76
  "init_parameters": {
77
77
  "hosts": "some hosts",
78
78
  "index": "default",
@@ -80,10 +80,10 @@ class TestDocumentStore(DocumentStoreBaseTests):
80
80
  },
81
81
  }
82
82
 
83
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
83
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
84
84
  def test_from_dict(self, _mock_elasticsearch_client):
85
85
  data = {
86
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
86
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
87
87
  "init_parameters": {
88
88
  "hosts": "some hosts",
89
89
  "index": "default",
@@ -95,6 +95,9 @@ class TestDocumentStore(DocumentStoreBaseTests):
95
95
  assert document_store._index == "default"
96
96
  assert document_store._embedding_similarity_function == "cosine"
97
97
 
98
+ def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
99
+ assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
100
+
98
101
  def test_write_documents(self, document_store: ElasticsearchDocumentStore):
99
102
  docs = [Document(id="1")]
100
103
  assert document_store.write_documents(docs) == 1
@@ -4,9 +4,8 @@
4
4
  from unittest.mock import Mock, patch
5
5
 
6
6
  from haystack.dataclasses import Document
7
-
8
- from elasticsearch_haystack.document_store import ElasticsearchDocumentStore
9
- from elasticsearch_haystack.embedding_retriever import ElasticsearchEmbeddingRetriever
7
+ from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
8
+ from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
10
9
 
11
10
 
12
11
  def test_init_default():
@@ -18,13 +17,14 @@ def test_init_default():
18
17
  assert retriever._num_candidates is None
19
18
 
20
19
 
21
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
20
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
22
21
  def test_to_dict(_mock_elasticsearch_client):
23
22
  document_store = ElasticsearchDocumentStore(hosts="some fake host")
24
23
  retriever = ElasticsearchEmbeddingRetriever(document_store=document_store)
25
24
  res = retriever.to_dict()
25
+ t = "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever.ElasticsearchEmbeddingRetriever"
26
26
  assert res == {
27
- "type": "elasticsearch_haystack.embedding_retriever.ElasticsearchEmbeddingRetriever",
27
+ "type": t,
28
28
  "init_parameters": {
29
29
  "document_store": {
30
30
  "init_parameters": {
@@ -32,7 +32,7 @@ def test_to_dict(_mock_elasticsearch_client):
32
32
  "index": "default",
33
33
  "embedding_similarity_function": "cosine",
34
34
  },
35
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
35
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
36
36
  },
37
37
  "filters": {},
38
38
  "top_k": 10,
@@ -41,14 +41,15 @@ def test_to_dict(_mock_elasticsearch_client):
41
41
  }
42
42
 
43
43
 
44
- @patch("elasticsearch_haystack.document_store.Elasticsearch")
44
+ @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
45
45
  def test_from_dict(_mock_elasticsearch_client):
46
+ t = "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever.ElasticsearchEmbeddingRetriever"
46
47
  data = {
47
- "type": "elasticsearch_haystack.embedding_retriever.ElasticsearchEmbeddingRetriever",
48
+ "type": t,
48
49
  "init_parameters": {
49
50
  "document_store": {
50
51
  "init_parameters": {"hosts": "some fake host", "index": "default"},
51
- "type": "elasticsearch_haystack.document_store.ElasticsearchDocumentStore",
52
+ "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
52
53
  },
53
54
  "filters": {},
54
55
  "top_k": 10,
@@ -3,8 +3,7 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import pytest
5
5
  from haystack.errors import FilterError
6
-
7
- from elasticsearch_haystack.filters import _normalize_filters, _normalize_ranges
6
+ from haystack_integrations.document_stores.elasticsearch.filters import _normalize_filters, _normalize_ranges
8
7
 
9
8
  filters_data = [
10
9
  (