elasticsearch-haystack 1.0.1__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (20)
  1. elasticsearch_haystack-2.1.0/CHANGELOG.md +152 -0
  2. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/PKG-INFO +3 -4
  3. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/pyproject.toml +7 -4
  4. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +24 -1
  5. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +30 -4
  6. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +330 -66
  7. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +6 -9
  8. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/tests/test_bm25_retriever.py +63 -0
  9. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/tests/test_document_store.py +196 -6
  10. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/tests/test_embedding_retriever.py +65 -0
  11. elasticsearch_haystack-1.0.1/CHANGELOG.md +0 -95
  12. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/.gitignore +0 -0
  13. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/LICENSE +0 -0
  14. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/README.md +0 -0
  15. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/docker-compose.yml +0 -0
  16. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/pydoc/config.yml +0 -0
  17. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  18. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  19. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/tests/__init__.py +0 -0
  20. {elasticsearch_haystack-1.0.1 → elasticsearch_haystack-2.1.0}/tests/test_filters.py +0 -0
@@ -0,0 +1,152 @@
1
+ # Changelog
2
+
3
+ ## [integrations/elasticsearch-v2.0.0] - 2025-02-14
4
+
5
+ ### 🧹 Chores
6
+
7
+ - Inherit from `FilterDocumentsTestWithDataframe` in Document Stores (#1290)
8
+ - [**breaking**] Elasticsearch - remove dataframe support (#1377)
9
+
10
+
11
+ ## [integrations/elasticsearch-v1.0.1] - 2024-10-28
12
+
13
+ ### ⚙️ CI
14
+
15
+ - Adopt uv as installer (#1142)
16
+
17
+ ### 🧹 Chores
18
+
19
+ - Update changelog after removing legacy filters (#1083)
20
+ - Update ruff linting scripts and settings (#1105)
21
+
22
+ ### 🌀 Miscellaneous
23
+
24
+ - Fix: Elasticsearch - allow passing headers (#1156)
25
+
26
+ ## [integrations/elasticsearch-v1.0.0] - 2024-09-12
27
+
28
+ ### 🚀 Features
29
+
30
+ - Defer the database connection to when it's needed (#766)
31
+ - Add filter_policy to elasticsearch integration (#825)
32
+
33
+ ### 🐛 Bug Fixes
34
+
35
+ - `ElasticSearch` - Fallback to default filter policy when deserializing retrievers without the init parameter (#898)
36
+
37
+ ### 🧪 Testing
38
+
39
+ - Do not retry tests in `hatch run test` command (#954)
40
+
41
+ ### ⚙️ CI
42
+
43
+ - Retry tests to reduce flakiness (#836)
44
+
45
+ ### 🧹 Chores
46
+
47
+ - Update ruff invocation to include check parameter (#853)
48
+ - ElasticSearch - remove legacy filters elasticsearch (#1078)
49
+
50
+ ### 🌀 Miscellaneous
51
+
52
+ - Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
53
+ - Chore: Minor retriever pydoc fix (#884)
54
+ - Chore: elasticsearch - ruff update, don't ruff tests (#999)
55
+
56
+ ## [integrations/elasticsearch-v0.5.0] - 2024-05-24
57
+
58
+ ### 🐛 Bug Fixes
59
+
60
+ - Add support for custom mapping in ElasticsearchDocumentStore (#721)
61
+
62
+ ### 🌀 Miscellaneous
63
+
64
+ - Chore: add license classifiers (#680)
65
+ - Chore: change the pydoc renderer class (#718)
66
+
67
+ ## [integrations/elasticsearch-v0.4.0] - 2024-04-03
68
+
69
+ ### 📚 Documentation
70
+
71
+ - Docstring update (#525)
72
+ - Review Elastic (#541)
73
+ - Disable-class-def (#556)
74
+
75
+ ### 🌀 Miscellaneous
76
+
77
+ - Make tests show coverage (#566)
78
+ - Refactor tests (#574)
79
+ - Remove references to Python 3.7 (#601)
80
+ - Make Document Stores initially skip `SparseEmbedding` (#606)
81
+ - [Elasticsearch] fix: Filters not working with metadata that contain a space or capitalization (#639)
82
+
83
+ ## [integrations/elasticsearch-v0.3.0] - 2024-02-23
84
+
85
+ ### 🐛 Bug Fixes
86
+
87
+ - Fix order of API docs (#447)
88
+
89
+ ### 📚 Documentation
90
+
91
+ - Update category slug (#442)
92
+
93
+ ### 🌀 Miscellaneous
94
+
95
+ - Generate api docs (#322)
96
+ - Add filters to run function in retrievers of elasticsearch (#440)
97
+ - Add user-agent header (#457)
98
+
99
+ ## [integrations/elasticsearch-v0.2.0] - 2024-01-19
100
+
101
+ ### 🌀 Miscellaneous
102
+
103
+ - Mount import paths under haystack_integrations (#244)
104
+
105
+ ## [integrations/elasticsearch-v0.1.3] - 2024-01-18
106
+
107
+ ### 🌀 Miscellaneous
108
+
109
+ - Added top_k argument in the run function of ElasticsearchBM25Retriever (#130)
110
+ - Add more docstrings for `ElasticsearchDocumentStore` and `ElasticsearchBM25Retriever` (#184)
111
+ - Elastic - update imports for beta5 (#238)
112
+
113
+ ## [integrations/elasticsearch-v0.1.2] - 2023-12-20
114
+
115
+ ### 🐛 Bug Fixes
116
+
117
+ - Fix project URLs (#96)
118
+
119
+ ### 🚜 Refactor
120
+
121
+ - Use `hatch_vcs` to manage integrations versioning (#103)
122
+
123
+ ### 🌀 Miscellaneous
124
+
125
+ - Update elasticsearch test badge (#79)
126
+ - [Elasticsearch] - BM25 retrieval: not all terms must mandatorily match (#125)
127
+
128
+ ## [integrations/elasticsearch-v0.1.1] - 2023-12-05
129
+
130
+ ### 🐛 Bug Fixes
131
+
132
+ - Document Stores: fix protocol import (#77)
133
+
134
+ ## [integrations/elasticsearch-v0.1.0] - 2023-12-04
135
+
136
+ ### 🐛 Bug Fixes
137
+
138
+ - Fix license headers
139
+
140
+ ### 🌀 Miscellaneous
141
+
142
+ - Remove Document Store decorator (#76)
143
+
144
+ ## [integrations/elasticsearch-v0.0.2] - 2023-11-29
145
+
146
+ ### 🌀 Miscellaneous
147
+
148
+ - Reorganize repository (#62)
149
+ - Update `ElasticSearchDocumentStore` to use latest `haystack-ai` version (#63)
150
+ - Bump elasticsearch_haystack to 0.0.2
151
+
152
+ <!-- generated by git-cliff -->
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: elasticsearch-haystack
3
- Version: 1.0.1
3
+ Version: 2.1.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -11,13 +11,12 @@ License-File: LICENSE
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Programming Language :: Python
14
- Classifier: Programming Language :: Python :: 3.8
15
14
  Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: Implementation :: CPython
19
18
  Classifier: Programming Language :: Python :: Implementation :: PyPy
20
- Requires-Python: >=3.8
19
+ Requires-Python: >=3.9
21
20
  Requires-Dist: elasticsearch<9,>=8
22
21
  Requires-Dist: haystack-ai
23
22
  Description-Content-Type: text/markdown
@@ -7,7 +7,7 @@ name = "elasticsearch-haystack"
7
7
  dynamic = ["version"]
8
8
  description = 'Haystack 2.x Document Store for ElasticSearch'
9
9
  readme = "README.md"
10
- requires-python = ">=3.8"
10
+ requires-python = ">=3.9"
11
11
  license = "Apache-2.0"
12
12
  keywords = []
13
13
  authors = [{ name = "Silvano Cerza", email = "silvanocerza@gmail.com" }]
@@ -15,7 +15,6 @@ classifiers = [
15
15
  "License :: OSI Approved :: Apache Software License",
16
16
  "Development Status :: 4 - Beta",
17
17
  "Programming Language :: Python",
18
- "Programming Language :: Python :: 3.8",
19
18
  "Programming Language :: Python :: 3.9",
20
19
  "Programming Language :: Python :: 3.10",
21
20
  "Programming Language :: Python :: 3.11",
@@ -45,6 +44,7 @@ installer = "uv"
45
44
  dependencies = [
46
45
  "coverage[toml]>=6.5",
47
46
  "pytest",
47
+ "pytest-asyncio",
48
48
  "pytest-rerunfailures",
49
49
  "pytest-xdist",
50
50
  "haystack-pydoc-tools",
@@ -59,12 +59,13 @@ cov-retry = ["test-cov-retry", "cov-report"]
59
59
  docs = ["pydoc-markdown pydoc/config.yml"]
60
60
 
61
61
  [[tool.hatch.envs.all.matrix]]
62
- python = ["3.8", "3.9", "3.10", "3.11"]
62
+ python = [ "3.9", "3.10", "3.11"]
63
63
 
64
64
  [tool.hatch.envs.lint]
65
65
  installer = "uv"
66
66
  detached = true
67
67
  dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
68
+
68
69
  [tool.hatch.envs.lint.scripts]
69
70
  typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
70
71
  style = ["ruff check {args:}", "black --check --diff {args:.}"]
@@ -157,7 +158,9 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
157
158
  [tool.pytest.ini_options]
158
159
  minversion = "6.0"
159
160
  markers = ["unit: unit tests", "integration: integration tests"]
161
+ asyncio_mode = "auto"
162
+ asyncio_default_fixture_loop_scope = "class"
160
163
 
161
164
  [[tool.mypy.overrides]]
162
- module = ["haystack.*", "haystack_integrations.*", "pytest.*"]
165
+ module = ["haystack.*", "haystack_integrations.*", "numpy.*", "pytest.*"]
163
166
  ignore_missing_imports = true
@@ -120,7 +120,7 @@ class ElasticsearchBM25Retriever:
120
120
  """
121
121
  Retrieve documents using the BM25 keyword-based algorithm.
122
122
 
123
- :param query: String to search in `Document`s' text.
123
+ :param query: String to search in the `Document`s' text.
124
124
  :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
125
125
  the `filter_policy` chosen at retriever initialization. See init method docstring for more
126
126
  details.
@@ -137,3 +137,26 @@ class ElasticsearchBM25Retriever:
137
137
  scale_score=self._scale_score,
138
138
  )
139
139
  return {"documents": docs}
140
+
141
+ @component.output_types(documents=List[Document])
142
+ async def run_async(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
143
+ """
144
+ Asynchronously retrieve documents using the BM25 keyword-based algorithm.
145
+
146
+ :param query: String to search in the `Document` text.
147
+ :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
148
+ the `filter_policy` chosen at retriever initialization. See init method docstring for more
149
+ details.
150
+ :param top_k: Maximum number of `Document`s to return.
151
+ :returns: A dictionary with the following keys:
152
+ - `documents`: List of `Document`s that match the query.
153
+ """
154
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
155
+ docs = await self._document_store._bm25_retrieval_async(
156
+ query=query,
157
+ filters=filters,
158
+ fuzziness=self._fuzziness,
159
+ top_k=top_k or self._top_k,
160
+ scale_score=self._scale_score,
161
+ )
162
+ return {"documents": docs}
@@ -119,10 +119,11 @@ class ElasticsearchEmbeddingRetriever:
119
119
  Retrieve documents using a vector similarity metric.
120
120
 
121
121
  :param query_embedding: Embedding of the query.
122
- :param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
123
- the `filter_policy` chosen at retriever initialization. See init method docstring for more
124
- details.
125
- :param top_k: Maximum number of `Document`s to return.
122
+ :param filters: Filters applied when fetching documents from the Document Store.
123
+ Filters are applied during the approximate kNN search to ensure the Retriever returns
124
+ `top_k` matching documents.
125
+ The way runtime filters are applied depends on the `filter_policy` selected when initializing the Retriever.
126
+ :param top_k: Maximum number of documents to return.
126
127
  :returns: A dictionary with the following keys:
127
128
  - `documents`: List of `Document`s most similar to the given `query_embedding`
128
129
  """
@@ -134,3 +135,28 @@ class ElasticsearchEmbeddingRetriever:
134
135
  num_candidates=self._num_candidates,
135
136
  )
136
137
  return {"documents": docs}
138
+
139
+ @component.output_types(documents=List[Document])
140
+ async def run_async(
141
+ self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
142
+ ):
143
+ """
144
+ Asynchronously retrieve documents using a vector similarity metric.
145
+
146
+ :param query_embedding: Embedding of the query.
147
+ :param filters: Filters applied when fetching documents from the Document Store.
148
+ Filters are applied during the approximate kNN search to ensure the Retriever returns
149
+ `top_k` matching documents.
150
+ The way runtime filters are applied depends on the `filter_policy` selected when initializing the Retriever.
151
+ :param top_k: Maximum number of documents to return.
152
+ :returns: A dictionary with the following keys:
153
+ - `documents`: List of `Document`s that match the query.
154
+ """
155
+ filters = apply_filter_policy(self._filter_policy, self._filters, filters)
156
+ docs = await self._document_store._embedding_retrieval_async(
157
+ query_embedding=query_embedding,
158
+ filters=filters,
159
+ top_k=top_k or self._top_k,
160
+ num_candidates=self._num_candidates,
161
+ )
162
+ return {"documents": docs}