elasticsearch-haystack 2.0.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of elasticsearch-haystack might be problematic. Click here for more details.
- elasticsearch_haystack-3.0.0/CHANGELOG.md +166 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/PKG-INFO +4 -4
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/pyproject.toml +11 -4
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +24 -1
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +30 -4
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +309 -89
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/tests/test_bm25_retriever.py +63 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/tests/test_document_store.py +161 -32
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/tests/test_embedding_retriever.py +65 -0
- elasticsearch_haystack-2.0.0/CHANGELOG.md +0 -98
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/.gitignore +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/LICENSE +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/README.md +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/docker-compose.yml +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/pydoc/config.yml +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/tests/__init__.py +0 -0
- {elasticsearch_haystack-2.0.0 → elasticsearch_haystack-3.0.0}/tests/test_filters.py +0 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [integrations/elasticsearch-v2.1.0] - 2025-02-26
|
|
4
|
+
|
|
5
|
+
### 🚀 Features
|
|
6
|
+
|
|
7
|
+
- Adding async support to ElasticSearch retrievers and document store (#1429)
|
|
8
|
+
|
|
9
|
+
### 🧹 Chores
|
|
10
|
+
|
|
11
|
+
- Remove Python 3.8 support (#1421)
|
|
12
|
+
|
|
13
|
+
### 🌀 Miscellaneous
|
|
14
|
+
|
|
15
|
+
- Docs: update changelog for integrations/elasticsearch (#1400)
|
|
16
|
+
|
|
17
|
+
## [integrations/elasticsearch-v2.0.0] - 2025-02-14
|
|
18
|
+
|
|
19
|
+
### 🧹 Chores
|
|
20
|
+
|
|
21
|
+
- Inherit from `FilterDocumentsTestWithDataframe` in Document Stores (#1290)
|
|
22
|
+
- [**breaking**] Elasticsearch - remove dataframe support (#1377)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
## [integrations/elasticsearch-v1.0.1] - 2024-10-28
|
|
26
|
+
|
|
27
|
+
### ⚙️ CI
|
|
28
|
+
|
|
29
|
+
- Adopt uv as installer (#1142)
|
|
30
|
+
|
|
31
|
+
### 🧹 Chores
|
|
32
|
+
|
|
33
|
+
- Update changelog after removing legacy filters (#1083)
|
|
34
|
+
- Update ruff linting scripts and settings (#1105)
|
|
35
|
+
|
|
36
|
+
### 🌀 Miscellaneous
|
|
37
|
+
|
|
38
|
+
- Fix: Elasticsearch - allow passing headers (#1156)
|
|
39
|
+
|
|
40
|
+
## [integrations/elasticsearch-v1.0.0] - 2024-09-12
|
|
41
|
+
|
|
42
|
+
### 🚀 Features
|
|
43
|
+
|
|
44
|
+
- Defer the database connection to when it's needed (#766)
|
|
45
|
+
- Add filter_policy to elasticsearch integration (#825)
|
|
46
|
+
|
|
47
|
+
### 🐛 Bug Fixes
|
|
48
|
+
|
|
49
|
+
- `ElasticSearch` - Fallback to default filter policy when deserializing retrievers without the init parameter (#898)
|
|
50
|
+
|
|
51
|
+
### 🧪 Testing
|
|
52
|
+
|
|
53
|
+
- Do not retry tests in `hatch run test` command (#954)
|
|
54
|
+
|
|
55
|
+
### ⚙️ CI
|
|
56
|
+
|
|
57
|
+
- Retry tests to reduce flakiness (#836)
|
|
58
|
+
|
|
59
|
+
### 🧹 Chores
|
|
60
|
+
|
|
61
|
+
- Update ruff invocation to include check parameter (#853)
|
|
62
|
+
- ElasticSearch - remove legacy filters elasticsearch (#1078)
|
|
63
|
+
|
|
64
|
+
### 🌀 Miscellaneous
|
|
65
|
+
|
|
66
|
+
- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
|
|
67
|
+
- Chore: Minor retriever pydoc fix (#884)
|
|
68
|
+
- Chore: elasticsearch - ruff update, don't ruff tests (#999)
|
|
69
|
+
|
|
70
|
+
## [integrations/elasticsearch-v0.5.0] - 2024-05-24
|
|
71
|
+
|
|
72
|
+
### 🐛 Bug Fixes
|
|
73
|
+
|
|
74
|
+
- Add support for custom mapping in ElasticsearchDocumentStore (#721)
|
|
75
|
+
|
|
76
|
+
### 🌀 Miscellaneous
|
|
77
|
+
|
|
78
|
+
- Chore: add license classifiers (#680)
|
|
79
|
+
- Chore: change the pydoc renderer class (#718)
|
|
80
|
+
|
|
81
|
+
## [integrations/elasticsearch-v0.4.0] - 2024-04-03
|
|
82
|
+
|
|
83
|
+
### 📚 Documentation
|
|
84
|
+
|
|
85
|
+
- Docstring update (#525)
|
|
86
|
+
- Review Elastic (#541)
|
|
87
|
+
- Disable-class-def (#556)
|
|
88
|
+
|
|
89
|
+
### 🌀 Miscellaneous
|
|
90
|
+
|
|
91
|
+
- Make tests show coverage (#566)
|
|
92
|
+
- Refactor tests (#574)
|
|
93
|
+
- Remove references to Python 3.7 (#601)
|
|
94
|
+
- Make Document Stores initially skip `SparseEmbedding` (#606)
|
|
95
|
+
- [Elasticsearch] fix: Filters not working with metadata that contain a space or capitalization (#639)
|
|
96
|
+
|
|
97
|
+
## [integrations/elasticsearch-v0.3.0] - 2024-02-23
|
|
98
|
+
|
|
99
|
+
### 🐛 Bug Fixes
|
|
100
|
+
|
|
101
|
+
- Fix order of API docs (#447)
|
|
102
|
+
|
|
103
|
+
### 📚 Documentation
|
|
104
|
+
|
|
105
|
+
- Update category slug (#442)
|
|
106
|
+
|
|
107
|
+
### 🌀 Miscellaneous
|
|
108
|
+
|
|
109
|
+
- Generate api docs (#322)
|
|
110
|
+
- Add filters to run function in retrievers of elasticsearch (#440)
|
|
111
|
+
- Add user-agent header (#457)
|
|
112
|
+
|
|
113
|
+
## [integrations/elasticsearch-v0.2.0] - 2024-01-19
|
|
114
|
+
|
|
115
|
+
### 🌀 Miscellaneous
|
|
116
|
+
|
|
117
|
+
- Mount import paths under haystack_integrations (#244)
|
|
118
|
+
|
|
119
|
+
## [integrations/elasticsearch-v0.1.3] - 2024-01-18
|
|
120
|
+
|
|
121
|
+
### 🌀 Miscellaneous
|
|
122
|
+
|
|
123
|
+
- Added top_k argument in the run function of ElasticsearchBM25Retriever (#130)
|
|
124
|
+
- Add more docstrings for `ElasticsearchDocumentStore` and `ElasticsearchBM25Retriever` (#184)
|
|
125
|
+
- Elastic - update imports for beta5 (#238)
|
|
126
|
+
|
|
127
|
+
## [integrations/elasticsearch-v0.1.2] - 2023-12-20
|
|
128
|
+
|
|
129
|
+
### 🐛 Bug Fixes
|
|
130
|
+
|
|
131
|
+
- Fix project URLs (#96)
|
|
132
|
+
|
|
133
|
+
### 🚜 Refactor
|
|
134
|
+
|
|
135
|
+
- Use `hatch_vcs` to manage integrations versioning (#103)
|
|
136
|
+
|
|
137
|
+
### 🌀 Miscellaneous
|
|
138
|
+
|
|
139
|
+
- Update elasticsearch test badge (#79)
|
|
140
|
+
- [Elasticsearch] - BM25 retrieval: not all terms must mandatorily match (#125)
|
|
141
|
+
|
|
142
|
+
## [integrations/elasticsearch-v0.1.1] - 2023-12-05
|
|
143
|
+
|
|
144
|
+
### 🐛 Bug Fixes
|
|
145
|
+
|
|
146
|
+
- Document Stores: fix protocol import (#77)
|
|
147
|
+
|
|
148
|
+
## [integrations/elasticsearch-v0.1.0] - 2023-12-04
|
|
149
|
+
|
|
150
|
+
### 🐛 Bug Fixes
|
|
151
|
+
|
|
152
|
+
- Fix license headers
|
|
153
|
+
|
|
154
|
+
### 🌀 Miscellaneous
|
|
155
|
+
|
|
156
|
+
- Remove Document Store decorator (#76)
|
|
157
|
+
|
|
158
|
+
## [integrations/elasticsearch-v0.0.2] - 2023-11-29
|
|
159
|
+
|
|
160
|
+
### 🌀 Miscellaneous
|
|
161
|
+
|
|
162
|
+
- Reorganize repository (#62)
|
|
163
|
+
- Update `ElasticSearchDocumentStore` to use latest `haystack-ai` version (#63)
|
|
164
|
+
- Bump elasticsearch_haystack to 0.0.2
|
|
165
|
+
|
|
166
|
+
<!-- generated by git-cliff -->
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -11,15 +11,15 @@ License-File: LICENSE
|
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
19
18
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
20
|
-
Requires-Python: >=3.8
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: aiohttp
|
|
21
21
|
Requires-Dist: elasticsearch<9,>=8
|
|
22
|
-
Requires-Dist: haystack-ai
|
|
22
|
+
Requires-Dist: haystack-ai>=2.11.0
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
|
|
25
25
|
[](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml)
|
|
@@ -7,7 +7,7 @@ name = "elasticsearch-haystack"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = 'Haystack 2.x Document Store for ElasticSearch'
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.8"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
keywords = []
|
|
13
13
|
authors = [{ name = "Silvano Cerza", email = "silvanocerza@gmail.com" }]
|
|
@@ -15,14 +15,17 @@ classifiers = [
|
|
|
15
15
|
"License :: OSI Approved :: Apache Software License",
|
|
16
16
|
"Development Status :: 4 - Beta",
|
|
17
17
|
"Programming Language :: Python",
|
|
18
|
-
"Programming Language :: Python :: 3.8",
|
|
19
18
|
"Programming Language :: Python :: 3.9",
|
|
20
19
|
"Programming Language :: Python :: 3.10",
|
|
21
20
|
"Programming Language :: Python :: 3.11",
|
|
22
21
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
23
22
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
24
23
|
]
|
|
25
|
-
dependencies = ["haystack-ai", "elasticsearch>=8,<9"]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"haystack-ai>=2.11.0",
|
|
26
|
+
"elasticsearch>=8,<9",
|
|
27
|
+
"aiohttp" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
|
|
28
|
+
]
|
|
26
29
|
|
|
27
30
|
[project.urls]
|
|
28
31
|
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme"
|
|
@@ -45,6 +48,7 @@ installer = "uv"
|
|
|
45
48
|
dependencies = [
|
|
46
49
|
"coverage[toml]>=6.5",
|
|
47
50
|
"pytest",
|
|
51
|
+
"pytest-asyncio",
|
|
48
52
|
"pytest-rerunfailures",
|
|
49
53
|
"pytest-xdist",
|
|
50
54
|
"haystack-pydoc-tools",
|
|
@@ -59,12 +63,13 @@ cov-retry = ["test-cov-retry", "cov-report"]
|
|
|
59
63
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
60
64
|
|
|
61
65
|
[[tool.hatch.envs.all.matrix]]
|
|
62
|
-
python = ["3.8", "3.9", "3.10", "3.11"]
|
|
66
|
+
python = [ "3.9", "3.10", "3.11"]
|
|
63
67
|
|
|
64
68
|
[tool.hatch.envs.lint]
|
|
65
69
|
installer = "uv"
|
|
66
70
|
detached = true
|
|
67
71
|
dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
72
|
+
|
|
68
73
|
[tool.hatch.envs.lint.scripts]
|
|
69
74
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
70
75
|
style = ["ruff check {args:}", "black --check --diff {args:.}"]
|
|
@@ -157,6 +162,8 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
|
|
|
157
162
|
[tool.pytest.ini_options]
|
|
158
163
|
minversion = "6.0"
|
|
159
164
|
markers = ["unit: unit tests", "integration: integration tests"]
|
|
165
|
+
asyncio_mode = "auto"
|
|
166
|
+
asyncio_default_fixture_loop_scope = "class"
|
|
160
167
|
|
|
161
168
|
[[tool.mypy.overrides]]
|
|
162
169
|
module = ["haystack.*", "haystack_integrations.*", "numpy.*", "pytest.*"]
|
|
@@ -120,7 +120,7 @@ class ElasticsearchBM25Retriever:
|
|
|
120
120
|
"""
|
|
121
121
|
Retrieve documents using the BM25 keyword-based algorithm.
|
|
122
122
|
|
|
123
|
-
:param query: String to search in `Document`s
|
|
123
|
+
:param query: String to search in the `Document`s text.
|
|
124
124
|
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
125
125
|
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
126
126
|
details.
|
|
@@ -137,3 +137,26 @@ class ElasticsearchBM25Retriever:
|
|
|
137
137
|
scale_score=self._scale_score,
|
|
138
138
|
)
|
|
139
139
|
return {"documents": docs}
|
|
140
|
+
|
|
141
|
+
@component.output_types(documents=List[Document])
|
|
142
|
+
async def run_async(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
|
|
143
|
+
"""
|
|
144
|
+
Asynchronously retrieve documents using the BM25 keyword-based algorithm.
|
|
145
|
+
|
|
146
|
+
:param query: String to search in the `Document`s text.
|
|
147
|
+
:param filters: Filters applied to the retrieved Documents. The way runtime filters are applied depends on
|
|
148
|
+
the `filter_policy` chosen at retriever initialization. See init method docstring for more
|
|
149
|
+
details.
|
|
150
|
+
:param top_k: Maximum number of `Document`s to return.
|
|
151
|
+
:returns: A dictionary with the following keys:
|
|
152
|
+
- `documents`: List of `Document`s that match the query.
|
|
153
|
+
"""
|
|
154
|
+
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
|
|
155
|
+
docs = await self._document_store._bm25_retrieval_async(
|
|
156
|
+
query=query,
|
|
157
|
+
filters=filters,
|
|
158
|
+
fuzziness=self._fuzziness,
|
|
159
|
+
top_k=top_k or self._top_k,
|
|
160
|
+
scale_score=self._scale_score,
|
|
161
|
+
)
|
|
162
|
+
return {"documents": docs}
|
|
@@ -119,10 +119,11 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
119
119
|
Retrieve documents using a vector similarity metric.
|
|
120
120
|
|
|
121
121
|
:param query_embedding: Embedding of the query.
|
|
122
|
-
:param filters: Filters applied
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
122
|
+
:param filters: Filters applied when fetching documents from the Document Store.
|
|
123
|
+
Filters are applied during the approximate kNN search to ensure the Retriever returns
|
|
124
|
+
`top_k` matching documents.
|
|
125
|
+
The way runtime filters are applied depends on the `filter_policy` selected when initializing the Retriever.
|
|
126
|
+
:param top_k: Maximum number of documents to return.
|
|
126
127
|
:returns: A dictionary with the following keys:
|
|
127
128
|
- `documents`: List of `Document`s most similar to the given `query_embedding`
|
|
128
129
|
"""
|
|
@@ -134,3 +135,28 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
134
135
|
num_candidates=self._num_candidates,
|
|
135
136
|
)
|
|
136
137
|
return {"documents": docs}
|
|
138
|
+
|
|
139
|
+
@component.output_types(documents=List[Document])
|
|
140
|
+
async def run_async(
|
|
141
|
+
self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None
|
|
142
|
+
):
|
|
143
|
+
"""
|
|
144
|
+
Asynchronously retrieve documents using a vector similarity metric.
|
|
145
|
+
|
|
146
|
+
:param query_embedding: Embedding of the query.
|
|
147
|
+
:param filters: Filters applied when fetching documents from the Document Store.
|
|
148
|
+
Filters are applied during the approximate kNN search to ensure the Retriever returns
|
|
149
|
+
`top_k` matching documents.
|
|
150
|
+
The way runtime filters are applied depends on the `filter_policy` selected when initializing the Retriever.
|
|
151
|
+
:param top_k: Maximum number of documents to return.
|
|
152
|
+
:returns: A dictionary with the following keys:
|
|
153
|
+
- `documents`: List of `Document`s most similar to the given `query_embedding`
|
|
154
|
+
"""
|
|
155
|
+
filters = apply_filter_policy(self._filter_policy, self._filters, filters)
|
|
156
|
+
docs = await self._document_store._embedding_retrieval_async(
|
|
157
|
+
query_embedding=query_embedding,
|
|
158
|
+
filters=filters,
|
|
159
|
+
top_k=top_k or self._top_k,
|
|
160
|
+
num_candidates=self._num_candidates,
|
|
161
|
+
)
|
|
162
|
+
return {"documents": docs}
|