elasticsearch-haystack 0.7.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/CHANGELOG.md +10 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/PKG-INFO +1 -1
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/pyproject.toml +6 -4
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +5 -1
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +5 -1
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +2 -2
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_bm25_retriever.py +25 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_document_store.py +1 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_embedding_retriever.py +24 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_filters.py +1 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/.gitignore +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/LICENSE +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/README.md +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/docker-compose.yml +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/pydoc/config.yml +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
- {elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/__init__.py +0 -0
|
@@ -5,6 +5,16 @@
|
|
|
5
5
|
### 🚀 Features
|
|
6
6
|
|
|
7
7
|
- Defer the database connection to when it's needed (#766)
|
|
8
|
+
- Add filter_policy to elasticsearch integration (#825)
|
|
9
|
+
|
|
10
|
+
### 🐛 Bug Fixes
|
|
11
|
+
|
|
12
|
+
- `ElasticSearch` - Fallback to default filter policy when deserializing retrievers without the init parameter (#898)
|
|
13
|
+
|
|
14
|
+
### ⚙️ Miscellaneous Tasks
|
|
15
|
+
|
|
16
|
+
- Retry tests to reduce flakyness (#836)
|
|
17
|
+
- Update ruff invocation to include check parameter (#853)
|
|
8
18
|
|
|
9
19
|
## [integrations/elasticsearch-v0.5.0] - 2024-05-24
|
|
10
20
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: elasticsearch-haystack
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Haystack 2.x Document Store for ElasticSearch
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -49,10 +49,12 @@ dependencies = [
|
|
|
49
49
|
"haystack-pydoc-tools",
|
|
50
50
|
]
|
|
51
51
|
[tool.hatch.envs.default.scripts]
|
|
52
|
-
test = "pytest
|
|
53
|
-
test-cov = "coverage run -m pytest
|
|
52
|
+
test = "pytest {args:tests}"
|
|
53
|
+
test-cov = "coverage run -m pytest {args:tests}"
|
|
54
|
+
test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
|
|
54
55
|
cov-report = ["- coverage combine", "coverage report"]
|
|
55
56
|
cov = ["test-cov", "cov-report"]
|
|
57
|
+
cov-retry = ["test-cov-retry", "cov-report"]
|
|
56
58
|
docs = ["pydoc-markdown pydoc/config.yml"]
|
|
57
59
|
|
|
58
60
|
[[tool.hatch.envs.all.matrix]]
|
|
@@ -63,8 +65,8 @@ detached = true
|
|
|
63
65
|
dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"]
|
|
64
66
|
[tool.hatch.envs.lint.scripts]
|
|
65
67
|
typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}"
|
|
66
|
-
style = ["ruff check {args:.}", "black --check --diff {args:.}"]
|
|
67
|
-
fmt = ["black {args:.}", "ruff --fix {args:.}", "style"]
|
|
68
|
+
style = ["ruff check {args:. --exclude tests/}", "black --check --diff {args:.}"]
|
|
69
|
+
fmt = ["black {args:.}", "ruff --fix {args:. --exclude tests/}", "style"]
|
|
68
70
|
all = ["style", "typing"]
|
|
69
71
|
|
|
70
72
|
[tool.hatch.metadata]
|
|
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
|
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
8
|
from haystack.document_stores.types import FilterPolicy
|
|
9
9
|
from haystack.document_stores.types.filter_policy import apply_filter_policy
|
|
10
|
+
|
|
10
11
|
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
11
12
|
|
|
12
13
|
|
|
@@ -108,7 +109,10 @@ class ElasticsearchBM25Retriever:
|
|
|
108
109
|
data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
|
|
109
110
|
data["init_parameters"]["document_store"]
|
|
110
111
|
)
|
|
111
|
-
|
|
112
|
+
# Pipelines serialized with old versions of the component might not
|
|
113
|
+
# have the filter_policy field.
|
|
114
|
+
if filter_policy := data["init_parameters"].get("filter_policy"):
|
|
115
|
+
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
112
116
|
return default_from_dict(cls, data)
|
|
113
117
|
|
|
114
118
|
@component.output_types(documents=List[Document])
|
|
@@ -7,6 +7,7 @@ from haystack import component, default_from_dict, default_to_dict
|
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
8
|
from haystack.document_stores.types import FilterPolicy
|
|
9
9
|
from haystack.document_stores.types.filter_policy import apply_filter_policy
|
|
10
|
+
|
|
10
11
|
from haystack_integrations.document_stores.elasticsearch.document_store import ElasticsearchDocumentStore
|
|
11
12
|
|
|
12
13
|
|
|
@@ -106,7 +107,10 @@ class ElasticsearchEmbeddingRetriever:
|
|
|
106
107
|
data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
|
|
107
108
|
data["init_parameters"]["document_store"]
|
|
108
109
|
)
|
|
109
|
-
|
|
110
|
+
# Pipelines serialized with old versions of the component might not
|
|
111
|
+
# have the filter_policy field.
|
|
112
|
+
if filter_policy := data["init_parameters"].get("filter_policy"):
|
|
113
|
+
data["init_parameters"]["filter_policy"] = FilterPolicy.from_str(filter_policy)
|
|
110
114
|
return default_from_dict(cls, data)
|
|
111
115
|
|
|
112
116
|
@component.output_types(documents=List[Document])
|
|
@@ -12,7 +12,6 @@ from haystack import default_from_dict, default_to_dict
|
|
|
12
12
|
from haystack.dataclasses import Document
|
|
13
13
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
14
14
|
from haystack.document_stores.types import DuplicatePolicy
|
|
15
|
-
from haystack.utils.filters import convert
|
|
16
15
|
from haystack.version import __version__ as haystack_version
|
|
17
16
|
|
|
18
17
|
from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
|
|
@@ -224,7 +223,8 @@ class ElasticsearchDocumentStore:
|
|
|
224
223
|
:returns: List of `Document`s that match the filters.
|
|
225
224
|
"""
|
|
226
225
|
if filters and "operator" not in filters and "conditions" not in filters:
|
|
227
|
-
|
|
226
|
+
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
|
|
227
|
+
raise ValueError(msg)
|
|
228
228
|
|
|
229
229
|
query = {"bool": {"filter": _normalize_filters(filters)}} if filters else None
|
|
230
230
|
documents = self._search_documents(query=query)
|
|
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
|
|
|
6
6
|
import pytest
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
8
|
from haystack.document_stores.types import FilterPolicy
|
|
9
|
+
|
|
9
10
|
from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchBM25Retriever
|
|
10
11
|
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
11
12
|
|
|
@@ -77,6 +78,30 @@ def test_from_dict(_mock_elasticsearch_client):
|
|
|
77
78
|
assert retriever._filter_policy == FilterPolicy.REPLACE
|
|
78
79
|
|
|
79
80
|
|
|
81
|
+
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
|
|
82
|
+
def test_from_dict_no_filter_policy(_mock_elasticsearch_client):
|
|
83
|
+
data = {
|
|
84
|
+
"type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever",
|
|
85
|
+
"init_parameters": {
|
|
86
|
+
"document_store": {
|
|
87
|
+
"init_parameters": {"hosts": "some fake host", "index": "default"},
|
|
88
|
+
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
|
|
89
|
+
},
|
|
90
|
+
"filters": {},
|
|
91
|
+
"fuzziness": "AUTO",
|
|
92
|
+
"top_k": 10,
|
|
93
|
+
"scale_score": True,
|
|
94
|
+
},
|
|
95
|
+
}
|
|
96
|
+
retriever = ElasticsearchBM25Retriever.from_dict(data)
|
|
97
|
+
assert retriever._document_store
|
|
98
|
+
assert retriever._filters == {}
|
|
99
|
+
assert retriever._fuzziness == "AUTO"
|
|
100
|
+
assert retriever._top_k == 10
|
|
101
|
+
assert retriever._scale_score
|
|
102
|
+
assert retriever._filter_policy == FilterPolicy.REPLACE # defaults to REPLACE
|
|
103
|
+
|
|
104
|
+
|
|
80
105
|
def test_run():
|
|
81
106
|
mock_store = Mock(spec=ElasticsearchDocumentStore)
|
|
82
107
|
mock_store._bm25_retrieval.return_value = [Document(content="Test doc")]
|
|
@@ -12,6 +12,7 @@ from haystack.dataclasses.document import Document
|
|
|
12
12
|
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
|
|
13
13
|
from haystack.document_stores.types import DuplicatePolicy
|
|
14
14
|
from haystack.testing.document_store import DocumentStoreBaseTests
|
|
15
|
+
|
|
15
16
|
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
16
17
|
|
|
17
18
|
|
{elasticsearch_haystack-0.7.0 → elasticsearch_haystack-1.0.0}/tests/test_embedding_retriever.py
RENAMED
|
@@ -6,6 +6,7 @@ from unittest.mock import Mock, patch
|
|
|
6
6
|
import pytest
|
|
7
7
|
from haystack.dataclasses import Document
|
|
8
8
|
from haystack.document_stores.types import FilterPolicy
|
|
9
|
+
|
|
9
10
|
from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
|
|
10
11
|
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
|
|
11
12
|
|
|
@@ -74,6 +75,29 @@ def test_from_dict(_mock_elasticsearch_client):
|
|
|
74
75
|
assert retriever._num_candidates is None
|
|
75
76
|
|
|
76
77
|
|
|
78
|
+
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
|
|
79
|
+
def test_from_dict_no_filter_policy(_mock_elasticsearch_client):
|
|
80
|
+
t = "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever.ElasticsearchEmbeddingRetriever"
|
|
81
|
+
data = {
|
|
82
|
+
"type": t,
|
|
83
|
+
"init_parameters": {
|
|
84
|
+
"document_store": {
|
|
85
|
+
"init_parameters": {"hosts": "some fake host", "index": "default"},
|
|
86
|
+
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
|
|
87
|
+
},
|
|
88
|
+
"filters": {},
|
|
89
|
+
"top_k": 10,
|
|
90
|
+
"num_candidates": None,
|
|
91
|
+
},
|
|
92
|
+
}
|
|
93
|
+
retriever = ElasticsearchEmbeddingRetriever.from_dict(data)
|
|
94
|
+
assert retriever._document_store
|
|
95
|
+
assert retriever._filters == {}
|
|
96
|
+
assert retriever._top_k == 10
|
|
97
|
+
assert retriever._num_candidates is None
|
|
98
|
+
assert retriever._filter_policy == FilterPolicy.REPLACE # defaults to REPLACE
|
|
99
|
+
|
|
100
|
+
|
|
77
101
|
def test_run():
|
|
78
102
|
mock_store = Mock(spec=ElasticsearchDocumentStore)
|
|
79
103
|
mock_store._embedding_retrieval.return_value = [Document(content="Test doc", embedding=[0.1, 0.2])]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|