elasticsearch-haystack 0.2.0__tar.gz → 0.3.0__tar.gz

This diff shows the changes between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (18)
  1. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/PKG-INFO +1 -1
  2. elasticsearch_haystack-0.3.0/pydoc/config.yml +31 -0
  3. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/pyproject.toml +4 -0
  4. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py +3 -2
  5. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py +3 -2
  6. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py +6 -1
  7. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/tests/test_document_store.py +3 -0
  8. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/.gitignore +0 -0
  9. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/LICENSE +0 -0
  10. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/README.md +0 -0
  11. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/docker-compose.yml +0 -0
  12. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py +0 -0
  13. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/document_stores/elasticsearch/__init__.py +0 -0
  14. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/src/haystack_integrations/document_stores/elasticsearch/filters.py +0 -0
  15. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/tests/__init__.py +0 -0
  16. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/tests/test_bm25_retriever.py +0 -0
  17. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/tests/test_embedding_retriever.py +0 -0
  18. {elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.3.0}/tests/test_filters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: elasticsearch-haystack
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Haystack 2.x Document Store for ElasticSearch
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -0,0 +1,31 @@
1
+ loaders:
2
+ - type: haystack_pydoc_tools.loaders.CustomPythonLoader
3
+ search_path: [../src]
4
+ modules: [
5
+ "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever",
6
+ "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever",
7
+ "haystack_integrations.document_stores.elasticsearch.document_store",
8
+ "haystack_integrations.document_stores.elasticsearch.filters",
9
+ ]
10
+ ignore_when_discovered: ["__init__"]
11
+ processors:
12
+ - type: filter
13
+ expression:
14
+ documented_only: true
15
+ do_not_filter_modules: false
16
+ skip_empty_modules: true
17
+ - type: smart
18
+ - type: crossref
19
+ renderer:
20
+ type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
21
+ excerpt: Elasticsearch integration for Haystack
22
+ category_slug: integrations-api
23
+ title: Elasticsearch
24
+ slug: integrations-elasticsearch
25
+ order: 70
26
+ markdown:
27
+ descriptive_class_title: false
28
+ descriptive_module_title: true
29
+ add_method_class_prefix: true
30
+ add_member_class_prefix: false
31
+ filename: _readme_elasticsearch.md
@@ -49,6 +49,7 @@ dependencies = [
49
49
  "coverage[toml]>=6.5",
50
50
  "pytest",
51
51
  "pytest-xdist",
52
+ "haystack-pydoc-tools",
52
53
  ]
53
54
  [tool.hatch.envs.default.scripts]
54
55
  test = "pytest {args:tests}"
@@ -61,6 +62,9 @@ cov = [
61
62
  "test-cov",
62
63
  "cov-report",
63
64
  ]
65
+ docs = [
66
+ "pydoc-markdown pydoc/config.yml"
67
+ ]
64
68
 
65
69
  [[tool.hatch.envs.all.matrix]]
66
70
  python = ["3.8", "3.9", "3.10", "3.11"]
@@ -89,17 +89,18 @@ class ElasticsearchBM25Retriever:
89
89
  return default_from_dict(cls, data)
90
90
 
91
91
  @component.output_types(documents=List[Document])
92
- def run(self, query: str, top_k: Optional[int] = None):
92
+ def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
93
93
  """
94
94
  Retrieve documents using the BM25 keyword-based algorithm.
95
95
 
96
96
  :param query: String to search in Documents' text.
97
+ :param filters: Filters applied to the retrieved Documents.
97
98
  :param top_k: Maximum number of Documents to return.
98
99
  :return: List of Documents that match the query.
99
100
  """
100
101
  docs = self._document_store._bm25_retrieval(
101
102
  query=query,
102
- filters=self._filters,
103
+ filters=filters or self._filters,
103
104
  fuzziness=self._fuzziness,
104
105
  top_k=top_k or self._top_k,
105
106
  scale_score=self._scale_score,
@@ -63,17 +63,18 @@ class ElasticsearchEmbeddingRetriever:
63
63
  return default_from_dict(cls, data)
64
64
 
65
65
  @component.output_types(documents=List[Document])
66
- def run(self, query_embedding: List[float], top_k: Optional[int] = None):
66
+ def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
67
67
  """
68
68
  Retrieve documents using a vector similarity metric.
69
69
 
70
70
  :param query_embedding: Embedding of the query.
71
+ :param filters: Filters applied to the retrieved Documents.
71
72
  :param top_k: Maximum number of Documents to return.
72
73
  :return: List of Documents similar to `query_embedding`.
73
74
  """
74
75
  docs = self._document_store._embedding_retrieval(
75
76
  query_embedding=query_embedding,
76
- filters=self._filters,
77
+ filters=filters or self._filters,
77
78
  top_k=top_k or self._top_k,
78
79
  num_candidates=self._num_candidates,
79
80
  )
@@ -13,6 +13,7 @@ from haystack.dataclasses import Document
13
13
  from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
14
14
  from haystack.document_stores.types import DuplicatePolicy
15
15
  from haystack.utils.filters import convert
16
+ from haystack.version import __version__ as haystack_version
16
17
 
17
18
  from elasticsearch import Elasticsearch, helpers # type: ignore[import-not-found]
18
19
 
@@ -90,7 +91,11 @@ class ElasticsearchDocumentStore:
90
91
  :param **kwargs: Optional arguments that ``Elasticsearch`` takes.
91
92
  """
92
93
  self._hosts = hosts
93
- self._client = Elasticsearch(hosts, **kwargs)
94
+ self._client = Elasticsearch(
95
+ hosts,
96
+ headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
97
+ **kwargs,
98
+ )
94
99
  self._index = index
95
100
  self._embedding_similarity_function = embedding_similarity_function
96
101
  self._kwargs = kwargs
@@ -95,6 +95,9 @@ class TestDocumentStore(DocumentStoreBaseTests):
95
95
  assert document_store._index == "default"
96
96
  assert document_store._embedding_similarity_function == "cosine"
97
97
 
98
+ def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
99
+ assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
100
+
98
101
  def test_write_documents(self, document_store: ElasticsearchDocumentStore):
99
102
  docs = [Document(id="1")]
100
103
  assert document_store.write_documents(docs) == 1