PyPI - elasticsearch-haystack - Versions diffs - 0.2.0__tar.gz → 0.4.0__tar.gz - Mend

elasticsearch-haystack 0.2.0.tar.gz → 0.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of elasticsearch-haystack might be problematic. Click here for more details.

Files changed (18) hide show

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: elasticsearch-haystack
-Version: 0.2.0
+Version: 0.4.0
 Summary: Haystack 2.x Document Store for ElasticSearch
 Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/elasticsearch#readme
 Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues

elasticsearch_haystack-0.4.0/pydoc/config.yml ADDED Viewed

@@ -0,0 +1,32 @@
+loaders:
+  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
+    search_path: [../src]
+    modules: [
+      "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever",
+      "haystack_integrations.components.retrievers.elasticsearch.embedding_retriever",
+      "haystack_integrations.document_stores.elasticsearch.document_store",
+      "haystack_integrations.document_stores.elasticsearch.filters",
+    ]
+    ignore_when_discovered: ["__init__"]
+processors:
+  - type: filter
+    expression:
+    documented_only: true
+    do_not_filter_modules: false
+    skip_empty_modules: true
+  - type: smart
+  - type: crossref
+renderer:
+  type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer
+  excerpt: Elasticsearch integration for Haystack
+  category_slug: integrations-api
+  title: Elasticsearch
+  slug: integrations-elasticsearch
+  order: 70
+  markdown:
+    descriptive_class_title: false
+    classdef_code_block: false
+    descriptive_module_title: true
+    add_method_class_prefix: true
+    add_member_class_prefix: false
+    filename: _readme_elasticsearch.md

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/pyproject.toml RENAMED Viewed

@@ -49,6 +49,7 @@ dependencies = [
   "coverage[toml]>=6.5",
   "pytest",
   "pytest-xdist",
+  "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"
@@ -61,6 +62,9 @@ cov = [
   "test-cov",
   "cov-report",
 ]
+docs = [
+  "pydoc-markdown pydoc/config.yml"
+]
 [[tool.hatch.envs.all.matrix]]
 python = ["3.8", "3.9", "3.10", "3.11"]
@@ -92,12 +96,12 @@ all = [
 allow-direct-references = true
 [tool.black]
-target-version = ["py37"]
+target-version = ["py38"]
 line-length = 120
 skip-string-normalization = true
 [tool.ruff]
-target-version = "py37"
+target-version = "py38"
 line-length = 120
 select = [
   "A",
@@ -152,21 +156,21 @@ ban-relative-imports = "parents"
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
 [tool.coverage.run]
-source_pkgs = ["src", "tests"]
+source = ["haystack_integrations"]
 branch = true
-parallel = true
+parallel = false
-[tool.coverage.paths]
-elasticsearch_haystack = ["src/haystack_integrations", "*/elasticsearch/src/haystack_integrations"]
-tests = ["tests", "*/elasticsearch/src/tests"]
 [tool.coverage.report]
+omit = ["*/tests/*", "*/__init__.py"]
+show_missing=true
 exclude_lines = [
   "no cov",
   "if __name__ == .__main__.:",
   "if TYPE_CHECKING:",
 ]
 [tool.pytest.ini_options]
 minversion = "6.0"
 markers = [

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/src/haystack_integrations/components/retrievers/elasticsearch/bm25_retriever.py RENAMED Viewed

@@ -11,8 +11,9 @@ from haystack_integrations.document_stores.elasticsearch.document_store import E
 @component
 class ElasticsearchBM25Retriever:
     """
-    ElasticsearchBM25Retriever is a keyword-based retriever that uses BM25 to find the most
-    similar documents to a user's query.
+    ElasticsearchBM25Retriever retrieves documents from the ElasticsearchDocumentStore using BM25 algorithm to find the
+    most similar documents to a user's query.
     This retriever is only compatible with ElasticsearchDocumentStore.
     Usage example:
@@ -35,7 +36,7 @@ class ElasticsearchBM25Retriever:
     result = retriever.run(query="Who lives in Berlin?")
     for doc in result["documents"]:
-        print(doc.text)
+        print(doc.content)
     ```
     """
@@ -53,12 +54,13 @@ class ElasticsearchBM25Retriever:
         :param document_store: An instance of ElasticsearchDocumentStore.
         :param filters: Filters applied to the retrieved Documents, for more info
-                        see `ElasticsearchDocumentStore.filter_documents`, defaults to None
-        :param fuzziness: Fuzziness parameter passed to Elasticsearch, defaults to "AUTO".
-                          see the official documentation for valid values:
-                          https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
-        :param top_k: Maximum number of Documents to return, defaults to 10
-        :param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False
+                        see `ElasticsearchDocumentStore.filter_documents`.
+        :param fuzziness: Fuzziness parameter passed to Elasticsearch. See the official
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness)
+            for more details.
+        :param top_k: Maximum number of Documents to return.
+        :param scale_score: If `True`, scales the `Document`s' scores between 0 and 1.
+        :raises ValueError: If `document_store` is not an instance of `ElasticsearchDocumentStore`.
         """
         if not isinstance(document_store, ElasticsearchDocumentStore):
@@ -72,6 +74,12 @@ class ElasticsearchBM25Retriever:
         self._scale_score = scale_score
     def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+        :returns:
+            Dictionary with serialized data.
+        """
         return default_to_dict(
             self,
             filters=self._filters,
@@ -83,23 +91,33 @@ class ElasticsearchBM25Retriever:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchBM25Retriever":
+        """
+        Deserializes the component from a dictionary.
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+        """
         data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
             data["init_parameters"]["document_store"]
         )
         return default_from_dict(cls, data)
     @component.output_types(documents=List[Document])
-    def run(self, query: str, top_k: Optional[int] = None):
+    def run(self, query: str, filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
         """
         Retrieve documents using the BM25 keyword-based algorithm.
-        :param query: String to search in Documents' text.
-        :param top_k: Maximum number of Documents to return.
-        :return: List of Documents that match the query.
+        :param query: String to search in `Document`s' text.
+        :param filters: Filters applied to the retrieved `Document`s.
+        :param top_k: Maximum number of `Document`s to return.
+        :returns: A dictionary with the following keys:
+            - `documents`: List of `Document`s that match the query.
         """
         docs = self._document_store._bm25_retrieval(
             query=query,
-            filters=self._filters,
+            filters=filters or self._filters,
             fuzziness=self._fuzziness,
             top_k=top_k or self._top_k,
             scale_score=self._scale_score,

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/src/haystack_integrations/components/retrievers/elasticsearch/embedding_retriever.py RENAMED Viewed

@@ -11,9 +11,35 @@ from haystack_integrations.document_stores.elasticsearch.document_store import E
 @component
 class ElasticsearchEmbeddingRetriever:
     """
-    Uses a vector similarity metric to retrieve documents from the ElasticsearchDocumentStore.
+    ElasticsearchEmbeddingRetriever retrieves documents from the ElasticsearchDocumentStore using vector similarity.
-    Needs to be connected to the ElasticsearchDocumentStore to run.
+    Usage example:
+    ```python
+    from haystack import Document
+    from haystack.components.embedders import SentenceTransformersTextEmbedder
+    from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
+    from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever
+    document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
+    retriever = ElasticsearchEmbeddingRetriever(document_store=document_store)
+    # Add documents to DocumentStore
+    documents = [
+        Document(content="My name is Carla and I live in Berlin"),
+        Document(content="My name is Paul and I live in New York"),
+        Document(content="My name is Silvano and I live in Matera"),
+        Document(content="My name is Usagi Tsukino and I live in Tokyo"),
+    ]
+    document_store.write_documents(documents)
+    te = SentenceTransformersTextEmbedder()
+    te.warm_up()
+    query_embeddings = te.run("Who lives in Berlin?")["embedding"]
+    result = retriever.run(query_embedding=query_embeddings)
+    for doc in result["documents"]:
+        print(doc.content)
+    ```
     """
     def __init__(
@@ -28,13 +54,13 @@ class ElasticsearchEmbeddingRetriever:
         Create the ElasticsearchEmbeddingRetriever component.
         :param document_store: An instance of ElasticsearchDocumentStore.
-        :param filters: Filters applied to the retrieved Documents. Defaults to None.
-            Filters are applied during the approximate kNN search to ensure that top_k matching documents are returned.
-        :param top_k: Maximum number of Documents to return, defaults to 10
+        :param filters: Filters applied to the retrieved Documents.
+            Filters are applied during the approximate KNN search to ensure that top_k matching documents are returned.
+        :param top_k: Maximum number of Documents to return.
         :param num_candidates: Number of approximate nearest neighbor candidates on each shard. Defaults to top_k * 10.
             Increasing this value will improve search accuracy at the cost of slower search speeds.
-            You can read more about it in the Elasticsearch documentation:
-            https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy
+            You can read more about it in the Elasticsearch
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy)
         :raises ValueError: If `document_store` is not an instance of ElasticsearchDocumentStore.
         """
         if not isinstance(document_store, ElasticsearchDocumentStore):
@@ -47,6 +73,12 @@ class ElasticsearchEmbeddingRetriever:
         self._num_candidates = num_candidates
     def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+        :returns:
+            Dictionary with serialized data.
+        """
         return default_to_dict(
             self,
             filters=self._filters,
@@ -57,23 +89,33 @@ class ElasticsearchEmbeddingRetriever:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchEmbeddingRetriever":
+        """
+        Deserializes the component from a dictionary.
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+        """
         data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
             data["init_parameters"]["document_store"]
         )
         return default_from_dict(cls, data)
     @component.output_types(documents=List[Document])
-    def run(self, query_embedding: List[float], top_k: Optional[int] = None):
+    def run(self, query_embedding: List[float], filters: Optional[Dict[str, Any]] = None, top_k: Optional[int] = None):
         """
         Retrieve documents using a vector similarity metric.
         :param query_embedding: Embedding of the query.
-        :param top_k: Maximum number of Documents to return.
-        :return: List of Documents similar to `query_embedding`.
+        :param filters: Filters applied to the retrieved `Document`s.
+        :param top_k: Maximum number of `Document`s to return.
+        :returns: A dictionary with the following keys:
+            - `documents`: List of `Document`s most similar to the given `query_embedding`
         """
         docs = self._document_store._embedding_retrieval(
             query_embedding=query_embedding,
-            filters=self._filters,
+            filters=filters or self._filters,
             top_k=top_k or self._top_k,
             num_candidates=self._num_candidates,
         )

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/src/haystack_integrations/document_stores/elasticsearch/document_store.py RENAMED Viewed

@@ -13,6 +13,7 @@ from haystack.dataclasses import Document
 from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
 from haystack.utils.filters import convert
+from haystack.version import __version__ as haystack_version
 from elasticsearch import Elasticsearch, helpers  # type: ignore[import-not-found]
@@ -34,16 +35,16 @@ BM25_SCALING_FACTOR = 8
 class ElasticsearchDocumentStore:
     """
-    ElasticsearchDocumentStore is a Document Store for Elasticsearch.
-    It can be used with Elastic Cloud or your own Elasticsearch cluster.
+    ElasticsearchDocumentStore is a Document Store for Elasticsearch. It can be used with Elastic Cloud or your own
+    Elasticsearch cluster.
-    Simple usage with Elastic Cloud:
+    Usage example (Elastic Cloud):
     ```python
     from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
     document_store = ElasticsearchDocumentStore(cloud_id="YOUR_CLOUD_ID", api_key="YOUR_API_KEY")
     ```
-    One can also connect to a self-hosted Elasticsearch instance:
+    Usage example (self-hosted Elasticsearch instance):
     ```python
     from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
     document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
@@ -52,8 +53,8 @@ class ElasticsearchDocumentStore:
     We strongly recommend enabling security so that only authorized users can access your data.
     For more details on how to connect to Elasticsearch and configure security,
-    see the official Elasticsearch documentation:
-    https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html
+    see the official Elasticsearch
+    [documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html)
     All extra keyword arguments will be passed to the Elasticsearch client.
     """
@@ -68,29 +69,33 @@ class ElasticsearchDocumentStore:
     ):
         """
         Creates a new ElasticsearchDocumentStore instance.
-        When no index is explicitly specified, it will use the default index "default".
-        It will also try to create that index if it doesn't exist yet. Otherwise it will use the existing one.
+        It will try to create the index if it doesn't exist yet. Otherwise, it will use the existing one.
         One can also set the similarity function used to compare Documents embeddings. This is mostly useful
         when using the `ElasticsearchDocumentStore` in a Pipeline with an `ElasticsearchEmbeddingRetriever`.
-        For more information on connection parameters, see the official Elasticsearch documentation:
-        https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html
+        For more information on connection parameters, see the official Elasticsearch
+        [documentation](https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html)
-        For the full list of supported kwargs, see the official Elasticsearch reference:
-        https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch
+        For the full list of supported kwargs, see the official Elasticsearch
+        [reference](https://elasticsearch-py.readthedocs.io/en/stable/api.html#module-elasticsearch)
-        :param hosts: List of hosts running the Elasticsearch client. Defaults to None
-        :param index: Name of index in Elasticsearch, if it doesn't exist it will be created. Defaults to "default"
+        :param hosts: List of hosts running the Elasticsearch client.
+        :param index: Name of index in Elasticsearch.
         :param embedding_similarity_function: The similarity function used to compare Documents embeddings.
-            Defaults to "cosine". This parameter only takes effect if the index does not yet exist and is created.
+            This parameter only takes effect if the index does not yet exist and is created.
             To choose the most appropriate function, look for information about your embedding model.
-            To understand how document scores are computed, see the Elasticsearch documentation:
-            https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html#dense-vector-params
-        :param **kwargs: Optional arguments that ``Elasticsearch`` takes.
+            To understand how document scores are computed, see the Elasticsearch
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html#dense-vector-params)
+        :param **kwargs: Optional arguments that `Elasticsearch` takes.
         """
         self._hosts = hosts
-        self._client = Elasticsearch(hosts, **kwargs)
+        self._client = Elasticsearch(
+            hosts,
+            headers={"user-agent": f"haystack-py-ds/{haystack_version}"},
+            **kwargs,
+        )
         self._index = index
         self._embedding_similarity_function = embedding_similarity_function
         self._kwargs = kwargs
@@ -101,8 +106,20 @@ class ElasticsearchDocumentStore:
         # configure mapping for the embedding field
         mappings = {
             "properties": {
-                "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function}
-            }
+                "embedding": {"type": "dense_vector", "index": True, "similarity": embedding_similarity_function},
+                "content": {"type": "text"},
+            },
+            "dynamic_templates": [
+                {
+                    "strings": {
+                        "path_match": "*",
+                        "match_mapping_type": "string",
+                        "mapping": {
+                            "type": "keyword",
+                        },
+                    }
+                }
+            ],
         }
         # Create the index if it doesn't exist
@@ -110,6 +127,12 @@ class ElasticsearchDocumentStore:
             self._client.indices.create(index=index, mappings=mappings)
     def to_dict(self) -> Dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+        :returns:
+            Dictionary with serialized data.
+        """
         # This is not the best solution to serialise this class but is the fastest to implement.
         # Not all kwargs types can be serialised to text so this can fail. We must serialise each
         # type explicitly to handle this properly.
@@ -123,11 +146,20 @@ class ElasticsearchDocumentStore:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "ElasticsearchDocumentStore":
+        """
+        Deserializes the component from a dictionary.
+        :param data:
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
+        """
         return default_from_dict(cls, data)
     def count_documents(self) -> int:
         """
         Returns how many documents are present in the document store.
+        :returns: Number of documents in the document store.
         """
         return self._client.count(index=self._index)["count"]
@@ -160,6 +192,14 @@ class ElasticsearchDocumentStore:
         return documents
     def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+        """
+        The main query method for the document store. It retrieves all documents that match the filters.
+        :param filters: A dictionary of filters to apply. For more information on the structure of the filters,
+            see the official Elasticsearch
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)
+        :returns: List of `Document`s that match the filters.
+        """
         if filters and "operator" not in filters and "conditions" not in filters:
             filters = convert(filters)
@@ -169,9 +209,15 @@ class ElasticsearchDocumentStore:
     def write_documents(self, documents: List[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE) -> int:
         """
-        Writes Documents to Elasticsearch.
-        If policy is not specified or set to DuplicatePolicy.NONE, it will raise an exception if a document with the
-        same ID already exists in the document store.
+        Writes `Document`s to Elasticsearch.
+        :param documents: List of Documents to write to the document store.
+        :param policy: DuplicatePolicy to apply when a document with the same ID already exists in the document store.
+        :raises ValueError: If `documents` is not a list of `Document`s.
+        :raises DuplicateDocumentError: If a document with the same ID already exists in the document store and
+            `policy` is set to `DuplicatePolicy.FAIL` or `DuplicatePolicy.NONE`.
+        :raises DocumentStoreError: If an error occurs while writing the documents to the document store.
+        :returns: Number of documents written to the document store.
         """
         if len(documents) > 0:
             if not isinstance(documents[0], Document):
@@ -182,16 +228,30 @@ class ElasticsearchDocumentStore:
             policy = DuplicatePolicy.FAIL
         action = "index" if policy == DuplicatePolicy.OVERWRITE else "create"
-        documents_written, errors = helpers.bulk(
-            client=self._client,
-            actions=(
+        elasticsearch_actions = []
+        for doc in documents:
+            doc_dict = doc.to_dict()
+            if "sparse_embedding" in doc_dict:
+                sparse_embedding = doc_dict.pop("sparse_embedding", None)
+                if sparse_embedding:
+                    logger.warning(
+                        "Document %s has the `sparse_embedding` field set,"
+                        "but storing sparse embeddings in Elasticsearch is not currently supported."
+                        "The `sparse_embedding` field will be ignored.",
+                        doc.id,
+                    )
+            elasticsearch_actions.append(
                 {
                     "_op_type": action,
                     "_id": doc.id,
-                    "_source": doc.to_dict(),
+                    "_source": doc_dict,
                 }
-                for doc in documents
-            ),
+            )
+        documents_written, errors = helpers.bulk(
+            client=self._client,
+            actions=elasticsearch_actions,
             refresh="wait_for",
             index=self._index,
             raise_on_error=False,
@@ -220,10 +280,15 @@ class ElasticsearchDocumentStore:
         return documents_written
-    def _deserialize_document(self, hit: Dict[str, Any]) -> Document:
+    @staticmethod
+    def _deserialize_document(hit: Dict[str, Any]) -> Document:
         """
-        Creates a Document from the search hit provided.
+        Creates a `Document` from the search hit provided.
         This is mostly useful in self.filter_documents().
+        :param hit: A search hit from Elasticsearch.
+        :returns: `Document` created from the search hit.
         """
         data = hit["_source"]
@@ -235,12 +300,11 @@ class ElasticsearchDocumentStore:
     def delete_documents(self, document_ids: List[str]) -> None:
         """
-        Deletes all documents with a matching document_ids from the document store.
+        Deletes all `Document`s with a matching `document_ids` from the document store.
-        :param object_ids: the object_ids to delete
+        :param document_ids: The IDs of the `Document`s to delete.
         """
-        #
         helpers.bulk(
             client=self._client,
             actions=({"_op_type": "delete", "_id": id_} for id_ in document_ids),
@@ -259,26 +323,25 @@ class ElasticsearchDocumentStore:
         scale_score: bool = False,
     ) -> List[Document]:
         """
-        Elasticsearch by defaults uses BM25 search algorithm.
+        Retrieves `Document`s from Elasticsearch using the BM25 search algorithm.
         Even though this method is called `bm25_retrieval` it searches for `query`
         using the search algorithm `_client` was configured with.
-        This method is not mean to be part of the public interface of
+        This method is not meant to be part of the public interface of
         `ElasticsearchDocumentStore` nor called directly.
         `ElasticsearchBM25Retriever` uses this method directly and is the public interface for it.
-        `query` must be a non empty string, otherwise a `ValueError` will be raised.
-        :param query: String to search in saved Documents' text.
-        :param filters: Filters applied to the retrieved Documents, for more info
-                        see `ElasticsearchDocumentStore.filter_documents`, defaults to None
-        :param fuzziness: Fuzziness parameter passed to Elasticsearch, defaults to "AUTO".
-                          see the official documentation for valid values:
-                          https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
-        :param top_k: Maximum number of Documents to return, defaults to 10
-        :param scale_score: If `True` scales the Document`s scores between 0 and 1, defaults to False
+        :param query: String to search in saved `Document`s' text.
+        :param filters: Filters applied to the retrieved `Document`s, for more info
+                        see `ElasticsearchDocumentStore.filter_documents`.
+        :param fuzziness: Fuzziness parameter passed to Elasticsearch. See the official
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness)
+            for valid values.
+        :param top_k: Maximum number of `Document`s to return.
+        :param scale_score: If `True`, scales the `Document`s' scores between 0 and 1.
         :raises ValueError: If `query` is an empty string
-        :return: List of Document that match `query`
+        :returns: List of `Document`s that match `query`.
         """
         if not query:
@@ -324,22 +387,23 @@ class ElasticsearchDocumentStore:
     ) -> List[Document]:
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.
         It uses Elasticsearch's Approximate k-Nearest Neighbors search algorithm.
-        This method is not mean to be part of the public interface of
+        This method is not meant to be part of the public interface of
         `ElasticsearchDocumentStore` nor called directly.
         `ElasticsearchEmbeddingRetriever` uses this method directly and is the public interface for it.
         :param query_embedding: Embedding of the query.
-        :param filters: Filters applied to the retrieved Documents. Defaults to None.
+        :param filters: Filters applied to the retrieved `Document`s.
             Filters are applied during the approximate kNN search to ensure that top_k matching documents are returned.
-        :param top_k: Maximum number of Documents to return, defaults to 10
+        :param top_k: Maximum number of `Document`s to return.
         :param num_candidates: Number of approximate nearest neighbor candidates on each shard. Defaults to top_k * 10.
             Increasing this value will improve search accuracy at the cost of slower search speeds.
-            You can read more about it in the Elasticsearch documentation:
-            https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy
-        :raises ValueError: If `query_embedding` is an empty list
-        :return: List of Document that are most similar to `query_embedding`
+            You can read more about it in the Elasticsearch
+            [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy)
+        :raises ValueError: If `query_embedding` is an empty list.
+        :returns: List of `Document`s that are most similar to `query_embedding`.
         """
         if not query_embedding:

{elasticsearch_haystack-0.2.0 → elasticsearch_haystack-0.4.0}/tests/test_document_store.py RENAMED Viewed

@@ -15,6 +15,36 @@ from haystack.testing.document_store import DocumentStoreBaseTests
 from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
+@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
+def test_to_dict(_mock_elasticsearch_client):
+    document_store = ElasticsearchDocumentStore(hosts="some hosts")
+    res = document_store.to_dict()
+    assert res == {
+        "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
+        "init_parameters": {
+            "hosts": "some hosts",
+            "index": "default",
+            "embedding_similarity_function": "cosine",
+        },
+    }
+@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
+def test_from_dict(_mock_elasticsearch_client):
+    data = {
+        "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
+        "init_parameters": {
+            "hosts": "some hosts",
+            "index": "default",
+            "embedding_similarity_function": "cosine",
+        },
+    }
+    document_store = ElasticsearchDocumentStore.from_dict(data)
+    assert document_store._hosts == "some hosts"
+    assert document_store._index == "default"
+    assert document_store._embedding_similarity_function == "cosine"
 @pytest.mark.integration
 class TestDocumentStore(DocumentStoreBaseTests):
     """
@@ -67,33 +97,8 @@ class TestDocumentStore(DocumentStoreBaseTests):
         super().assert_documents_are_equal(received, expected)
-    @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
-    def test_to_dict(self, _mock_elasticsearch_client):
-        document_store = ElasticsearchDocumentStore(hosts="some hosts")
-        res = document_store.to_dict()
-        assert res == {
-            "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
-            "init_parameters": {
-                "hosts": "some hosts",
-                "index": "default",
-                "embedding_similarity_function": "cosine",
-            },
-        }
-    @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
-    def test_from_dict(self, _mock_elasticsearch_client):
-        data = {
-            "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
-            "init_parameters": {
-                "hosts": "some hosts",
-                "index": "default",
-                "embedding_similarity_function": "cosine",
-            },
-        }
-        document_store = ElasticsearchDocumentStore.from_dict(data)
-        assert document_store._hosts == "some hosts"
-        assert document_store._index == "default"
-        assert document_store._embedding_similarity_function == "cosine"
+    def test_user_agent_header(self, document_store: ElasticsearchDocumentStore):
+        assert document_store._client._headers["user-agent"].startswith("haystack-py-ds/")
     def test_write_documents(self, document_store: ElasticsearchDocumentStore):
         docs = [Document(id="1")]