PyPI - elasticsearch - Versions diffs - 8.19.0__py3-none-any.whl → 8.19.1__py3-none-any.whl - Mend

elasticsearch 8.19.0__py3-none-any.whl → 8.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

elasticsearch/_async/client/__init__.py +12 -6
elasticsearch/_async/client/cat.py +124 -10
elasticsearch/_async/client/cluster.py +7 -2
elasticsearch/_async/client/esql.py +16 -6
elasticsearch/_async/client/indices.py +1 -1
elasticsearch/_async/client/inference.py +112 -4
elasticsearch/_async/client/snapshot.py +262 -112
elasticsearch/_async/client/sql.py +1 -1
elasticsearch/_async/client/transform.py +60 -0
elasticsearch/_sync/client/__init__.py +12 -6
elasticsearch/_sync/client/cat.py +124 -10
elasticsearch/_sync/client/cluster.py +7 -2
elasticsearch/_sync/client/esql.py +16 -6
elasticsearch/_sync/client/indices.py +1 -1
elasticsearch/_sync/client/inference.py +112 -4
elasticsearch/_sync/client/snapshot.py +262 -112
elasticsearch/_sync/client/sql.py +1 -1
elasticsearch/_sync/client/transform.py +60 -0
elasticsearch/_version.py +1 -1
elasticsearch/dsl/_async/document.py +84 -0
elasticsearch/dsl/_sync/document.py +84 -0
elasticsearch/dsl/aggs.py +20 -0
elasticsearch/dsl/document_base.py +43 -0
elasticsearch/dsl/field.py +49 -10
elasticsearch/dsl/response/aggs.py +1 -1
elasticsearch/dsl/types.py +140 -11
elasticsearch/dsl/utils.py +1 -1
elasticsearch/esql/__init__.py +2 -1
elasticsearch/esql/esql.py +85 -34
elasticsearch/esql/functions.py +37 -25
{elasticsearch-8.19.0.dist-info → elasticsearch-8.19.1.dist-info}/METADATA +1 -3
{elasticsearch-8.19.0.dist-info → elasticsearch-8.19.1.dist-info}/RECORD +35 -35
{elasticsearch-8.19.0.dist-info → elasticsearch-8.19.1.dist-info}/WHEEL +0 -0
{elasticsearch-8.19.0.dist-info → elasticsearch-8.19.1.dist-info}/licenses/LICENSE +0 -0
{elasticsearch-8.19.0.dist-info → elasticsearch-8.19.1.dist-info}/licenses/NOTICE +0 -0

elasticsearch/dsl/_async/document.py CHANGED Viewed

@@ -20,6 +20,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     AsyncIterable,
+    AsyncIterator,
     Dict,
     List,
     Optional,
@@ -42,6 +43,7 @@ from .search import AsyncSearch
 if TYPE_CHECKING:
     from elasticsearch import AsyncElasticsearch
+    from elasticsearch.esql.esql import ESQLBase
 class AsyncIndexMeta(DocumentMeta):
@@ -520,3 +522,85 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
                 return action
         return await async_bulk(es, Generate(actions), **kwargs)
+    @classmethod
+    async def esql_execute(
+        cls,
+        query: "ESQLBase",
+        return_additional: bool = False,
+        ignore_missing_fields: bool = False,
+        using: Optional[AsyncUsingType] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
+        """
+        Execute the given ES|QL query and return an asynchronous iterator over
+        its results. By default each item is an instance of this ``Document``.
+        When ``return_additional=True`` each item is instead a 2-element tuple,
+        where the first element is the document instance and the second a
+        dictionary with any remaining columns requested in the query.
+        Because this is an asynchronous generator, the query is only sent (and
+        any error raised) once iteration starts.
+        :arg query: an ES|QL query object created with the ``esql_from()`` method.
+        :arg return_additional: if ``False`` (the default), this method returns
+            document objects. If set to ``True``, the method returns tuples with
+            a document in the first element and a dictionary with any additional
+            columns returned by the query in the second element.
+        :arg ignore_missing_fields: if ``False`` (the default), all the fields of
+            the document must be present in the query, or else an exception is
+            raised. Set to ``True`` to allow missing fields, which will result in
+            partially initialized document objects.
+        :arg using: connection alias to use, defaults to ``'default'``
+        :arg kwargs: additional options for the ``client.esql.query()`` function.
+        :raises ValueError: if ``ignore_missing_fields`` is ``False`` and some
+            document fields are absent from the response, or if the response
+            does not include the ``_id`` column.
+        """
+        es = cls._get_connection(using)
+        response = await es.esql.query(query=str(query), **kwargs)
+        query_columns = [col["name"] for col in response.body.get("columns", [])]
+        returned_columns = set(query_columns)
+        # Here we get the list of columns defined in the document, which are the
+        # columns that we will take from each result to assemble the document
+        # object.
+        # When `for_esql=False` is passed below by default, the list will include
+        # nested fields, which ES|QL does not return, causing an error. When passing
+        # `ignore_missing_fields=True` the list will be generated with
+        # `for_esql=True`, so the error will not occur, but the documents will
+        # not have any Nested objects in them.
+        doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
+        if not ignore_missing_fields and not doc_fields.issubset(returned_columns):
+            raise ValueError(
+                f"Not all fields of {cls.__name__} were returned by the query. "
+                "Make sure your document does not use Nested fields, which are "
+                "currently not supported in ES|QL. To force the query to be "
+                "evaluated in spite of the missing fields, set the "
+                "ignore_missing_fields=True option in the esql_execute() call."
+            )
+        if "_id" not in returned_columns:
+            # fail with an actionable message instead of the bare
+            # "'_id' is not in list" error raised by list.index()
+            raise ValueError(
+                "The query did not return the _id column, which is required to "
+                f"build {cls.__name__} instances. Make sure the query requests "
+                "the _id metadata field and keeps it in its results."
+            )
+        non_doc_fields: set[str] = returned_columns - doc_fields - {"_id"}
+        index_id = query_columns.index("_id")
+        results = response.body.get("values", [])
+        for column_values in results:
+            # create a dictionary with all the document fields, expanding the
+            # dot notation returned by ES|QL into the recursive dictionaries
+            # used by Document.from_dict()
+            doc_dict: Dict[str, Any] = {}
+            for col, val in zip(query_columns, column_values):
+                if col in doc_fields:
+                    cols = col.split(".")
+                    d = doc_dict
+                    for c in cols[:-1]:
+                        # descend into the sub-dictionary, creating it on demand
+                        d = d.setdefault(c, {})
+                    d[cols[-1]] = val
+            # create the document instance
+            obj = cls(meta={"_id": column_values[index_id]})
+            obj._from_dict(doc_dict)
+            if return_additional:
+                # build a dict with any other values included in the response
+                other = {
+                    col: val
+                    for col, val in zip(query_columns, column_values)
+                    if col in non_doc_fields
+                }
+                yield obj, other
+            else:
+                yield obj

elasticsearch/dsl/_sync/document.py CHANGED Viewed

@@ -21,6 +21,7 @@ from typing import (
     Any,
     Dict,
     Iterable,
+    Iterator,
     List,
     Optional,
     Tuple,
@@ -42,6 +43,7 @@ from .search import Search
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch
+    from elasticsearch.esql.esql import ESQLBase
 class IndexMeta(DocumentMeta):
@@ -512,3 +514,85 @@ class Document(DocumentBase, metaclass=IndexMeta):
                 return action
         return bulk(es, Generate(actions), **kwargs)
+    @classmethod
+    def esql_execute(
+        cls,
+        query: "ESQLBase",
+        return_additional: bool = False,
+        ignore_missing_fields: bool = False,
+        using: Optional[UsingType] = None,
+        **kwargs: Any,
+    ) -> Iterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
+        """
+        Execute the given ES|QL query and return an iterator over its results.
+        By default each item is an instance of this ``Document``. When
+        ``return_additional=True`` each item is instead a 2-element tuple,
+        where the first element is the document instance and the second a
+        dictionary with any remaining columns requested in the query.
+        Because this is a generator, the query is only sent (and any error
+        raised) once iteration starts.
+        :arg query: an ES|QL query object created with the ``esql_from()`` method.
+        :arg return_additional: if ``False`` (the default), this method returns
+            document objects. If set to ``True``, the method returns tuples with
+            a document in the first element and a dictionary with any additional
+            columns returned by the query in the second element.
+        :arg ignore_missing_fields: if ``False`` (the default), all the fields of
+            the document must be present in the query, or else an exception is
+            raised. Set to ``True`` to allow missing fields, which will result in
+            partially initialized document objects.
+        :arg using: connection alias to use, defaults to ``'default'``
+        :arg kwargs: additional options for the ``client.esql.query()`` function.
+        :raises ValueError: if ``ignore_missing_fields`` is ``False`` and some
+            document fields are absent from the response, or if the response
+            does not include the ``_id`` column.
+        """
+        es = cls._get_connection(using)
+        response = es.esql.query(query=str(query), **kwargs)
+        query_columns = [col["name"] for col in response.body.get("columns", [])]
+        returned_columns = set(query_columns)
+        # Here we get the list of columns defined in the document, which are the
+        # columns that we will take from each result to assemble the document
+        # object.
+        # When `for_esql=False` is passed below by default, the list will include
+        # nested fields, which ES|QL does not return, causing an error. When passing
+        # `ignore_missing_fields=True` the list will be generated with
+        # `for_esql=True`, so the error will not occur, but the documents will
+        # not have any Nested objects in them.
+        doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
+        if not ignore_missing_fields and not doc_fields.issubset(returned_columns):
+            raise ValueError(
+                f"Not all fields of {cls.__name__} were returned by the query. "
+                "Make sure your document does not use Nested fields, which are "
+                "currently not supported in ES|QL. To force the query to be "
+                "evaluated in spite of the missing fields, set the "
+                "ignore_missing_fields=True option in the esql_execute() call."
+            )
+        if "_id" not in returned_columns:
+            # fail with an actionable message instead of the bare
+            # "'_id' is not in list" error raised by list.index()
+            raise ValueError(
+                "The query did not return the _id column, which is required to "
+                f"build {cls.__name__} instances. Make sure the query requests "
+                "the _id metadata field and keeps it in its results."
+            )
+        non_doc_fields: set[str] = returned_columns - doc_fields - {"_id"}
+        index_id = query_columns.index("_id")
+        results = response.body.get("values", [])
+        for column_values in results:
+            # create a dictionary with all the document fields, expanding the
+            # dot notation returned by ES|QL into the recursive dictionaries
+            # used by Document.from_dict()
+            doc_dict: Dict[str, Any] = {}
+            for col, val in zip(query_columns, column_values):
+                if col in doc_fields:
+                    cols = col.split(".")
+                    d = doc_dict
+                    for c in cols[:-1]:
+                        # descend into the sub-dictionary, creating it on demand
+                        d = d.setdefault(c, {})
+                    d[cols[-1]] = val
+            # create the document instance
+            obj = cls(meta={"_id": column_values[index_id]})
+            obj._from_dict(doc_dict)
+            if return_additional:
+                # build a dict with any other values included in the response
+                other = {
+                    col: val
+                    for col, val in zip(query_columns, column_values)
+                    if col in non_doc_fields
+                }
+                yield obj, other
+            else:
+                yield obj

elasticsearch/dsl/aggs.py CHANGED Viewed

@@ -372,6 +372,12 @@ class Boxplot(Agg[_R]):
     :arg compression: Limits the maximum number of nodes used by the
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
         value. By default, documents without a value are ignored.
@@ -384,6 +390,9 @@ class Boxplot(Agg[_R]):
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
         script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
@@ -391,6 +400,7 @@ class Boxplot(Agg[_R]):
     ):
         super().__init__(
             compression=compression,
+            execution_hint=execution_hint,
             field=field,
             missing=missing,
             script=script,
@@ -1897,6 +1907,12 @@ class MedianAbsoluteDeviation(Agg[_R]):
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error. Defaults to
         `1000` if omitted.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg format:
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
@@ -1910,6 +1926,9 @@ class MedianAbsoluteDeviation(Agg[_R]):
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         format: Union[str, "DefaultType"] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
@@ -1918,6 +1937,7 @@ class MedianAbsoluteDeviation(Agg[_R]):
     ):
         super().__init__(
             compression=compression,
+            execution_hint=execution_hint,
             format=format,
             field=field,
             missing=missing,

elasticsearch/dsl/document_base.py CHANGED Viewed

@@ -28,6 +28,7 @@ from typing import (
     List,
     Optional,
     Tuple,
+    Type,
     TypeVar,
     Union,
     get_args,
@@ -49,6 +50,7 @@ from .utils import DOC_META_FIELDS, ObjectBase
 if TYPE_CHECKING:
     from elastic_transport import ObjectApiResponse
+    from ..esql.esql import ESQLBase
     from .index_base import IndexBase
@@ -602,3 +604,44 @@ class DocumentBase(ObjectBase):
         meta["_source"] = d
         return meta
+    @classmethod
+    def _get_field_names(
+        cls, for_esql: bool = False, nested_class: Optional[Type[InnerDoc]] = None
+    ) -> List[str]:
+        """Collect the names of the fields declared in this document's mapping.
+        Fields that live inside ``Object`` fields are expanded recursively and
+        reported in dot notation (``parent.child``). When ``for_esql`` is
+        ``True``, ``Nested`` fields are left out entirely, since ES|QL does not
+        currently support them.
+        :arg for_esql: skip ``Nested`` fields and everything below them.
+        :arg nested_class: class whose mapping is inspected instead of ``cls``;
+            used by the recursive calls made for inner documents.
+        """
+        owner = nested_class or cls
+        mapping = owner._doc_type.mapping
+        names: List[str] = []
+        for name in mapping:
+            field = mapping[name]
+            if not isinstance(field, Object):
+                # plain field: reported under its own name
+                names.append(name)
+                continue
+            if for_esql and isinstance(field, Nested):
+                # ES|QL does not recognize Nested fields at this time
+                continue
+            # object-like field: prefix each inner field with the parent name
+            inner_names = cls._get_field_names(
+                for_esql=for_esql, nested_class=field._doc_class
+            )
+            names.extend(f"{name}.{inner}" for inner in inner_names)
+        return names
+    @classmethod
+    def esql_from(cls) -> "ESQLBase":
+        """Build the starting ES|QL query for instances of this document class.
+        The query is created with ``FROM`` this class, requests the ``_id``
+        metadata field, and ``KEEP``s ``_id`` together with every field name
+        reported by ``_get_field_names(for_esql=True)``. Further commands can
+        be chained onto the returned object.
+        """
+        from ..esql import ESQL  # here to avoid circular imports
+        base_query = ESQL.from_(cls).metadata("_id")
+        kept_columns = cls._get_field_names(for_esql=True)
+        return base_query.keep("_id", *kept_columns)

elasticsearch/dsl/field.py CHANGED Viewed

@@ -119,9 +119,16 @@ class Field(DslBase):
     def __getitem__(self, subfield: str) -> "Field":
         return cast(Field, self._params.get("fields", {})[subfield])
-    def _serialize(self, data: Any) -> Any:
+    def _serialize(self, data: Any, skip_empty: bool) -> Any:
         return data
+    def _safe_serialize(self, data: Any, skip_empty: bool) -> Any:
+        """Call ``_serialize()``, tolerating overrides without ``skip_empty``.
+        Subclasses written against the older ``_serialize(self, data)``
+        signature are still supported: when the two-argument call is rejected
+        because of the signature, the call is retried with ``data`` only.
+        A ``TypeError`` raised by the serializer itself is propagated as is,
+        rather than being replaced by a misleading "missing argument" error
+        from the retry.
+        :arg data: the value to serialize.
+        :arg skip_empty: forwarded to ``_serialize()`` when it accepts it.
+        """
+        try:
+            return self._serialize(data, skip_empty)
+        except TypeError:
+            import inspect  # local import, only needed on this fallback path
+            try:
+                inspect.signature(self._serialize).bind(data, skip_empty)
+            except (TypeError, ValueError):
+                # older method signature, without skip_empty (or a signature
+                # that cannot be introspected): retry with the legacy call
+                return self._serialize(data)  # type: ignore[call-arg]
+            # the two-argument call matches the signature, so the TypeError
+            # came from inside the serializer and must not be masked
+            raise
     def _deserialize(self, data: Any) -> Any:
         return data
@@ -133,10 +140,16 @@ class Field(DslBase):
             return AttrList([])
         return self._empty()
-    def serialize(self, data: Any) -> Any:
+    def serialize(self, data: Any, skip_empty: bool = True) -> Any:
         if isinstance(data, (list, AttrList, tuple)):
-            return list(map(self._serialize, cast(Iterable[Any], data)))
-        return self._serialize(data)
+            return list(
+                map(
+                    self._safe_serialize,
+                    cast(Iterable[Any], data),
+                    [skip_empty] * len(data),
+                )
+            )
+        return self._safe_serialize(data, skip_empty)
     def deserialize(self, data: Any) -> Any:
         if isinstance(data, (list, AttrList, tuple)):
@@ -186,7 +199,7 @@ class RangeField(Field):
         data = {k: self._core_field.deserialize(v) for k, v in data.items()}  # type: ignore[union-attr]
         return Range(data)
-    def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
         if not isinstance(data, collections.abc.Mapping):
@@ -550,7 +563,7 @@ class Object(Field):
         return self._wrap(data)
     def _serialize(
-        self, data: Optional[Union[Dict[str, Any], "InnerDoc"]]
+        self, data: Optional[Union[Dict[str, Any], "InnerDoc"]], skip_empty: bool
     ) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
@@ -559,7 +572,7 @@ class Object(Field):
         if isinstance(data, collections.abc.Mapping):
             return data
-        return data.to_dict()
+        return data.to_dict(skip_empty=skip_empty)
     def clean(self, data: Any) -> Any:
         data = super().clean(data)
@@ -768,7 +781,7 @@ class Binary(Field):
     def _deserialize(self, data: Any) -> bytes:
         return base64.b64decode(data)
-    def _serialize(self, data: Any) -> Optional[str]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
         if data is None:
             return None
         return base64.b64encode(data).decode()
@@ -2619,7 +2632,7 @@ class Ip(Field):
         # the ipaddress library for pypy only accepts unicode.
         return ipaddress.ip_address(unicode(data))
-    def _serialize(self, data: Any) -> Optional[str]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
         if data is None:
             return None
         return str(data)
@@ -3367,7 +3380,7 @@ class Percolator(Field):
     def _deserialize(self, data: Any) -> "Query":
         return Q(data)  # type: ignore[no-any-return]
-    def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
         return data.to_dict()  # type: ignore[no-any-return]
@@ -3849,6 +3862,14 @@ class SemanticText(Field):
         by using the Update mapping API. Use the Create inference API to
         create the endpoint. If not specified, the inference endpoint
         defined by inference_id will be used at both index and query time.
+    :arg index_options: Settings for index_options that override any
+        defaults used by semantic_text, for example specific quantization
+        settings.
+    :arg chunking_settings: Settings for chunking text into smaller
+        passages. If specified, these will override the chunking settings
+        sent in the inference endpoint associated with inference_id. If
+        chunking settings are updated, they will not be applied to
+        existing documents until they are reindexed.
     """
     name = "semantic_text"
@@ -3859,6 +3880,12 @@ class SemanticText(Field):
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         inference_id: Union[str, "DefaultType"] = DEFAULT,
         search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
+        chunking_settings: Union[
+            "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         **kwargs: Any,
     ):
         if meta is not DEFAULT:
@@ -3867,6 +3894,10 @@ class SemanticText(Field):
             kwargs["inference_id"] = inference_id
         if search_inference_id is not DEFAULT:
             kwargs["search_inference_id"] = search_inference_id
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
+        if chunking_settings is not DEFAULT:
+            kwargs["chunking_settings"] = chunking_settings
         super().__init__(*args, **kwargs)
@@ -4063,6 +4094,9 @@ class Short(Integer):
 class SparseVector(Field):
     """
     :arg store:
+    :arg index_options: Additional index options for the sparse vector
+        field that controls the token pruning behavior of the sparse
+        vector field.
     :arg meta: Metadata about the field.
     :arg properties:
     :arg ignore_above:
@@ -4081,6 +4115,9 @@ class SparseVector(Field):
         self,
         *args: Any,
         store: Union[bool, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
         ignore_above: Union[int, "DefaultType"] = DEFAULT,
@@ -4095,6 +4132,8 @@ class SparseVector(Field):
     ):
         if store is not DEFAULT:
             kwargs["store"] = store
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
         if meta is not DEFAULT:
             kwargs["meta"] = meta
         if properties is not DEFAULT:

elasticsearch/dsl/response/aggs.py CHANGED Viewed

@@ -63,7 +63,7 @@ class BucketData(AggResponse[_R]):
         )
     def __iter__(self) -> Iterator["Agg"]:  # type: ignore[override]
-        return iter(self.buckets)  # type: ignore[arg-type]
+        return iter(self.buckets)
     def __len__(self) -> int:
         return len(self.buckets)

elasticsearch 8.19.0__py3-none-any.whl → 8.19.1__py3-none-any.whl

elasticsearch 8.19.0__py3-none-any.whl → 8.19.1__py3-none-any.whl