elasticsearch 8.19.0__py3-none-any.whl → 8.19.2__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- elasticsearch/_async/client/__init__.py +39 -55
- elasticsearch/_async/client/cat.py +605 -35
- elasticsearch/_async/client/cluster.py +7 -2
- elasticsearch/_async/client/connector.py +3 -3
- elasticsearch/_async/client/esql.py +16 -6
- elasticsearch/_async/client/fleet.py +1 -5
- elasticsearch/_async/client/graph.py +1 -5
- elasticsearch/_async/client/ilm.py +2 -10
- elasticsearch/_async/client/indices.py +159 -32
- elasticsearch/_async/client/inference.py +142 -120
- elasticsearch/_async/client/nodes.py +2 -2
- elasticsearch/_async/client/shutdown.py +5 -15
- elasticsearch/_async/client/slm.py +1 -5
- elasticsearch/_async/client/snapshot.py +262 -112
- elasticsearch/_async/client/sql.py +1 -1
- elasticsearch/_async/client/streams.py +185 -0
- elasticsearch/_async/client/transform.py +60 -0
- elasticsearch/_async/client/watcher.py +1 -5
- elasticsearch/_async/helpers.py +58 -9
- elasticsearch/_sync/client/__init__.py +39 -55
- elasticsearch/_sync/client/cat.py +605 -35
- elasticsearch/_sync/client/cluster.py +7 -2
- elasticsearch/_sync/client/connector.py +3 -3
- elasticsearch/_sync/client/esql.py +16 -6
- elasticsearch/_sync/client/fleet.py +1 -5
- elasticsearch/_sync/client/graph.py +1 -5
- elasticsearch/_sync/client/ilm.py +2 -10
- elasticsearch/_sync/client/indices.py +159 -32
- elasticsearch/_sync/client/inference.py +142 -120
- elasticsearch/_sync/client/nodes.py +2 -2
- elasticsearch/_sync/client/shutdown.py +5 -15
- elasticsearch/_sync/client/slm.py +1 -5
- elasticsearch/_sync/client/snapshot.py +262 -112
- elasticsearch/_sync/client/sql.py +1 -1
- elasticsearch/_sync/client/streams.py +185 -0
- elasticsearch/_sync/client/transform.py +60 -0
- elasticsearch/_sync/client/watcher.py +1 -5
- elasticsearch/_version.py +2 -1
- elasticsearch/client.py +2 -0
- elasticsearch/compat.py +45 -1
- elasticsearch/dsl/__init__.py +28 -0
- elasticsearch/dsl/_async/document.py +84 -0
- elasticsearch/dsl/_sync/document.py +84 -0
- elasticsearch/dsl/aggs.py +117 -0
- elasticsearch/dsl/document_base.py +59 -1
- elasticsearch/dsl/field.py +60 -10
- elasticsearch/dsl/query.py +1 -1
- elasticsearch/dsl/response/__init__.py +3 -0
- elasticsearch/dsl/response/aggs.py +1 -1
- elasticsearch/dsl/types.py +325 -20
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/esql/__init__.py +2 -1
- elasticsearch/esql/esql.py +85 -34
- elasticsearch/esql/functions.py +37 -25
- elasticsearch/helpers/__init__.py +10 -1
- elasticsearch/helpers/actions.py +106 -33
- {elasticsearch-8.19.0.dist-info → elasticsearch-8.19.2.dist-info}/METADATA +2 -4
- {elasticsearch-8.19.0.dist-info → elasticsearch-8.19.2.dist-info}/RECORD +61 -59
- {elasticsearch-8.19.0.dist-info → elasticsearch-8.19.2.dist-info}/WHEEL +0 -0
- {elasticsearch-8.19.0.dist-info → elasticsearch-8.19.2.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.19.0.dist-info → elasticsearch-8.19.2.dist-info}/licenses/NOTICE +0 -0
elasticsearch/dsl/_sync/document.py
CHANGED

```diff
@@ -21,6 +21,7 @@ from typing import (
     Any,
     Dict,
     Iterable,
+    Iterator,
     List,
     Optional,
     Tuple,
@@ -42,6 +43,7 @@ from .search import Search
 
 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch
+    from elasticsearch.esql.esql import ESQLBase
 
 
 class IndexMeta(DocumentMeta):
@@ -512,3 +514,85 @@ class Document(DocumentBase, metaclass=IndexMeta):
             return action
 
         return bulk(es, Generate(actions), **kwargs)
+
+    @classmethod
+    def esql_execute(
+        cls,
+        query: "ESQLBase",
+        return_additional: bool = False,
+        ignore_missing_fields: bool = False,
+        using: Optional[UsingType] = None,
+        **kwargs: Any,
+    ) -> Iterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
+        """
+        Execute the given ES|QL query and return an iterator of documents or,
+        when ``return_additional`` is ``True``, of 2-element tuples with a
+        document and a dictionary of any remaining columns requested in the query.
+
+        :arg query: an ES|QL query object created with the ``esql_from()`` method.
+        :arg return_additional: if ``False`` (the default), this method returns
+            document objects. If set to ``True``, the method returns tuples with
+            a document in the first element and a dictionary with any additional
+            columns returned by the query in the second element.
+        :arg ignore_missing_fields: if ``False`` (the default), all the fields of
+            the document must be present in the query, or else an exception is
+            raised. Set to ``True`` to allow missing fields, which will result in
+            partially initialized document objects.
+        :arg using: connection alias to use, defaults to ``'default'``
+        :arg kwargs: additional options for the ``client.esql.query()`` function.
+        """
+        es = cls._get_connection(using)
+        response = es.esql.query(query=str(query), **kwargs)
+        query_columns = [col["name"] for col in response.body.get("columns", [])]
+
+        # Here we get the list of columns defined in the document, which are the
+        # columns that we will take from each result to assemble the document
+        # object.
+        # When `for_esql=False` is passed below by default, the list will include
+        # nested fields, which ES|QL does not return, causing an error. When passing
+        # `ignore_missing_fields=True` the list will be generated with
+        # `for_esql=True`, so the error will not occur, but the documents will
+        # not have any Nested objects in them.
+        doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
+        if not ignore_missing_fields and not doc_fields.issubset(set(query_columns)):
+            raise ValueError(
+                f"Not all fields of {cls.__name__} were returned by the query. "
+                "Make sure your document does not use Nested fields, which are "
+                "currently not supported in ES|QL. To force the query to be "
+                "evaluated in spite of the missing fields, set the "
+                "ignore_missing_fields=True option in the esql_execute() call."
+            )
+        non_doc_fields: set[str] = set(query_columns) - doc_fields - {"_id"}
+        index_id = query_columns.index("_id")
+
+        results = response.body.get("values", [])
+        for column_values in results:
+            # create a dictionary with all the document fields, expanding the
+            # dot notation returned by ES|QL into the recursive dictionaries
+            # used by Document.from_dict()
+            doc_dict: Dict[str, Any] = {}
+            for col, val in zip(query_columns, column_values):
+                if col in doc_fields:
+                    cols = col.split(".")
+                    d = doc_dict
+                    for c in cols[:-1]:
+                        if c not in d:
+                            d[c] = {}
+                        d = d[c]
+                    d[cols[-1]] = val
+
+            # create the document instance
+            obj = cls(meta={"_id": column_values[index_id]})
+            obj._from_dict(doc_dict)
+
+            if return_additional:
+                # build a dict with any other values included in the response
+                other = {
+                    col: val
+                    for col, val in zip(query_columns, column_values)
+                    if col in non_doc_fields
+                }
+
+                yield obj, other
+            else:
+                yield obj
```
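For context, a sketch of how the new method might be used; the `Employee` document, its index, and the query values are hypothetical:

```python
from elasticsearch.dsl import Document, Integer, Keyword

class Employee(Document):
    name = Keyword()
    salary = Integer()

    class Index:
        name = "employees"

# Build the base FROM/KEEP query (see esql_from() further below), refine
# it with ES|QL clauses, then iterate over typed Employee instances.
query = Employee.esql_from().where("salary > 60000").sort("salary DESC").limit(5)
for emp in Employee.esql_execute(query):
    print(emp.name, emp.salary)
```

With `return_additional=True`, each result is instead a `(document, extras)` tuple, where `extras` holds any non-document columns, such as ones produced with `eval`.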
elasticsearch/dsl/aggs.py
CHANGED
```diff
@@ -372,6 +372,12 @@ class Boxplot(Agg[_R]):
     :arg compression: Limits the maximum number of nodes used by the
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
         value. By default, documents without a value are ignored.
@@ -384,6 +390,9 @@ class Boxplot(Agg[_R]):
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
         script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
@@ -391,6 +400,7 @@ class Boxplot(Agg[_R]):
     ):
         super().__init__(
             compression=compression,
+            execution_hint=execution_hint,
             field=field,
             missing=missing,
             script=script,
```
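A sketch of requesting the accuracy-optimized TDigest implementation; the index and field names are hypothetical:

```python
from elasticsearch.dsl import Search
from elasticsearch.dsl.aggs import Boxplot

s = Search(index="latency")  # hypothetical index and field
s.aggs["load_box"] = Boxplot(field="load_time", execution_hint="high_accuracy")
```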
```diff
@@ -642,6 +652,54 @@ class Cardinality(Agg[_R]):
         )
 
 
+class CartesianBounds(Agg[_R]):
+    """
+    A metric aggregation that computes the spatial bounding box containing
+    all values for a Point or Shape field.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_bounds"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
+class CartesianCentroid(Agg[_R]):
+    """
+    A metric aggregation that computes the weighted centroid from all
+    coordinate values for point and shape fields.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_centroid"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
 class CategorizeText(Bucket[_R]):
     """
     A multi-bucket aggregation that groups semi-structured text into
```
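A sketch of the two new metric aggregations on a hypothetical Cartesian point field:

```python
from elasticsearch.dsl import Search
from elasticsearch.dsl.aggs import CartesianBounds, CartesianCentroid

s = Search(index="floor-plans")  # hypothetical index with a point field
s.aggs["viewport"] = CartesianBounds(field="location")
s.aggs["center"] = CartesianCentroid(field="location")
```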
```diff
@@ -724,6 +782,43 @@ class CategorizeText(Bucket[_R]):
         )
 
 
+class ChangePoint(Pipeline[_R]):
+    """
+    A sibling pipeline that detects spikes, dips, and change points in a
+    metric. Given a distribution of values provided by the sibling multi-
+    bucket aggregation, this aggregation indicates the bucket of any spike
+    or dip and/or the bucket at which the largest change in the
+    distribution of values occurred, if they are statistically
+    significant. There must be at least 22 bucketed values. Fewer than
+    1,000 is preferred.
+
+    :arg format: `DecimalFormat` pattern for the output value. If
+        specified, the formatted value is returned in the aggregation’s
+        `value_as_string` property.
+    :arg gap_policy: Policy to apply when gaps are found in the data.
+        Defaults to `skip` if omitted.
+    :arg buckets_path: Path to the buckets that contain one set of values
+        to correlate.
+    """
+
+    name = "change_point"
+
+    def __init__(
+        self,
+        *,
+        format: Union[str, "DefaultType"] = DEFAULT,
+        gap_policy: Union[
+            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
+        ] = DEFAULT,
+        buckets_path: Union[
+            str, Sequence[str], Mapping[str, str], "DefaultType"
+        ] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs
+        )
+
+
 class Children(Bucket[_R]):
     """
     A single bucket aggregation that selects child documents that have the
```
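A sketch of sibling change-point detection over a date histogram; the index and field names are hypothetical:

```python
from elasticsearch.dsl import Search
from elasticsearch.dsl.aggs import ChangePoint

s = Search(index="traffic")  # hypothetical index and fields
s.aggs.bucket(
    "per_day", "date_histogram", field="@timestamp", calendar_interval="day"
).metric("requests", "avg", field="request_count")
# the change_point sits at the same level as the multi-bucket agg it reads
s.aggs["spike"] = ChangePoint(buckets_path="per_day>requests")
```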
```diff
@@ -1897,6 +1992,12 @@ class MedianAbsoluteDeviation(Agg[_R]):
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error. Defaults to
         `1000` if omitted.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg format:
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
@@ -1910,6 +2011,9 @@ class MedianAbsoluteDeviation(Agg[_R]):
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         format: Union[str, "DefaultType"] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
@@ -1918,6 +2022,7 @@ class MedianAbsoluteDeviation(Agg[_R]):
     ):
         super().__init__(
             compression=compression,
+            execution_hint=execution_hint,
             format=format,
             field=field,
             missing=missing,
```
```diff
@@ -2955,6 +3060,14 @@ class SignificantTerms(Bucket[_R]):
         the foreground sample with a term divided by the number of
         documents in the background with the term.
     :arg script_heuristic: Customized score, implemented via a script.
+    :arg p_value: Significant terms heuristic that calculates the p-value
+        between the term existing in foreground and background sets. The
+        p-value is the probability of obtaining test results at least as
+        extreme as the results actually observed, under the assumption
+        that the null hypothesis is correct. The p-value is calculated
+        assuming that the foreground set and the background set are
+        independent Bernoulli trials (https://en.wikipedia.org/wiki/Bernoulli_trial),
+        with the null hypothesis that the probabilities are the same.
     :arg shard_min_doc_count: Regulates the certainty a shard has if the
         term should actually be added to the candidate list or not with
         respect to the `min_doc_count`. Terms will only be considered if
@@ -3008,6 +3121,9 @@ class SignificantTerms(Bucket[_R]):
         script_heuristic: Union[
             "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
         ] = DEFAULT,
+        p_value: Union[
+            "types.PValueHeuristic", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
         shard_size: Union[int, "DefaultType"] = DEFAULT,
         size: Union[int, "DefaultType"] = DEFAULT,
@@ -3026,6 +3142,7 @@ class SignificantTerms(Bucket[_R]):
             mutual_information=mutual_information,
             percentage=percentage,
             script_heuristic=script_heuristic,
+            p_value=p_value,
             shard_min_doc_count=shard_min_doc_count,
             shard_size=shard_size,
             size=size,
```
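A sketch of the new heuristic; a plain dict stands in for `types.PValueHeuristic`, and the index and field are hypothetical:

```python
from elasticsearch.dsl import Search
from elasticsearch.dsl.aggs import SignificantTerms

s = Search(index="reports")  # hypothetical index and field
s.aggs["keywords"] = SignificantTerms(
    field="tags", p_value={"background_is_superset": False}
)
```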
elasticsearch/dsl/document_base.py
CHANGED

```diff
@@ -28,12 +28,18 @@ from typing import (
     List,
     Optional,
     Tuple,
+    Type,
     TypeVar,
     Union,
     get_args,
     overload,
 )
 
+try:
+    import annotationlib
+except ImportError:
+    annotationlib = None
+
 try:
     from types import UnionType
 except ImportError:
@@ -49,6 +55,7 @@ from .utils import DOC_META_FIELDS, ObjectBase
 if TYPE_CHECKING:
     from elastic_transport import ObjectApiResponse
 
+    from ..esql.esql import ESQLBase
     from .index_base import IndexBase
 
 
@@ -331,7 +338,17 @@ class DocumentOptions:
         # # ignore attributes
         # field10: ClassVar[string] = "a regular class variable"
         annotations = attrs.get("__annotations__", {})
-        fields = set([n for n in attrs if isinstance(attrs[n], Field)])
+        if not annotations and annotationlib:
+            # Python 3.14+ uses annotationlib
+            annotate = annotationlib.get_annotate_from_class_namespace(attrs)
+            if annotate:
+                annotations = (
+                    annotationlib.call_annotate_function(
+                        annotate, format=annotationlib.Format.VALUE
+                    )
+                    or {}
+                )
+        fields = {n for n in attrs if isinstance(attrs[n], Field)}
         fields.update(annotations.keys())
         field_defaults = {}
         for name in fields:
```
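A minimal sketch of why this branch exists, assuming Python 3.14+: under PEP 649, class annotations are evaluated lazily by an `__annotate__` function, so the namespace a metaclass receives may contain no `__annotations__` entry:

```python
import annotationlib  # Python 3.14+ standard library

class Meta(type):
    def __new__(mcs, name, bases, attrs):
        annotations = attrs.get("__annotations__", {})
        if not annotations:
            # recover the lazily-evaluated annotations, as the diff above does
            annotate = annotationlib.get_annotate_from_class_namespace(attrs)
            if annotate:
                annotations = annotationlib.call_annotate_function(
                    annotate, format=annotationlib.Format.VALUE
                )
        print(f"{name}: {annotations}")
        return super().__new__(mcs, name, bases, attrs)

class Doc(metaclass=Meta):
    title: str
    views: int
# prints: Doc: {'title': <class 'str'>, 'views': <class 'int'>}
```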
```diff
@@ -602,3 +619,44 @@ class DocumentBase(ObjectBase):
 
         meta["_source"] = d
         return meta
+
+    @classmethod
+    def _get_field_names(
+        cls, for_esql: bool = False, nested_class: Optional[Type[InnerDoc]] = None
+    ) -> List[str]:
+        """Return the list of field names used by this document.
+
+        If the document has nested objects, their fields are reported using dot
+        notation. If the ``for_esql`` argument is set to ``True``, the list omits
+        nested fields, which are currently unsupported in ES|QL.
+        """
+        fields = []
+        class_ = nested_class or cls
+        for field_name in class_._doc_type.mapping:
+            field = class_._doc_type.mapping[field_name]
+            if isinstance(field, Object):
+                if for_esql and isinstance(field, Nested):
+                    # ES|QL does not recognize Nested fields at this time
+                    continue
+                sub_fields = cls._get_field_names(
+                    for_esql=for_esql, nested_class=field._doc_class
+                )
+                for sub_field in sub_fields:
+                    fields.append(f"{field_name}.{sub_field}")
+            else:
+                fields.append(field_name)
+        return fields
+
+    @classmethod
+    def esql_from(cls) -> "ESQLBase":
+        """Return a base ES|QL query for instances of this document class.
+
+        The returned query is initialized with ``FROM`` and ``KEEP`` statements,
+        and can be completed as desired.
+        """
+        from ..esql import ESQL  # here to avoid circular imports
+
+        return (
+            ESQL.from_(cls)
+            .metadata("_id")
+            .keep("_id", *tuple(cls._get_field_names(for_esql=True)))
+        )
```
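A small sketch of what these helpers produce; the `Person`/`Address` classes and index name are hypothetical, and the rendered query is approximate:

```python
from elasticsearch.dsl import Document, InnerDoc, Keyword, Object

class Address(InnerDoc):
    city = Keyword()

class Person(Document):
    name = Keyword()
    address = Object(Address)

    class Index:
        name = "people"

print(Person._get_field_names())
# ['name', 'address.city']  (object fields use dot notation)
print(Person.esql_from())
# roughly: FROM people METADATA _id | KEEP _id, name, address.city
```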
elasticsearch/dsl/field.py
CHANGED
```diff
@@ -119,9 +119,16 @@ class Field(DslBase):
     def __getitem__(self, subfield: str) -> "Field":
         return cast(Field, self._params.get("fields", {})[subfield])
 
-    def _serialize(self, data: Any) -> Any:
+    def _serialize(self, data: Any, skip_empty: bool) -> Any:
         return data
 
+    def _safe_serialize(self, data: Any, skip_empty: bool) -> Any:
+        try:
+            return self._serialize(data, skip_empty)
+        except TypeError:
+            # older method signature, without skip_empty
+            return self._serialize(data)  # type: ignore[call-arg]
+
     def _deserialize(self, data: Any) -> Any:
         return data
 
@@ -133,10 +140,16 @@ class Field(DslBase):
             return AttrList([])
         return self._empty()
 
-    def serialize(self, data: Any) -> Any:
+    def serialize(self, data: Any, skip_empty: bool = True) -> Any:
         if isinstance(data, (list, AttrList, tuple)):
-            return list(map(self._serialize, cast(Iterable[Any], data)))
-        return self._serialize(data)
+            return list(
+                map(
+                    self._safe_serialize,
+                    cast(Iterable[Any], data),
+                    [skip_empty] * len(data),
+                )
+            )
+        return self._safe_serialize(data, skip_empty)
 
     def deserialize(self, data: Any) -> Any:
         if isinstance(data, (list, AttrList, tuple)):
```
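A sketch of the backward-compatibility shim above: a custom field written against the old one-argument `_serialize` signature keeps working, because `_safe_serialize` retries without `skip_empty` on `TypeError`. The `UpperKeyword` field is hypothetical:

```python
from elasticsearch.dsl import Keyword

class UpperKeyword(Keyword):  # hypothetical custom field
    def _serialize(self, data):  # legacy signature, no skip_empty
        return data.upper() if data else data

f = UpperKeyword()
print(f.serialize("hello"))     # -> 'HELLO'
print(f.serialize(["a", "b"]))  # -> ['A', 'B']
```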
```diff
@@ -186,7 +199,7 @@ class RangeField(Field):
         data = {k: self._core_field.deserialize(v) for k, v in data.items()}  # type: ignore[union-attr]
         return Range(data)
 
-    def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
         if not isinstance(data, collections.abc.Mapping):
@@ -550,7 +563,7 @@ class Object(Field):
         return self._wrap(data)
 
     def _serialize(
-        self, data: Optional[Union[Dict[str, Any], "InnerDoc"]]
+        self, data: Optional[Union[Dict[str, Any], "InnerDoc"]], skip_empty: bool
     ) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
@@ -559,7 +572,11 @@ class Object(Field):
         if isinstance(data, collections.abc.Mapping):
             return data
 
-        return data.to_dict()
+        try:
+            return data.to_dict(skip_empty=skip_empty)
+        except TypeError:
+            # this would only happen if an AttrDict was given instead of an InnerDoc
+            return data.to_dict()
 
     def clean(self, data: Any) -> Any:
         data = super().clean(data)
```
```diff
@@ -768,7 +785,7 @@ class Binary(Field):
     def _deserialize(self, data: Any) -> bytes:
         return base64.b64decode(data)
 
-    def _serialize(self, data: Any) -> Optional[str]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
         if data is None:
             return None
         return base64.b64encode(data).decode()
@@ -2619,7 +2636,7 @@ class Ip(Field):
         # the ipaddress library for pypy only accepts unicode.
         return ipaddress.ip_address(unicode(data))
 
-    def _serialize(self, data: Any) -> Optional[str]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
         if data is None:
             return None
         return str(data)
@@ -3367,7 +3384,7 @@ class Percolator(Field):
     def _deserialize(self, data: Any) -> "Query":
         return Q(data)  # type: ignore[no-any-return]
 
-    def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+    def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
         if data is None:
             return None
         return data.to_dict()  # type: ignore[no-any-return]
```
```diff
@@ -3849,9 +3866,21 @@ class SemanticText(Field):
         by using the Update mapping API. Use the Create inference API to
         create the endpoint. If not specified, the inference endpoint
         defined by inference_id will be used at both index and query time.
+    :arg index_options: Settings for index_options that override any
+        defaults used by semantic_text, for example specific quantization
+        settings.
+    :arg chunking_settings: Settings for chunking text into smaller
+        passages. If specified, these will override the chunking settings
+        sent in the inference endpoint associated with inference_id. If
+        chunking settings are updated, they will not be applied to
+        existing documents until they are reindexed.
+    :arg fields:
     """
 
     name = "semantic_text"
+    _param_defs = {
+        "fields": {"type": "field", "hash": True},
+    }
 
     def __init__(
         self,
@@ -3859,6 +3888,13 @@ class SemanticText(Field):
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         inference_id: Union[str, "DefaultType"] = DEFAULT,
         search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
+        chunking_settings: Union[
+            "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
+        fields: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
         **kwargs: Any,
     ):
         if meta is not DEFAULT:
@@ -3867,6 +3903,12 @@ class SemanticText(Field):
             kwargs["inference_id"] = inference_id
         if search_inference_id is not DEFAULT:
             kwargs["search_inference_id"] = search_inference_id
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
+        if chunking_settings is not DEFAULT:
+            kwargs["chunking_settings"] = chunking_settings
+        if fields is not DEFAULT:
+            kwargs["fields"] = fields
         super().__init__(*args, **kwargs)
 
 
```
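A sketch of a mapping that uses the new options; plain dicts are accepted in place of the typed helpers, and the endpoint name and option values are illustrative only:

```python
from elasticsearch.dsl import Document
from elasticsearch.dsl.field import SemanticText

class Article(Document):
    body = SemanticText(
        inference_id="my-text-embedding-endpoint",  # assumed endpoint name
        index_options={"dense_vector": {"type": "int8_hnsw"}},
        chunking_settings={
            "strategy": "sentence",
            "max_chunk_size": 250,
            "sentence_overlap": 1,
        },
    )

    class Index:
        name = "articles"
```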
```diff
@@ -4063,6 +4105,9 @@ class Short(Integer):
 class SparseVector(Field):
     """
     :arg store:
+    :arg index_options: Additional index options for the sparse vector
+        field that control the token pruning behavior of the sparse
+        vector field.
     :arg meta: Metadata about the field.
     :arg properties:
     :arg ignore_above:
@@ -4081,6 +4126,9 @@ class SparseVector(Field):
         self,
         *args: Any,
         store: Union[bool, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
         ignore_above: Union[int, "DefaultType"] = DEFAULT,
@@ -4095,6 +4143,8 @@ class SparseVector(Field):
     ):
         if store is not DEFAULT:
             kwargs["store"] = store
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
         if meta is not DEFAULT:
             kwargs["meta"] = meta
         if properties is not DEFAULT:
```
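A sketch of token pruning options on a sparse vector field; a dict stands in for `types.SparseVectorIndexOptions`, and the class, index, and threshold values are illustrative:

```python
from elasticsearch.dsl import Document
from elasticsearch.dsl.field import SparseVector

class Passage(Document):
    embedding = SparseVector(
        index_options={
            "prune": True,
            "pruning_config": {
                "tokens_freq_ratio_threshold": 5,
                "tokens_weight_threshold": 0.4,
            },
        }
    )

    class Index:
        name = "passages"
```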
elasticsearch/dsl/query.py
CHANGED
```diff
@@ -1433,7 +1433,7 @@ class MoreLikeThis(Query):
         ] = DEFAULT,
         version: Union[int, "DefaultType"] = DEFAULT,
         version_type: Union[
-            Literal["internal", "external", "external_gte", "force"], "DefaultType"
+            Literal["internal", "external", "external_gte"], "DefaultType"
         ] = DEFAULT,
         boost: Union[float, "DefaultType"] = DEFAULT,
         _name: Union[str, "DefaultType"] = DEFAULT,
```
elasticsearch/dsl/response/__init__.py
CHANGED

```diff
@@ -233,10 +233,13 @@ AggregateResponseType = Union[
     "types.SimpleValueAggregate",
     "types.DerivativeAggregate",
     "types.BucketMetricValueAggregate",
+    "types.ChangePointAggregate",
     "types.StatsAggregate",
     "types.StatsBucketAggregate",
     "types.ExtendedStatsAggregate",
     "types.ExtendedStatsBucketAggregate",
+    "types.CartesianBoundsAggregate",
+    "types.CartesianCentroidAggregate",
     "types.GeoBoundsAggregate",
     "types.GeoCentroidAggregate",
     "types.HistogramAggregate",
```