elasticsearch 9.1.0-py3-none-any.whl → 9.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. elasticsearch/_async/client/__init__.py +21 -6
  2. elasticsearch/_async/client/cat.py +1091 -51
  3. elasticsearch/_async/client/cluster.py +7 -2
  4. elasticsearch/_async/client/connector.py +3 -3
  5. elasticsearch/_async/client/esql.py +20 -6
  6. elasticsearch/_async/client/indices.py +27 -13
  7. elasticsearch/_async/client/inference.py +16 -5
  8. elasticsearch/_async/client/logstash.py +3 -1
  9. elasticsearch/_async/client/nodes.py +2 -2
  10. elasticsearch/_async/client/shutdown.py +5 -15
  11. elasticsearch/_async/client/sql.py +1 -1
  12. elasticsearch/_async/client/streams.py +186 -0
  13. elasticsearch/_async/client/transform.py +60 -0
  14. elasticsearch/_async/client/watcher.py +1 -5
  15. elasticsearch/_async/helpers.py +58 -9
  16. elasticsearch/_sync/client/__init__.py +21 -6
  17. elasticsearch/_sync/client/cat.py +1091 -51
  18. elasticsearch/_sync/client/cluster.py +7 -2
  19. elasticsearch/_sync/client/connector.py +3 -3
  20. elasticsearch/_sync/client/esql.py +20 -6
  21. elasticsearch/_sync/client/indices.py +27 -13
  22. elasticsearch/_sync/client/inference.py +16 -5
  23. elasticsearch/_sync/client/logstash.py +3 -1
  24. elasticsearch/_sync/client/nodes.py +2 -2
  25. elasticsearch/_sync/client/shutdown.py +5 -15
  26. elasticsearch/_sync/client/sql.py +1 -1
  27. elasticsearch/_sync/client/streams.py +186 -0
  28. elasticsearch/_sync/client/transform.py +60 -0
  29. elasticsearch/_sync/client/watcher.py +1 -5
  30. elasticsearch/_version.py +2 -1
  31. elasticsearch/client.py +2 -0
  32. elasticsearch/compat.py +43 -1
  33. elasticsearch/dsl/__init__.py +28 -0
  34. elasticsearch/dsl/_async/document.py +84 -0
  35. elasticsearch/dsl/_sync/document.py +84 -0
  36. elasticsearch/dsl/aggs.py +97 -0
  37. elasticsearch/dsl/document_base.py +57 -0
  38. elasticsearch/dsl/field.py +43 -11
  39. elasticsearch/dsl/query.py +5 -1
  40. elasticsearch/dsl/response/__init__.py +3 -0
  41. elasticsearch/dsl/response/aggs.py +1 -1
  42. elasticsearch/dsl/types.py +273 -24
  43. elasticsearch/dsl/utils.py +1 -1
  44. elasticsearch/esql/__init__.py +2 -1
  45. elasticsearch/esql/esql.py +85 -34
  46. elasticsearch/esql/functions.py +37 -25
  47. elasticsearch/helpers/__init__.py +10 -1
  48. elasticsearch/helpers/actions.py +106 -33
  49. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/METADATA +2 -4
  50. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/RECORD +53 -52
  51. elasticsearch/esql/esql1.py +0 -307
  52. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/WHEEL +0 -0
  53. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/licenses/LICENSE +0 -0
  54. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/licenses/NOTICE +0 -0
@@ -552,11 +552,7 @@ class WatcherClient(NamespacedClient):
552
552
  __body["transform"] = transform
553
553
  if trigger is not None:
554
554
  __body["trigger"] = trigger
555
- if not __body:
556
- __body = None # type: ignore[assignment]
557
- __headers = {"accept": "application/json"}
558
- if __body is not None:
559
- __headers["content-type"] = "application/json"
555
+ __headers = {"accept": "application/json", "content-type": "application/json"}
560
556
  return self.perform_request( # type: ignore[return-value]
561
557
  "PUT",
562
558
  __path,
elasticsearch/_version.py CHANGED
@@ -15,4 +15,5 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- __versionstr__ = "9.1.0"
18
+ __versionstr__ = "9.1.2"
19
+ __es_specification_commit__ = "cc623e3b52dd3dfd85848ee992713d37da020bfb"
elasticsearch/client.py CHANGED
@@ -62,6 +62,7 @@ from ._sync.client.slm import SlmClient as SlmClient # noqa: F401
62
62
  from ._sync.client.snapshot import SnapshotClient as SnapshotClient # noqa: F401
63
63
  from ._sync.client.sql import SqlClient as SqlClient # noqa: F401
64
64
  from ._sync.client.ssl import SslClient as SslClient # noqa: F401
65
+ from ._sync.client.streams import StreamsClient as StreamsClient # noqa: F401
65
66
  from ._sync.client.synonyms import SynonymsClient as SynonymsClient # noqa: F401
66
67
  from ._sync.client.tasks import TasksClient as TasksClient # noqa: F401
67
68
  from ._sync.client.text_structure import ( # noqa: F401
@@ -115,6 +116,7 @@ __all__ = [
115
116
  "SnapshotClient",
116
117
  "SqlClient",
117
118
  "SslClient",
119
+ "StreamsClient",
118
120
  "TasksClient",
119
121
  "TextStructureClient",
120
122
  "TransformClient",
elasticsearch/compat.py CHANGED
@@ -15,11 +15,14 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ import asyncio
18
19
  import inspect
19
20
  import os
20
21
  import sys
22
+ from contextlib import asynccontextmanager, contextmanager
21
23
  from pathlib import Path
22
- from typing import Tuple, Type, Union
24
+ from threading import Thread
25
+ from typing import Any, AsyncIterator, Callable, Coroutine, Iterator, Tuple, Type, Union
23
26
 
24
27
  string_types: Tuple[Type[str], Type[bytes]] = (str, bytes)
25
28
 
@@ -76,9 +79,48 @@ def warn_stacklevel() -> int:
76
79
  return 0
77
80
 
78
81
 
82
+ @contextmanager
83
+ def safe_thread(
84
+ target: Callable[..., Any], *args: Any, **kwargs: Any
85
+ ) -> Iterator[Thread]:
86
+ """Run a thread within a context manager block.
87
+
88
+ The thread is automatically joined when the block ends. If the thread raised
89
+ an exception, it is raised in the caller's context.
90
+ """
91
+ captured_exception = None
92
+
93
+ def run() -> None:
94
+ try:
95
+ target(*args, **kwargs)
96
+ except BaseException as exc:
97
+ nonlocal captured_exception
98
+ captured_exception = exc
99
+
100
+ thread = Thread(target=run)
101
+ thread.start()
102
+ yield thread
103
+ thread.join()
104
+ if captured_exception:
105
+ raise captured_exception
106
+
107
+
108
+ @asynccontextmanager
109
+ async def safe_task(coro: Coroutine[Any, Any, Any]) -> AsyncIterator[asyncio.Task[Any]]:
110
+ """Run a background task within a context manager block.
111
+
112
+ The task is awaited when the block ends.
113
+ """
114
+ task = asyncio.create_task(coro)
115
+ yield task
116
+ await task
117
+
118
+
79
119
  __all__ = [
80
120
  "string_types",
81
121
  "to_str",
82
122
  "to_bytes",
83
123
  "warn_stacklevel",
124
+ "safe_thread",
125
+ "safe_task",
84
126
  ]
@@ -38,23 +38,30 @@ from .faceted_search import (
38
38
  TermsFacet,
39
39
  )
40
40
  from .field import (
41
+ AggregateMetricDouble,
42
+ Alias,
41
43
  Binary,
42
44
  Boolean,
43
45
  Byte,
44
46
  Completion,
45
47
  ConstantKeyword,
48
+ CountedKeyword,
46
49
  CustomField,
47
50
  Date,
51
+ DateNanos,
48
52
  DateRange,
49
53
  DenseVector,
50
54
  Double,
51
55
  DoubleRange,
52
56
  Field,
57
+ Flattened,
53
58
  Float,
54
59
  FloatRange,
55
60
  GeoPoint,
56
61
  GeoShape,
57
62
  HalfFloat,
63
+ Histogram,
64
+ IcuCollationKeyword,
58
65
  Integer,
59
66
  IntegerRange,
60
67
  Ip,
@@ -63,21 +70,28 @@ from .field import (
63
70
  Keyword,
64
71
  Long,
65
72
  LongRange,
73
+ MatchOnlyText,
66
74
  Murmur3,
67
75
  Nested,
68
76
  Object,
77
+ Passthrough,
69
78
  Percolator,
70
79
  Point,
71
80
  RangeField,
72
81
  RankFeature,
73
82
  RankFeatures,
83
+ RankVectors,
74
84
  ScaledFloat,
75
85
  SearchAsYouType,
86
+ SemanticText,
76
87
  Shape,
77
88
  Short,
78
89
  SparseVector,
79
90
  Text,
80
91
  TokenCount,
92
+ UnsignedLong,
93
+ Version,
94
+ Wildcard,
81
95
  construct_field,
82
96
  )
83
97
  from .function import SF
@@ -108,6 +122,8 @@ __all__ = [
108
122
  "A",
109
123
  "Agg",
110
124
  "AggResponse",
125
+ "AggregateMetricDouble",
126
+ "Alias",
111
127
  "AsyncComposableIndexTemplate",
112
128
  "AsyncDocument",
113
129
  "AsyncEmptySearch",
@@ -126,9 +142,11 @@ __all__ = [
126
142
  "Completion",
127
143
  "ComposableIndexTemplate",
128
144
  "ConstantKeyword",
145
+ "CountedKeyword",
129
146
  "CustomField",
130
147
  "Date",
131
148
  "DateHistogramFacet",
149
+ "DateNanos",
132
150
  "DateRange",
133
151
  "DenseVector",
134
152
  "Document",
@@ -142,12 +160,15 @@ __all__ = [
142
160
  "FacetedResponse",
143
161
  "FacetedSearch",
144
162
  "Field",
163
+ "Flattened",
145
164
  "Float",
146
165
  "FloatRange",
147
166
  "GeoPoint",
148
167
  "GeoShape",
149
168
  "HalfFloat",
169
+ "Histogram",
150
170
  "HistogramFacet",
171
+ "IcuCollationKeyword",
151
172
  "IllegalOperation",
152
173
  "Index",
153
174
  "IndexTemplate",
@@ -162,12 +183,14 @@ __all__ = [
162
183
  "LongRange",
163
184
  "M",
164
185
  "Mapping",
186
+ "MatchOnlyText",
165
187
  "MetaField",
166
188
  "MultiSearch",
167
189
  "Murmur3",
168
190
  "Nested",
169
191
  "NestedFacet",
170
192
  "Object",
193
+ "Passthrough",
171
194
  "Percolator",
172
195
  "Point",
173
196
  "Q",
@@ -177,11 +200,13 @@ __all__ = [
177
200
  "RangeField",
178
201
  "RankFeature",
179
202
  "RankFeatures",
203
+ "RankVectors",
180
204
  "Response",
181
205
  "SF",
182
206
  "ScaledFloat",
183
207
  "Search",
184
208
  "SearchAsYouType",
209
+ "SemanticText",
185
210
  "Shape",
186
211
  "Short",
187
212
  "SparseVector",
@@ -189,9 +214,12 @@ __all__ = [
189
214
  "Text",
190
215
  "TokenCount",
191
216
  "UnknownDslObject",
217
+ "UnsignedLong",
192
218
  "UpdateByQuery",
193
219
  "UpdateByQueryResponse",
194
220
  "ValidationException",
221
+ "Version",
222
+ "Wildcard",
195
223
  "analyzer",
196
224
  "async_connections",
197
225
  "char_filter",
@@ -20,6 +20,7 @@ from typing import (
20
20
  TYPE_CHECKING,
21
21
  Any,
22
22
  AsyncIterable,
23
+ AsyncIterator,
23
24
  Dict,
24
25
  List,
25
26
  Optional,
@@ -42,6 +43,7 @@ from .search import AsyncSearch
42
43
 
43
44
  if TYPE_CHECKING:
44
45
  from elasticsearch import AsyncElasticsearch
46
+ from elasticsearch.esql.esql import ESQLBase
45
47
 
46
48
 
47
49
  class AsyncIndexMeta(DocumentMeta):
@@ -520,3 +522,85 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
520
522
  return action
521
523
 
522
524
  return await async_bulk(es, Generate(actions), **kwargs)
525
+
526
+ @classmethod
527
+ async def esql_execute(
528
+ cls,
529
+ query: "ESQLBase",
530
+ return_additional: bool = False,
531
+ ignore_missing_fields: bool = False,
532
+ using: Optional[AsyncUsingType] = None,
533
+ **kwargs: Any,
534
+ ) -> AsyncIterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
535
+ """
536
+ Execute the given ES|QL query and return an iterator of 2-element tuples,
537
+ where the first element is an instance of this ``Document`` and the
538
+ second a dictionary with any remaining columns requested in the query.
539
+
540
+ :arg query: an ES|QL query object created with the ``esql_from()`` method.
541
+ :arg return_additional: if ``False`` (the default), this method returns
542
+ document objects. If set to ``True``, the method returns tuples with
543
+ a document in the first element and a dictionary with any additional
544
+ columns returned by the query in the second element.
545
+ :arg ignore_missing_fields: if ``False`` (the default), all the fields of
546
+ the document must be present in the query, or else an exception is
547
+ raised. Set to ``True`` to allow missing fields, which will result in
548
+ partially initialized document objects.
549
+ :arg using: connection alias to use, defaults to ``'default'``
550
+ :arg kwargs: additional options for the ``client.esql.query()`` function.
551
+ """
552
+ es = cls._get_connection(using)
553
+ response = await es.esql.query(query=str(query), **kwargs)
554
+ query_columns = [col["name"] for col in response.body.get("columns", [])]
555
+
556
+ # Here we get the list of columns defined in the document, which are the
557
+ # columns that we will take from each result to assemble the document
558
+ # object.
559
+ # When `for_esql=False` is passed below by default, the list will include
560
+ # nested fields, which ES|QL does not return, causing an error. When passing
561
+ # `ignore_missing_fields=True` the list will be generated with
562
+ # `for_esql=True`, so the error will not occur, but the documents will
563
+ # not have any Nested objects in them.
564
+ doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
565
+ if not ignore_missing_fields and not doc_fields.issubset(set(query_columns)):
566
+ raise ValueError(
567
+ f"Not all fields of {cls.__name__} were returned by the query. "
568
+ "Make sure your document does not use Nested fields, which are "
569
+ "currently not supported in ES|QL. To force the query to be "
570
+ "evaluated in spite of the missing fields, set the "
571
+ "ignore_missing_fields=True option in the esql_execute() call."
572
+ )
573
+ non_doc_fields: set[str] = set(query_columns) - doc_fields - {"_id"}
574
+ index_id = query_columns.index("_id")
575
+
576
+ results = response.body.get("values", [])
577
+ for column_values in results:
578
+ # create a dictionary with all the document fields, expanding the
579
+ # dot notation returned by ES|QL into the recursive dictionaries
580
+ # used by Document.from_dict()
581
+ doc_dict: Dict[str, Any] = {}
582
+ for col, val in zip(query_columns, column_values):
583
+ if col in doc_fields:
584
+ cols = col.split(".")
585
+ d = doc_dict
586
+ for c in cols[:-1]:
587
+ if c not in d:
588
+ d[c] = {}
589
+ d = d[c]
590
+ d[cols[-1]] = val
591
+
592
+ # create the document instance
593
+ obj = cls(meta={"_id": column_values[index_id]})
594
+ obj._from_dict(doc_dict)
595
+
596
+ if return_additional:
597
+ # build a dict with any other values included in the response
598
+ other = {
599
+ col: val
600
+ for col, val in zip(query_columns, column_values)
601
+ if col in non_doc_fields
602
+ }
603
+
604
+ yield obj, other
605
+ else:
606
+ yield obj
@@ -21,6 +21,7 @@ from typing import (
21
21
  Any,
22
22
  Dict,
23
23
  Iterable,
24
+ Iterator,
24
25
  List,
25
26
  Optional,
26
27
  Tuple,
@@ -42,6 +43,7 @@ from .search import Search
42
43
 
43
44
  if TYPE_CHECKING:
44
45
  from elasticsearch import Elasticsearch
46
+ from elasticsearch.esql.esql import ESQLBase
45
47
 
46
48
 
47
49
  class IndexMeta(DocumentMeta):
@@ -512,3 +514,85 @@ class Document(DocumentBase, metaclass=IndexMeta):
512
514
  return action
513
515
 
514
516
  return bulk(es, Generate(actions), **kwargs)
517
+
518
+ @classmethod
519
+ def esql_execute(
520
+ cls,
521
+ query: "ESQLBase",
522
+ return_additional: bool = False,
523
+ ignore_missing_fields: bool = False,
524
+ using: Optional[UsingType] = None,
525
+ **kwargs: Any,
526
+ ) -> Iterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
527
+ """
528
+ Execute the given ES|QL query and return an iterator of 2-element tuples,
529
+ where the first element is an instance of this ``Document`` and the
530
+ second a dictionary with any remaining columns requested in the query.
531
+
532
+ :arg query: an ES|QL query object created with the ``esql_from()`` method.
533
+ :arg return_additional: if ``False`` (the default), this method returns
534
+ document objects. If set to ``True``, the method returns tuples with
535
+ a document in the first element and a dictionary with any additional
536
+ columns returned by the query in the second element.
537
+ :arg ignore_missing_fields: if ``False`` (the default), all the fields of
538
+ the document must be present in the query, or else an exception is
539
+ raised. Set to ``True`` to allow missing fields, which will result in
540
+ partially initialized document objects.
541
+ :arg using: connection alias to use, defaults to ``'default'``
542
+ :arg kwargs: additional options for the ``client.esql.query()`` function.
543
+ """
544
+ es = cls._get_connection(using)
545
+ response = es.esql.query(query=str(query), **kwargs)
546
+ query_columns = [col["name"] for col in response.body.get("columns", [])]
547
+
548
+ # Here we get the list of columns defined in the document, which are the
549
+ # columns that we will take from each result to assemble the document
550
+ # object.
551
+ # When `for_esql=False` is passed below by default, the list will include
552
+ # nested fields, which ES|QL does not return, causing an error. When passing
553
+ # `ignore_missing_fields=True` the list will be generated with
554
+ # `for_esql=True`, so the error will not occur, but the documents will
555
+ # not have any Nested objects in them.
556
+ doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
557
+ if not ignore_missing_fields and not doc_fields.issubset(set(query_columns)):
558
+ raise ValueError(
559
+ f"Not all fields of {cls.__name__} were returned by the query. "
560
+ "Make sure your document does not use Nested fields, which are "
561
+ "currently not supported in ES|QL. To force the query to be "
562
+ "evaluated in spite of the missing fields, set the "
563
+ "ignore_missing_fields=True option in the esql_execute() call."
564
+ )
565
+ non_doc_fields: set[str] = set(query_columns) - doc_fields - {"_id"}
566
+ index_id = query_columns.index("_id")
567
+
568
+ results = response.body.get("values", [])
569
+ for column_values in results:
570
+ # create a dictionary with all the document fields, expanding the
571
+ # dot notation returned by ES|QL into the recursive dictionaries
572
+ # used by Document.from_dict()
573
+ doc_dict: Dict[str, Any] = {}
574
+ for col, val in zip(query_columns, column_values):
575
+ if col in doc_fields:
576
+ cols = col.split(".")
577
+ d = doc_dict
578
+ for c in cols[:-1]:
579
+ if c not in d:
580
+ d[c] = {}
581
+ d = d[c]
582
+ d[cols[-1]] = val
583
+
584
+ # create the document instance
585
+ obj = cls(meta={"_id": column_values[index_id]})
586
+ obj._from_dict(doc_dict)
587
+
588
+ if return_additional:
589
+ # build a dict with any other values included in the response
590
+ other = {
591
+ col: val
592
+ for col, val in zip(query_columns, column_values)
593
+ if col in non_doc_fields
594
+ }
595
+
596
+ yield obj, other
597
+ else:
598
+ yield obj
elasticsearch/dsl/aggs.py CHANGED
@@ -653,6 +653,54 @@ class Cardinality(Agg[_R]):
653
653
  )
654
654
 
655
655
 
656
+ class CartesianBounds(Agg[_R]):
657
+ """
658
+ A metric aggregation that computes the spatial bounding box containing
659
+ all values for a Point or Shape field.
660
+
661
+ :arg field: The field on which to run the aggregation.
662
+ :arg missing: The value to apply to documents that do not have a
663
+ value. By default, documents without a value are ignored.
664
+ :arg script:
665
+ """
666
+
667
+ name = "cartesian_bounds"
668
+
669
+ def __init__(
670
+ self,
671
+ *,
672
+ field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
673
+ missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
674
+ script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
675
+ **kwargs: Any,
676
+ ):
677
+ super().__init__(field=field, missing=missing, script=script, **kwargs)
678
+
679
+
680
+ class CartesianCentroid(Agg[_R]):
681
+ """
682
+ A metric aggregation that computes the weighted centroid from all
683
+ coordinate values for point and shape fields.
684
+
685
+ :arg field: The field on which to run the aggregation.
686
+ :arg missing: The value to apply to documents that do not have a
687
+ value. By default, documents without a value are ignored.
688
+ :arg script:
689
+ """
690
+
691
+ name = "cartesian_centroid"
692
+
693
+ def __init__(
694
+ self,
695
+ *,
696
+ field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
697
+ missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
698
+ script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
699
+ **kwargs: Any,
700
+ ):
701
+ super().__init__(field=field, missing=missing, script=script, **kwargs)
702
+
703
+
656
704
  class CategorizeText(Bucket[_R]):
657
705
  """
658
706
  A multi-bucket aggregation that groups semi-structured text into
@@ -735,6 +783,43 @@ class CategorizeText(Bucket[_R]):
735
783
  )
736
784
 
737
785
 
786
+ class ChangePoint(Pipeline[_R]):
787
+ """
788
+ A sibling pipeline that detects spikes, dips, and change points in a
789
+ metric. Given a distribution of values provided by the sibling multi-
790
+ bucket aggregation, this aggregation indicates the bucket of any spike
791
+ or dip and/or the bucket at which the largest change in the
792
+ distribution of values occurs, if they are statistically significant. There
793
+ must be at least 22 bucketed values. Fewer than 1,000 is preferred.
794
+
795
+ :arg format: `DecimalFormat` pattern for the output value. If
796
+ specified, the formatted value is returned in the aggregation’s
797
+ `value_as_string` property.
798
+ :arg gap_policy: Policy to apply when gaps are found in the data.
799
+ Defaults to `skip` if omitted.
800
+ :arg buckets_path: Path to the buckets that contain one set of values
801
+ to correlate.
802
+ """
803
+
804
+ name = "change_point"
805
+
806
+ def __init__(
807
+ self,
808
+ *,
809
+ format: Union[str, "DefaultType"] = DEFAULT,
810
+ gap_policy: Union[
811
+ Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
812
+ ] = DEFAULT,
813
+ buckets_path: Union[
814
+ str, Sequence[str], Mapping[str, str], "DefaultType"
815
+ ] = DEFAULT,
816
+ **kwargs: Any,
817
+ ):
818
+ super().__init__(
819
+ format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs
820
+ )
821
+
822
+
738
823
  class Children(Bucket[_R]):
739
824
  """
740
825
  A single bucket aggregation that selects child documents that have the
@@ -2980,6 +3065,14 @@ class SignificantTerms(Bucket[_R]):
2980
3065
  the foreground sample with a term divided by the number of
2981
3066
  documents in the background with the term.
2982
3067
  :arg script_heuristic: Customized score, implemented via a script.
3068
+ :arg p_value: Significant terms heuristic that calculates the p-value
3069
+ between the term existing in foreground and background sets. The
3070
+ p-value is the probability of obtaining test results at least as
3071
+ extreme as the results actually observed, under the assumption
3072
+ that the null hypothesis is correct. The p-value is calculated
3073
+ assuming that the foreground set and the background set are
3074
+ independent https://en.wikipedia.org/wiki/Bernoulli_trial, with
3075
+ the null hypothesis that the probabilities are the same.
2983
3076
  :arg shard_min_doc_count: Regulates the certainty a shard has if the
2984
3077
  term should actually be added to the candidate list or not with
2985
3078
  respect to the `min_doc_count`. Terms will only be considered if
@@ -3033,6 +3126,9 @@ class SignificantTerms(Bucket[_R]):
3033
3126
  script_heuristic: Union[
3034
3127
  "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
3035
3128
  ] = DEFAULT,
3129
+ p_value: Union[
3130
+ "types.PValueHeuristic", Dict[str, Any], "DefaultType"
3131
+ ] = DEFAULT,
3036
3132
  shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
3037
3133
  shard_size: Union[int, "DefaultType"] = DEFAULT,
3038
3134
  size: Union[int, "DefaultType"] = DEFAULT,
@@ -3051,6 +3147,7 @@ class SignificantTerms(Bucket[_R]):
3051
3147
  mutual_information=mutual_information,
3052
3148
  percentage=percentage,
3053
3149
  script_heuristic=script_heuristic,
3150
+ p_value=p_value,
3054
3151
  shard_min_doc_count=shard_min_doc_count,
3055
3152
  shard_size=shard_size,
3056
3153
  size=size,
@@ -34,6 +34,11 @@ from typing import (
34
34
  overload,
35
35
  )
36
36
 
37
+ try:
38
+ import annotationlib
39
+ except ImportError:
40
+ annotationlib = None
41
+
37
42
  try:
38
43
  from types import UnionType
39
44
  except ImportError:
@@ -49,6 +54,7 @@ from .utils import DOC_META_FIELDS, ObjectBase
49
54
  if TYPE_CHECKING:
50
55
  from elastic_transport import ObjectApiResponse
51
56
 
57
+ from ..esql.esql import ESQLBase
52
58
  from .index_base import IndexBase
53
59
 
54
60
 
@@ -331,6 +337,16 @@ class DocumentOptions:
331
337
  # # ignore attributes
332
338
  # field10: ClassVar[string] = "a regular class variable"
333
339
  annotations = attrs.get("__annotations__", {})
340
+ if not annotations and annotationlib:
341
+ # Python 3.14+ uses annotationlib
342
+ annotate = annotationlib.get_annotate_from_class_namespace(attrs)
343
+ if annotate:
344
+ annotations = (
345
+ annotationlib.call_annotate_function(
346
+ annotate, format=annotationlib.Format.VALUE
347
+ )
348
+ or {}
349
+ )
334
350
  fields = {n for n in attrs if isinstance(attrs[n], Field)}
335
351
  fields.update(annotations.keys())
336
352
  field_defaults = {}
@@ -602,3 +618,44 @@ class DocumentBase(ObjectBase):
602
618
 
603
619
  meta["_source"] = d
604
620
  return meta
621
+
622
+ @classmethod
623
+ def _get_field_names(
624
+ cls, for_esql: bool = False, nested_class: Optional[type[InnerDoc]] = None
625
+ ) -> List[str]:
626
+ """Return the list of field names used by this document.
627
+ If the document has nested objects, their fields are reported using dot
628
+ notation. If the ``for_esql`` argument is set to ``True``, the list omits
629
+ nested fields, which are currently unsupported in ES|QL.
630
+ """
631
+ fields = []
632
+ class_ = nested_class or cls
633
+ for field_name in class_._doc_type.mapping:
634
+ field = class_._doc_type.mapping[field_name]
635
+ if isinstance(field, Object):
636
+ if for_esql and isinstance(field, Nested):
637
+ # ES|QL does not recognize Nested fields at this time
638
+ continue
639
+ sub_fields = cls._get_field_names(
640
+ for_esql=for_esql, nested_class=field._doc_class
641
+ )
642
+ for sub_field in sub_fields:
643
+ fields.append(f"{field_name}.{sub_field}")
644
+ else:
645
+ fields.append(field_name)
646
+ return fields
647
+
648
+ @classmethod
649
+ def esql_from(cls) -> "ESQLBase":
650
+ """Return a base ES|QL query for instances of this document class.
651
+
652
+ The returned query is initialized with ``FROM`` and ``KEEP`` statements,
653
+ and can be completed as desired.
654
+ """
655
+ from ..esql import ESQL # here to avoid circular imports
656
+
657
+ return (
658
+ ESQL.from_(cls)
659
+ .metadata("_id")
660
+ .keep("_id", *tuple(cls._get_field_names(for_esql=True)))
661
+ )