elasticsearch 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (76)
  1. elasticsearch/_async/client/__init__.py +96 -44
  2. elasticsearch/_async/client/async_search.py +7 -0
  3. elasticsearch/_async/client/cat.py +489 -26
  4. elasticsearch/_async/client/cluster.py +9 -8
  5. elasticsearch/_async/client/connector.py +3 -3
  6. elasticsearch/_async/client/eql.py +7 -0
  7. elasticsearch/_async/client/esql.py +26 -3
  8. elasticsearch/_async/client/fleet.py +1 -5
  9. elasticsearch/_async/client/graph.py +1 -5
  10. elasticsearch/_async/client/ilm.py +2 -10
  11. elasticsearch/_async/client/indices.py +181 -37
  12. elasticsearch/_async/client/inference.py +291 -124
  13. elasticsearch/_async/client/ingest.py +8 -0
  14. elasticsearch/_async/client/license.py +4 -2
  15. elasticsearch/_async/client/logstash.py +3 -1
  16. elasticsearch/_async/client/ml.py +2 -2
  17. elasticsearch/_async/client/nodes.py +3 -5
  18. elasticsearch/_async/client/project.py +67 -0
  19. elasticsearch/_async/client/security.py +39 -0
  20. elasticsearch/_async/client/shutdown.py +5 -15
  21. elasticsearch/_async/client/simulate.py +8 -0
  22. elasticsearch/_async/client/slm.py +1 -5
  23. elasticsearch/_async/client/snapshot.py +20 -10
  24. elasticsearch/_async/client/sql.py +7 -0
  25. elasticsearch/_async/client/streams.py +185 -0
  26. elasticsearch/_async/client/watcher.py +1 -5
  27. elasticsearch/_async/helpers.py +74 -12
  28. elasticsearch/_sync/client/__init__.py +96 -44
  29. elasticsearch/_sync/client/async_search.py +7 -0
  30. elasticsearch/_sync/client/cat.py +489 -26
  31. elasticsearch/_sync/client/cluster.py +9 -8
  32. elasticsearch/_sync/client/connector.py +3 -3
  33. elasticsearch/_sync/client/eql.py +7 -0
  34. elasticsearch/_sync/client/esql.py +26 -3
  35. elasticsearch/_sync/client/fleet.py +1 -5
  36. elasticsearch/_sync/client/graph.py +1 -5
  37. elasticsearch/_sync/client/ilm.py +2 -10
  38. elasticsearch/_sync/client/indices.py +181 -37
  39. elasticsearch/_sync/client/inference.py +291 -124
  40. elasticsearch/_sync/client/ingest.py +8 -0
  41. elasticsearch/_sync/client/license.py +4 -2
  42. elasticsearch/_sync/client/logstash.py +3 -1
  43. elasticsearch/_sync/client/ml.py +2 -2
  44. elasticsearch/_sync/client/nodes.py +3 -5
  45. elasticsearch/_sync/client/project.py +67 -0
  46. elasticsearch/_sync/client/security.py +39 -0
  47. elasticsearch/_sync/client/shutdown.py +5 -15
  48. elasticsearch/_sync/client/simulate.py +8 -0
  49. elasticsearch/_sync/client/slm.py +1 -5
  50. elasticsearch/_sync/client/snapshot.py +20 -10
  51. elasticsearch/_sync/client/sql.py +7 -0
  52. elasticsearch/_sync/client/streams.py +185 -0
  53. elasticsearch/_sync/client/watcher.py +1 -5
  54. elasticsearch/_version.py +2 -1
  55. elasticsearch/client.py +4 -0
  56. elasticsearch/compat.py +30 -1
  57. elasticsearch/dsl/__init__.py +28 -0
  58. elasticsearch/dsl/_async/document.py +2 -1
  59. elasticsearch/dsl/_sync/document.py +2 -1
  60. elasticsearch/dsl/aggs.py +97 -0
  61. elasticsearch/dsl/document_base.py +53 -13
  62. elasticsearch/dsl/field.py +21 -2
  63. elasticsearch/dsl/pydantic.py +152 -0
  64. elasticsearch/dsl/query.py +5 -1
  65. elasticsearch/dsl/response/__init__.py +3 -0
  66. elasticsearch/dsl/search_base.py +5 -1
  67. elasticsearch/dsl/types.py +226 -14
  68. elasticsearch/esql/esql.py +331 -41
  69. elasticsearch/esql/functions.py +88 -0
  70. elasticsearch/helpers/__init__.py +10 -1
  71. elasticsearch/helpers/actions.py +106 -33
  72. {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/METADATA +27 -5
  73. {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/RECORD +76 -71
  74. {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/WHEEL +0 -0
  75. {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/licenses/NOTICE +0 -0
elasticsearch/compat.py CHANGED
@@ -18,8 +18,10 @@
 import inspect
 import os
 import sys
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Tuple, Type, Union
+from threading import Thread
+from typing import Any, Callable, Iterator, Tuple, Type, Union

 string_types: Tuple[Type[str], Type[bytes]] = (str, bytes)

@@ -76,9 +78,36 @@ def warn_stacklevel() -> int:
     return 0


+@contextmanager
+def safe_thread(
+    target: Callable[..., Any], *args: Any, **kwargs: Any
+) -> Iterator[Thread]:
+    """Run a thread within a context manager block.
+
+    The thread is automatically joined when the block ends. If the thread raised
+    an exception, it is raised in the caller's context.
+    """
+    captured_exception = None
+
+    def run() -> None:
+        try:
+            target(*args, **kwargs)
+        except BaseException as exc:
+            nonlocal captured_exception
+            captured_exception = exc
+
+    thread = Thread(target=run)
+    thread.start()
+    yield thread
+    thread.join()
+    if captured_exception:
+        raise captured_exception
+
+
 __all__ = [
     "string_types",
     "to_str",
     "to_bytes",
     "warn_stacklevel",
+    "safe_thread",
 ]
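For orientation, a minimal sketch of how the new `safe_thread` helper is used (the worker function below is invented for illustration):

    from elasticsearch.compat import safe_thread

    def worker(message: str) -> None:
        # hypothetical task; any exception raised here is captured
        # and re-raised in the caller when the with-block joins
        print(message)

    with safe_thread(worker, "running in a background thread"):
        pass  # the caller can do other work while the thread runs
    # leaving the block joins the thread and surfaces any captured exception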
elasticsearch/dsl/__init__.py CHANGED
@@ -38,23 +38,30 @@ from .faceted_search import (
     TermsFacet,
 )
 from .field import (
+    AggregateMetricDouble,
+    Alias,
     Binary,
     Boolean,
     Byte,
     Completion,
     ConstantKeyword,
+    CountedKeyword,
     CustomField,
     Date,
+    DateNanos,
     DateRange,
     DenseVector,
     Double,
     DoubleRange,
     Field,
+    Flattened,
     Float,
     FloatRange,
     GeoPoint,
     GeoShape,
     HalfFloat,
+    Histogram,
+    IcuCollationKeyword,
     Integer,
     IntegerRange,
     Ip,
@@ -63,21 +70,28 @@ from .field import (
     Keyword,
     Long,
     LongRange,
+    MatchOnlyText,
     Murmur3,
     Nested,
     Object,
+    Passthrough,
     Percolator,
     Point,
     RangeField,
     RankFeature,
     RankFeatures,
+    RankVectors,
     ScaledFloat,
     SearchAsYouType,
+    SemanticText,
     Shape,
     Short,
     SparseVector,
     Text,
     TokenCount,
+    UnsignedLong,
+    Version,
+    Wildcard,
     construct_field,
 )
 from .function import SF
@@ -108,6 +122,8 @@ __all__ = [
     "A",
     "Agg",
     "AggResponse",
+    "AggregateMetricDouble",
+    "Alias",
     "AsyncComposableIndexTemplate",
     "AsyncDocument",
     "AsyncEmptySearch",
@@ -126,9 +142,11 @@ __all__ = [
     "Completion",
     "ComposableIndexTemplate",
     "ConstantKeyword",
+    "CountedKeyword",
     "CustomField",
     "Date",
     "DateHistogramFacet",
+    "DateNanos",
     "DateRange",
     "DenseVector",
     "Document",
@@ -142,12 +160,15 @@ __all__ = [
     "FacetedResponse",
     "FacetedSearch",
     "Field",
+    "Flattened",
     "Float",
     "FloatRange",
     "GeoPoint",
     "GeoShape",
     "HalfFloat",
+    "Histogram",
     "HistogramFacet",
+    "IcuCollationKeyword",
     "IllegalOperation",
     "Index",
     "IndexTemplate",
@@ -162,12 +183,14 @@ __all__ = [
     "LongRange",
     "M",
     "Mapping",
+    "MatchOnlyText",
     "MetaField",
     "MultiSearch",
     "Murmur3",
     "Nested",
     "NestedFacet",
     "Object",
+    "Passthrough",
     "Percolator",
     "Point",
     "Q",
@@ -177,11 +200,13 @@ __all__ = [
     "RangeField",
     "RankFeature",
     "RankFeatures",
+    "RankVectors",
     "Response",
     "SF",
     "ScaledFloat",
     "Search",
     "SearchAsYouType",
+    "SemanticText",
     "Shape",
     "Short",
     "SparseVector",
@@ -189,9 +214,12 @@ __all__ = [
     "Text",
     "TokenCount",
     "UnknownDslObject",
+    "UnsignedLong",
     "UpdateByQuery",
     "UpdateByQueryResponse",
     "ValidationException",
+    "Version",
+    "Wildcard",
     "analyzer",
     "async_connections",
     "char_filter",
elasticsearch/dsl/_async/document.py CHANGED
@@ -126,9 +126,10 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
         Create an :class:`~elasticsearch.dsl.Search` instance that will search
         over this ``Document``.
         """
-        return AsyncSearch(
+        s = AsyncSearch[Self](
             using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
         )
+        return s.source(exclude_vectors=False)

     @classmethod
     async def get(
elasticsearch/dsl/_sync/document.py CHANGED
@@ -120,9 +120,10 @@ class Document(DocumentBase, metaclass=IndexMeta):
         Create an :class:`~elasticsearch.dsl.Search` instance that will search
         over this ``Document``.
         """
-        return Search(
+        s = Search[Self](
            using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
         )
+        return s.source(exclude_vectors=False)

     @classmethod
     def get(
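The practical effect of this change, sketched with a hypothetical document class and assuming a configured default connection: `search()` now returns a typed `Search[Self]`/`AsyncSearch[Self]`, and `exclude_vectors=False` keeps vector fields in the returned `_source`:

    from elasticsearch.dsl import DenseVector, Document

    class Product(Document):  # hypothetical document class
        embedding = DenseVector()

        class Index:
            name = "products"

    s = Product.search()      # typed as Search[Product], vectors included
    for hit in s.execute():
        print(hit.embedding)  # each hit is typed as Product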
elasticsearch/dsl/aggs.py CHANGED
@@ -653,6 +653,54 @@ class Cardinality(Agg[_R]):
     )


+class CartesianBounds(Agg[_R]):
+    """
+    A metric aggregation that computes the spatial bounding box containing
+    all values for a Point or Shape field.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_bounds"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
+class CartesianCentroid(Agg[_R]):
+    """
+    A metric aggregation that computes the weighted centroid from all
+    coordinate values for point and shape fields.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_centroid"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
 class CategorizeText(Bucket[_R]):
     """
     A multi-bucket aggregation that groups semi-structured text into
@@ -735,6 +783,43 @@ class CategorizeText(Bucket[_R]):
     )


+class ChangePoint(Pipeline[_R]):
+    """
+    A sibling pipeline that detects, spikes, dips, and change points in a
+    metric. Given a distribution of values provided by the sibling multi-
+    bucket aggregation, this aggregation indicates the bucket of any spike
+    or dip and/or the bucket at which the largest change in the
+    distribution of values, if they are statistically significant. There
+    must be at least 22 bucketed values. Fewer than 1,000 is preferred.
+
+    :arg format: `DecimalFormat` pattern for the output value. If
+        specified, the formatted value is returned in the aggregation’s
+        `value_as_string` property.
+    :arg gap_policy: Policy to apply when gaps are found in the data.
+        Defaults to `skip` if omitted.
+    :arg buckets_path: Path to the buckets that contain one set of values
+        to correlate.
+    """
+
+    name = "change_point"
+
+    def __init__(
+        self,
+        *,
+        format: Union[str, "DefaultType"] = DEFAULT,
+        gap_policy: Union[
+            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
+        ] = DEFAULT,
+        buckets_path: Union[
+            str, Sequence[str], Mapping[str, str], "DefaultType"
+        ] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs
+        )
+
+
 class Children(Bucket[_R]):
     """
     A single bucket aggregation that selects child documents that have the
@@ -2980,6 +3065,14 @@ class SignificantTerms(Bucket[_R]):
        the foreground sample with a term divided by the number of
        documents in the background with the term.
     :arg script_heuristic: Customized score, implemented via a script.
+    :arg p_value: Significant terms heuristic that calculates the p-value
+        between the term existing in foreground and background sets. The
+        p-value is the probability of obtaining test results at least as
+        extreme as the results actually observed, under the assumption
+        that the null hypothesis is correct. The p-value is calculated
+        assuming that the foreground set and the background set are
+        independent https://en.wikipedia.org/wiki/Bernoulli_trial, with
+        the null hypothesis that the probabilities are the same.
     :arg shard_min_doc_count: Regulates the certainty a shard has if the
        term should actually be added to the candidate list or not with
        respect to the `min_doc_count`. Terms will only be considered if
@@ -3033,6 +3126,9 @@ class SignificantTerms(Bucket[_R]):
         script_heuristic: Union[
             "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
         ] = DEFAULT,
+        p_value: Union[
+            "types.PValueHeuristic", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
         shard_size: Union[int, "DefaultType"] = DEFAULT,
         size: Union[int, "DefaultType"] = DEFAULT,
@@ -3051,6 +3147,7 @@ class SignificantTerms(Bucket[_R]):
             mutual_information=mutual_information,
             percentage=percentage,
             script_heuristic=script_heuristic,
+            p_value=p_value,
             shard_min_doc_count=shard_min_doc_count,
             shard_size=shard_size,
             size=size,
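A hedged sketch of how the new `ChangePoint` sibling pipeline composes with a multi-bucket aggregation (index and field names are invented):

    from elasticsearch.dsl import Search, aggs

    s = Search(index="sensor-readings")  # hypothetical index
    # bucket readings by hour and average them...
    per_hour = s.aggs.bucket(
        "per_hour", aggs.DateHistogram(field="timestamp", fixed_interval="1h")
    )
    per_hour.metric("avg_value", aggs.Avg(field="value"))
    # ...then ask for the statistically significant change point, if any
    s.aggs.pipeline("shift", aggs.ChangePoint(buckets_path="per_hour>avg_value"))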
elasticsearch/dsl/document_base.py CHANGED
@@ -34,6 +34,13 @@ from typing import (
     overload,
 )

+from typing_extensions import _AnnotatedAlias
+
+try:
+    import annotationlib
+except ImportError:
+    annotationlib = None
+
 try:
     from types import UnionType
 except ImportError:
@@ -332,6 +339,16 @@ class DocumentOptions:
         # # ignore attributes
         # field10: ClassVar[string] = "a regular class variable"
         annotations = attrs.get("__annotations__", {})
+        if not annotations and annotationlib:
+            # Python 3.14+ uses annotationlib
+            annotate = annotationlib.get_annotate_from_class_namespace(attrs)
+            if annotate:
+                annotations = (
+                    annotationlib.call_annotate_function(
+                        annotate, format=annotationlib.Format.VALUE
+                    )
+                    or {}
+                )
         fields = {n for n in attrs if isinstance(attrs[n], Field)}
         fields.update(annotations.keys())
         field_defaults = {}
@@ -343,6 +360,10 @@ class DocumentOptions:
             # the field has a type annotation, so next we try to figure out
             # what field type we can use
             type_ = annotations[name]
+            type_metadata = []
+            if isinstance(type_, _AnnotatedAlias):
+                type_metadata = type_.__metadata__
+                type_ = type_.__origin__
             skip = False
             required = True
             multi = False
@@ -389,6 +410,12 @@ class DocumentOptions:
                 # use best field type for the type hint provided
                 field, field_kwargs = self.type_annotation_map[type_]  # type: ignore[assignment]

+            # if this field does not have a right-hand value, we look in the metadata
+            # of the annotation to see if we find it there
+            for md in type_metadata:
+                if isinstance(md, (_FieldMetadataDict, Field)):
+                    attrs[name] = md
+
             if field:
                 field_kwargs = {
                     "multi": multi,
@@ -401,17 +428,20 @@ class DocumentOptions:
             # this field has a right-side value, which can be field
             # instance on its own or wrapped with mapped_field()
             attr_value = attrs[name]
-            if isinstance(attr_value, dict):
+            if isinstance(attr_value, _FieldMetadataDict):
                 # the mapped_field() wrapper function was used so we need
                 # to look for the field instance and also record any
                 # dataclass-style defaults
+                if attr_value.get("exclude"):
+                    # skip this field
+                    continue
                 attr_value = attrs[name].get("_field")
                 default_value = attrs[name].get("default") or attrs[name].get(
                     "default_factory"
                 )
                 if default_value:
                     field_defaults[name] = default_value
-            if attr_value:
+            if isinstance(attr_value, Field):
                 value = attr_value
                 if required is not None:
                     value._required = required
@@ -490,12 +520,19 @@ class Mapped(Generic[_FieldType]):
 M = Mapped


+class _FieldMetadataDict(dict[str, Any]):
+    """This class is used to identify metadata returned by the `mapped_field()` function."""
+
+    pass
+
+
 def mapped_field(
     field: Optional[Field] = None,
     *,
     init: bool = True,
     default: Any = None,
     default_factory: Optional[Callable[[], Any]] = None,
+    exclude: bool = False,
     **kwargs: Any,
 ) -> Any:
     """Construct a field using dataclass behaviors
@@ -505,22 +542,25 @@ def mapped_field(
     options.

     :param field: The instance of ``Field`` to use for this field. If not provided,
-      an instance that is appropriate for the type given to the field is used.
+        an instance that is appropriate for the type given to the field is used.
     :param init: a value of ``True`` adds this field to the constructor, and a
-      value of ``False`` omits it from it. The default is ``True``.
+        value of ``False`` omits it from it. The default is ``True``.
     :param default: a default value to use for this field when one is not provided
-      explicitly.
+        explicitly.
     :param default_factory: a callable that returns a default value for the field,
-      when one isn't provided explicitly. Only one of ``factory`` and
-      ``default_factory`` can be used.
+        when one isn't provided explicitly. Only one of ``factory`` and
+        ``default_factory`` can be used.
+    :param exclude: Set to ``True`` to exclude this field from the Elasticsearch
+        index.
     """
-    return {
-        "_field": field,
-        "init": init,
-        "default": default,
-        "default_factory": default_factory,
+    return _FieldMetadataDict(
+        _field=field,
+        init=init,
+        default=default,
+        default_factory=default_factory,
+        exclude=exclude,
         **kwargs,
-    }
+    )


 @dataclass_transform(field_specifiers=(mapped_field,))
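A short sketch of the two behaviors these changes enable, `Annotated` field metadata and the new `exclude` flag (the document class below is invented):

    from typing import Annotated
    from elasticsearch.dsl import Document, Keyword, mapped_field

    class Article(Document):
        # the field instance can now live in the annotation's metadata
        category: Annotated[str, Keyword()]
        # excluded entirely from the Elasticsearch mapping
        draft_notes: str = mapped_field(exclude=True)

        class Index:
            name = "articles"  # hypothetical index name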
elasticsearch/dsl/field.py CHANGED
@@ -572,7 +572,11 @@ class Object(Field):
         if isinstance(data, collections.abc.Mapping):
             return data

-        return data.to_dict(skip_empty=skip_empty)
+        try:
+            return data.to_dict(skip_empty=skip_empty)
+        except TypeError:
+            # this would only happen if an AttrDict was given instead of an InnerDoc
+            return data.to_dict()

     def clean(self, data: Any) -> Any:
         data = super().clean(data)
@@ -3862,14 +3866,21 @@ class SemanticText(Field):
        by using the Update mapping API. Use the Create inference API to
        create the endpoint. If not specified, the inference endpoint
        defined by inference_id will be used at both index and query time.
+    :arg index_options: Settings for index_options that override any
+        defaults used by semantic_text, for example specific quantization
+        settings.
     :arg chunking_settings: Settings for chunking text into smaller
        passages. If specified, these will override the chunking settings
        sent in the inference endpoint associated with inference_id. If
        chunking settings are updated, they will not be applied to
        existing documents until they are reindexed.
+    :arg fields:
     """

     name = "semantic_text"
+    _param_defs = {
+        "fields": {"type": "field", "hash": True},
+    }

     def __init__(
         self,
@@ -3877,9 +3888,13 @@ class SemanticText(Field):
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         inference_id: Union[str, "DefaultType"] = DEFAULT,
         search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         chunking_settings: Union[
-            "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+            "types.ChunkingSettings", None, Dict[str, Any], "DefaultType"
         ] = DEFAULT,
+        fields: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
         **kwargs: Any,
     ):
         if meta is not DEFAULT:
@@ -3888,8 +3903,12 @@ class SemanticText(Field):
             kwargs["inference_id"] = inference_id
         if search_inference_id is not DEFAULT:
             kwargs["search_inference_id"] = search_inference_id
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
         if chunking_settings is not DEFAULT:
             kwargs["chunking_settings"] = chunking_settings
+        if fields is not DEFAULT:
+            kwargs["fields"] = fields
         super().__init__(*args, **kwargs)

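A sketch of the extended `SemanticText` signature, assuming an existing inference endpoint; the `index_options` payload shown is an illustrative dict, not a verified shape:

    from elasticsearch.dsl import Document, SemanticText

    class FAQ(Document):
        answer = SemanticText(
            inference_id="my-inference-endpoint",  # hypothetical endpoint id
            index_options={"dense_vector": {"type": "bbq_hnsw"}},  # assumed payload
        )

        class Index:
            name = "faq"  # hypothetical index name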
elasticsearch/dsl/pydantic.py ADDED
@@ -0,0 +1,152 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type
+
+from pydantic import BaseModel, Field, PrivateAttr
+from typing_extensions import Annotated, Self, dataclass_transform
+
+from elasticsearch import dsl
+
+
+class ESMeta(BaseModel):
+    """Metadata items associated with Elasticsearch documents."""
+
+    id: str = ""
+    index: str = ""
+    primary_term: int = 0
+    seq_no: int = 0
+    version: int = 0
+    score: float = 0
+
+
+class _BaseModel(BaseModel):
+    meta: Annotated[ESMeta, dsl.mapped_field(exclude=True)] = Field(
+        default=ESMeta(),
+        init=False,
+    )
+
+
+class _BaseESModelMetaclass(type(BaseModel)):  # type: ignore[misc]
+    """Generic metaclass methods for BaseEsModel and AsyncBaseESModel."""
+
+    @staticmethod
+    def process_annotations(
+        metacls: Type["_BaseESModelMetaclass"], annotations: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process Pydantic typing annotations and adapt them so that they can
+        be used to create the Elasticsearch document.
+        """
+        updated_annotations = {}
+        for var, ann in annotations.items():
+            if isinstance(ann, type(BaseModel)):
+                # an inner Pydantic model is transformed into an Object field
+                updated_annotations[var] = metacls.make_dsl_class(
+                    metacls, dsl.InnerDoc, ann
+                )
+            elif (
+                hasattr(ann, "__origin__")
+                and ann.__origin__ in [list, List]
+                and isinstance(ann.__args__[0], type(BaseModel))
+            ):
+                # an inner list of Pydantic models is transformed into a Nested field
+                updated_annotations[var] = List[  # type: ignore[assignment,misc]
+                    metacls.make_dsl_class(metacls, dsl.InnerDoc, ann.__args__[0])
+                ]
+            else:
+                updated_annotations[var] = ann
+        return updated_annotations
+
+    @staticmethod
+    def make_dsl_class(
+        metacls: Type["_BaseESModelMetaclass"],
+        dsl_class: type,
+        pydantic_model: type,
+        pydantic_attrs: Optional[Dict[str, Any]] = None,
+    ) -> type:
+        """Create a DSL document class dynamically, using the structure of a
+        Pydantic model."""
+        dsl_attrs = {
+            attr: value
+            for attr, value in dsl_class.__dict__.items()
+            if not attr.startswith("__")
+        }
+        pydantic_attrs = {
+            **(pydantic_attrs or {}),
+            "__annotations__": metacls.process_annotations(
+                metacls, pydantic_model.__annotations__
+            ),
+        }
+        return type(dsl_class)(
+            f"_ES{pydantic_model.__name__}",
+            (dsl_class,),
+            {
+                **pydantic_attrs,
+                **dsl_attrs,
+                "__qualname__": f"_ES{pydantic_model.__name__}",
+            },
+        )
+
+
+class BaseESModelMetaclass(_BaseESModelMetaclass):
+    """Metaclass for the BaseESModel class."""
+
+    def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]) -> Any:
+        model = super().__new__(cls, name, bases, attrs)
+        model._doc = cls.make_dsl_class(cls, dsl.Document, model, attrs)
+        return model
+
+
+class AsyncBaseESModelMetaclass(_BaseESModelMetaclass):
+    """Metaclass for the AsyncBaseESModel class."""
+
+    def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]) -> Any:
+        model = super().__new__(cls, name, bases, attrs)
+        model._doc = cls.make_dsl_class(cls, dsl.AsyncDocument, model, attrs)
+        return model
+
+
+@dataclass_transform(kw_only_default=True, field_specifiers=(Field, PrivateAttr))
+class BaseESModel(_BaseModel, metaclass=BaseESModelMetaclass):
+    _doc: ClassVar[Type[dsl.Document]]
+
+    def to_doc(self) -> dsl.Document:
+        """Convert this model to an Elasticsearch document."""
+        data = self.model_dump()
+        meta = {f"_{k}": v for k, v in data.pop("meta", {}).items() if v}
+        return self._doc(**meta, **data)
+
+    @classmethod
+    def from_doc(cls, dsl_obj: dsl.Document) -> Self:
+        """Create a model from the given Elasticsearch document."""
+        return cls(meta=ESMeta(**dsl_obj.meta.to_dict()), **dsl_obj.to_dict())
+
+
+@dataclass_transform(kw_only_default=True, field_specifiers=(Field, PrivateAttr))
+class AsyncBaseESModel(_BaseModel, metaclass=AsyncBaseESModelMetaclass):
+    _doc: ClassVar[Type[dsl.AsyncDocument]]
+
+    def to_doc(self) -> dsl.AsyncDocument:
+        """Convert this model to an Elasticsearch document."""
+        data = self.model_dump()
+        meta = {f"_{k}": v for k, v in data.pop("meta", {}).items() if v}
+        return self._doc(**meta, **data)
+
+    @classmethod
+    def from_doc(cls, dsl_obj: dsl.AsyncDocument) -> Self:
+        """Create a model from the given Elasticsearch document."""
+        return cls(meta=ESMeta(**dsl_obj.meta.to_dict()), **dsl_obj.to_dict())
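To ground the new module, a hedged sketch of the intended round trip between a Pydantic model and its generated DSL document (model, index, and connection details are invented):

    from elasticsearch.dsl import connections
    from elasticsearch.dsl.pydantic import BaseESModel

    class User(BaseESModel):
        name: str
        age: int = 0

    connections.create_connection(hosts=["http://localhost:9200"])

    user = User(name="alice", age=30)
    doc = user.to_doc()       # Pydantic model -> generated _ESUser document
    doc.save(index="users")   # persisted like any dsl.Document

    fetched = User._doc.get(id=doc.meta.id, index="users")
    round_tripped = User.from_doc(fetched)  # document -> Pydantic model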