elasticsearch 9.1.1__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registries. It is provided for informational purposes only.
- elasticsearch/_async/client/__init__.py +96 -44
- elasticsearch/_async/client/async_search.py +7 -0
- elasticsearch/_async/client/cat.py +489 -26
- elasticsearch/_async/client/cluster.py +9 -8
- elasticsearch/_async/client/connector.py +3 -3
- elasticsearch/_async/client/eql.py +7 -0
- elasticsearch/_async/client/esql.py +26 -3
- elasticsearch/_async/client/fleet.py +1 -5
- elasticsearch/_async/client/graph.py +1 -5
- elasticsearch/_async/client/ilm.py +2 -10
- elasticsearch/_async/client/indices.py +181 -37
- elasticsearch/_async/client/inference.py +291 -124
- elasticsearch/_async/client/ingest.py +8 -0
- elasticsearch/_async/client/license.py +4 -2
- elasticsearch/_async/client/logstash.py +3 -1
- elasticsearch/_async/client/ml.py +2 -2
- elasticsearch/_async/client/nodes.py +3 -5
- elasticsearch/_async/client/project.py +67 -0
- elasticsearch/_async/client/security.py +39 -0
- elasticsearch/_async/client/shutdown.py +5 -15
- elasticsearch/_async/client/simulate.py +8 -0
- elasticsearch/_async/client/slm.py +1 -5
- elasticsearch/_async/client/snapshot.py +20 -10
- elasticsearch/_async/client/sql.py +7 -0
- elasticsearch/_async/client/streams.py +185 -0
- elasticsearch/_async/client/watcher.py +1 -5
- elasticsearch/_async/helpers.py +74 -12
- elasticsearch/_sync/client/__init__.py +96 -44
- elasticsearch/_sync/client/async_search.py +7 -0
- elasticsearch/_sync/client/cat.py +489 -26
- elasticsearch/_sync/client/cluster.py +9 -8
- elasticsearch/_sync/client/connector.py +3 -3
- elasticsearch/_sync/client/eql.py +7 -0
- elasticsearch/_sync/client/esql.py +26 -3
- elasticsearch/_sync/client/fleet.py +1 -5
- elasticsearch/_sync/client/graph.py +1 -5
- elasticsearch/_sync/client/ilm.py +2 -10
- elasticsearch/_sync/client/indices.py +181 -37
- elasticsearch/_sync/client/inference.py +291 -124
- elasticsearch/_sync/client/ingest.py +8 -0
- elasticsearch/_sync/client/license.py +4 -2
- elasticsearch/_sync/client/logstash.py +3 -1
- elasticsearch/_sync/client/ml.py +2 -2
- elasticsearch/_sync/client/nodes.py +3 -5
- elasticsearch/_sync/client/project.py +67 -0
- elasticsearch/_sync/client/security.py +39 -0
- elasticsearch/_sync/client/shutdown.py +5 -15
- elasticsearch/_sync/client/simulate.py +8 -0
- elasticsearch/_sync/client/slm.py +1 -5
- elasticsearch/_sync/client/snapshot.py +20 -10
- elasticsearch/_sync/client/sql.py +7 -0
- elasticsearch/_sync/client/streams.py +185 -0
- elasticsearch/_sync/client/watcher.py +1 -5
- elasticsearch/_version.py +2 -1
- elasticsearch/client.py +4 -0
- elasticsearch/compat.py +30 -1
- elasticsearch/dsl/__init__.py +28 -0
- elasticsearch/dsl/_async/document.py +2 -1
- elasticsearch/dsl/_sync/document.py +2 -1
- elasticsearch/dsl/aggs.py +97 -0
- elasticsearch/dsl/document_base.py +53 -13
- elasticsearch/dsl/field.py +21 -2
- elasticsearch/dsl/pydantic.py +152 -0
- elasticsearch/dsl/query.py +5 -1
- elasticsearch/dsl/response/__init__.py +3 -0
- elasticsearch/dsl/search_base.py +5 -1
- elasticsearch/dsl/types.py +226 -14
- elasticsearch/esql/esql.py +331 -41
- elasticsearch/esql/functions.py +88 -0
- elasticsearch/helpers/__init__.py +10 -1
- elasticsearch/helpers/actions.py +106 -33
- {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/METADATA +27 -5
- {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/RECORD +76 -71
- {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/WHEEL +0 -0
- {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.1.1.dist-info → elasticsearch-9.2.0.dist-info}/licenses/NOTICE +0 -0
elasticsearch/compat.py
CHANGED

```diff
@@ -18,8 +18,10 @@
 import inspect
 import os
 import sys
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Tuple, Type, Union
+from threading import Thread
+from typing import Any, Callable, Iterator, Tuple, Type, Union
 
 string_types: Tuple[Type[str], Type[bytes]] = (str, bytes)
 
@@ -76,9 +78,36 @@ def warn_stacklevel() -> int:
     return 0
 
 
+@contextmanager
+def safe_thread(
+    target: Callable[..., Any], *args: Any, **kwargs: Any
+) -> Iterator[Thread]:
+    """Run a thread within a context manager block.
+
+    The thread is automatically joined when the block ends. If the thread raised
+    an exception, it is raised in the caller's context.
+    """
+    captured_exception = None
+
+    def run() -> None:
+        try:
+            target(*args, **kwargs)
+        except BaseException as exc:
+            nonlocal captured_exception
+            captured_exception = exc
+
+    thread = Thread(target=run)
+    thread.start()
+    yield thread
+    thread.join()
+    if captured_exception:
+        raise captured_exception
+
+
 __all__ = [
     "string_types",
     "to_str",
     "to_bytes",
     "warn_stacklevel",
+    "safe_thread",
 ]
```
elasticsearch/dsl/__init__.py
CHANGED

```diff
@@ -38,23 +38,30 @@ from .faceted_search import (
     TermsFacet,
 )
 from .field import (
+    AggregateMetricDouble,
+    Alias,
     Binary,
     Boolean,
     Byte,
     Completion,
     ConstantKeyword,
+    CountedKeyword,
     CustomField,
     Date,
+    DateNanos,
     DateRange,
     DenseVector,
     Double,
     DoubleRange,
     Field,
+    Flattened,
     Float,
     FloatRange,
     GeoPoint,
     GeoShape,
     HalfFloat,
+    Histogram,
+    IcuCollationKeyword,
     Integer,
     IntegerRange,
     Ip,
@@ -63,21 +70,28 @@ from .field import (
     Keyword,
     Long,
     LongRange,
+    MatchOnlyText,
     Murmur3,
     Nested,
     Object,
+    Passthrough,
     Percolator,
     Point,
     RangeField,
     RankFeature,
     RankFeatures,
+    RankVectors,
     ScaledFloat,
     SearchAsYouType,
+    SemanticText,
     Shape,
     Short,
     SparseVector,
     Text,
     TokenCount,
+    UnsignedLong,
+    Version,
+    Wildcard,
     construct_field,
 )
 from .function import SF
@@ -108,6 +122,8 @@ __all__ = [
     "A",
     "Agg",
     "AggResponse",
+    "AggregateMetricDouble",
+    "Alias",
     "AsyncComposableIndexTemplate",
     "AsyncDocument",
     "AsyncEmptySearch",
@@ -126,9 +142,11 @@ __all__ = [
     "Completion",
     "ComposableIndexTemplate",
     "ConstantKeyword",
+    "CountedKeyword",
     "CustomField",
     "Date",
     "DateHistogramFacet",
+    "DateNanos",
     "DateRange",
     "DenseVector",
     "Document",
@@ -142,12 +160,15 @@ __all__ = [
     "FacetedResponse",
     "FacetedSearch",
     "Field",
+    "Flattened",
     "Float",
     "FloatRange",
     "GeoPoint",
     "GeoShape",
     "HalfFloat",
+    "Histogram",
     "HistogramFacet",
+    "IcuCollationKeyword",
     "IllegalOperation",
     "Index",
     "IndexTemplate",
@@ -162,12 +183,14 @@ __all__ = [
     "LongRange",
     "M",
     "Mapping",
+    "MatchOnlyText",
     "MetaField",
     "MultiSearch",
     "Murmur3",
     "Nested",
     "NestedFacet",
     "Object",
+    "Passthrough",
     "Percolator",
     "Point",
     "Q",
@@ -177,11 +200,13 @@ __all__ = [
     "RangeField",
     "RankFeature",
     "RankFeatures",
+    "RankVectors",
     "Response",
     "SF",
     "ScaledFloat",
     "Search",
     "SearchAsYouType",
+    "SemanticText",
     "Shape",
     "Short",
     "SparseVector",
@@ -189,9 +214,12 @@ __all__ = [
     "Text",
     "TokenCount",
     "UnknownDslObject",
+    "UnsignedLong",
     "UpdateByQuery",
     "UpdateByQueryResponse",
     "ValidationException",
+    "Version",
+    "Wildcard",
     "analyzer",
     "async_connections",
     "char_filter",
```
elasticsearch/dsl/_async/document.py
CHANGED

```diff
@@ -126,9 +126,10 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
         Create an :class:`~elasticsearch.dsl.Search` instance that will search
         over this ``Document``.
         """
-        return AsyncSearch[Self](
+        s = AsyncSearch[Self](
             using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
         )
+        return s.source(exclude_vectors=False)
 
     @classmethod
     async def get(
```

elasticsearch/dsl/_sync/document.py
CHANGED

```diff
@@ -120,9 +120,10 @@ class Document(DocumentBase, metaclass=IndexMeta):
         Create an :class:`~elasticsearch.dsl.Search` instance that will search
         over this ``Document``.
         """
-        return Search[Self](
+        s = Search[Self](
             using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls]
         )
+        return s.source(exclude_vectors=False)
 
     @classmethod
     def get(
```
elasticsearch/dsl/aggs.py
CHANGED

```diff
@@ -653,6 +653,54 @@ class Cardinality(Agg[_R]):
         )
 
 
+class CartesianBounds(Agg[_R]):
+    """
+    A metric aggregation that computes the spatial bounding box containing
+    all values for a Point or Shape field.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_bounds"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
+class CartesianCentroid(Agg[_R]):
+    """
+    A metric aggregation that computes the weighted centroid from all
+    coordinate values for point and shape fields.
+
+    :arg field: The field on which to run the aggregation.
+    :arg missing: The value to apply to documents that do not have a
+        value. By default, documents without a value are ignored.
+    :arg script:
+    """
+
+    name = "cartesian_centroid"
+
+    def __init__(
+        self,
+        *,
+        field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+        missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+        script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
 class CategorizeText(Bucket[_R]):
     """
     A multi-bucket aggregation that groups semi-structured text into
@@ -735,6 +783,43 @@ class CategorizeText(Bucket[_R]):
         )
 
 
+class ChangePoint(Pipeline[_R]):
+    """
+    A sibling pipeline that detects, spikes, dips, and change points in a
+    metric. Given a distribution of values provided by the sibling multi-
+    bucket aggregation, this aggregation indicates the bucket of any spike
+    or dip and/or the bucket at which the largest change in the
+    distribution of values, if they are statistically significant. There
+    must be at least 22 bucketed values. Fewer than 1,000 is preferred.
+
+    :arg format: `DecimalFormat` pattern for the output value. If
+        specified, the formatted value is returned in the aggregation’s
+        `value_as_string` property.
+    :arg gap_policy: Policy to apply when gaps are found in the data.
+        Defaults to `skip` if omitted.
+    :arg buckets_path: Path to the buckets that contain one set of values
+        to correlate.
+    """
+
+    name = "change_point"
+
+    def __init__(
+        self,
+        *,
+        format: Union[str, "DefaultType"] = DEFAULT,
+        gap_policy: Union[
+            Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
+        ] = DEFAULT,
+        buckets_path: Union[
+            str, Sequence[str], Mapping[str, str], "DefaultType"
+        ] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs
+        )
+
+
 class Children(Bucket[_R]):
     """
     A single bucket aggregation that selects child documents that have the
@@ -2980,6 +3065,14 @@ class SignificantTerms(Bucket[_R]):
         the foreground sample with a term divided by the number of
         documents in the background with the term.
     :arg script_heuristic: Customized score, implemented via a script.
+    :arg p_value: Significant terms heuristic that calculates the p-value
+        between the term existing in foreground and background sets. The
+        p-value is the probability of obtaining test results at least as
+        extreme as the results actually observed, under the assumption
+        that the null hypothesis is correct. The p-value is calculated
+        assuming that the foreground set and the background set are
+        independent https://en.wikipedia.org/wiki/Bernoulli_trial, with
+        the null hypothesis that the probabilities are the same.
     :arg shard_min_doc_count: Regulates the certainty a shard has if the
         term should actually be added to the candidate list or not with
         respect to the `min_doc_count`. Terms will only be considered if
@@ -3033,6 +3126,9 @@ class SignificantTerms(Bucket[_R]):
         script_heuristic: Union[
             "types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
         ] = DEFAULT,
+        p_value: Union[
+            "types.PValueHeuristic", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
         shard_size: Union[int, "DefaultType"] = DEFAULT,
         size: Union[int, "DefaultType"] = DEFAULT,
@@ -3051,6 +3147,7 @@ class SignificantTerms(Bucket[_R]):
             mutual_information=mutual_information,
             percentage=percentage,
             script_heuristic=script_heuristic,
+            p_value=p_value,
             shard_min_doc_count=shard_min_doc_count,
             shard_size=shard_size,
             size=size,
```
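A sketch of how the new aggregation classes can be attached to a search (index, field, and aggregation names are illustrative):

```python
from elasticsearch.dsl import Search, aggs

# Cartesian bounds/centroid are plain metric aggregations:
s = Search(index="shapes")
s.aggs.metric("bounds", aggs.CartesianBounds(field="location"))
s.aggs.metric("centroid", aggs.CartesianCentroid(field="location"))

# change_point is a sibling pipeline: its buckets_path points at the
# values produced by a multi-bucket aggregation (at least 22 buckets).
s = Search(index="metrics")
s.aggs.bucket(
    "per_day", aggs.DateHistogram(field="@timestamp", calendar_interval="day")
).metric("avg_latency", aggs.Avg(field="latency"))
s.aggs.pipeline("spike", aggs.ChangePoint(buckets_path="per_day>avg_latency"))
```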
elasticsearch/dsl/document_base.py
CHANGED

```diff
@@ -34,6 +34,13 @@ from typing import (
     overload,
 )
 
+from typing_extensions import _AnnotatedAlias
+
+try:
+    import annotationlib
+except ImportError:
+    annotationlib = None
+
 try:
     from types import UnionType
 except ImportError:
@@ -332,6 +339,16 @@ class DocumentOptions:
         # # ignore attributes
         # field10: ClassVar[string] = "a regular class variable"
         annotations = attrs.get("__annotations__", {})
+        if not annotations and annotationlib:
+            # Python 3.14+ uses annotationlib
+            annotate = annotationlib.get_annotate_from_class_namespace(attrs)
+            if annotate:
+                annotations = (
+                    annotationlib.call_annotate_function(
+                        annotate, format=annotationlib.Format.VALUE
+                    )
+                    or {}
+                )
         fields = {n for n in attrs if isinstance(attrs[n], Field)}
         fields.update(annotations.keys())
         field_defaults = {}
@@ -343,6 +360,10 @@ class DocumentOptions:
             # the field has a type annotation, so next we try to figure out
             # what field type we can use
             type_ = annotations[name]
+            type_metadata = []
+            if isinstance(type_, _AnnotatedAlias):
+                type_metadata = type_.__metadata__
+                type_ = type_.__origin__
             skip = False
             required = True
             multi = False
@@ -389,6 +410,12 @@ class DocumentOptions:
                 # use best field type for the type hint provided
                 field, field_kwargs = self.type_annotation_map[type_]  # type: ignore[assignment]
 
+            # if this field does not have a right-hand value, we look in the metadata
+            # of the annotation to see if we find it there
+            for md in type_metadata:
+                if isinstance(md, (_FieldMetadataDict, Field)):
+                    attrs[name] = md
+
             if field:
                 field_kwargs = {
                     "multi": multi,
@@ -401,17 +428,20 @@ class DocumentOptions:
                 # this field has a right-side value, which can be field
                 # instance on its own or wrapped with mapped_field()
                 attr_value = attrs[name]
-                if isinstance(attr_value, dict):
+                if isinstance(attr_value, _FieldMetadataDict):
                     # the mapped_field() wrapper function was used so we need
                     # to look for the field instance and also record any
                     # dataclass-style defaults
+                    if attr_value.get("exclude"):
+                        # skip this field
+                        continue
                     attr_value = attrs[name].get("_field")
                     default_value = attrs[name].get("default") or attrs[name].get(
                         "default_factory"
                     )
                     if default_value:
                         field_defaults[name] = default_value
-                if attr_value:
+                if isinstance(attr_value, Field):
                     value = attr_value
                     if required is not None:
                         value._required = required
@@ -490,12 +520,19 @@ class Mapped(Generic[_FieldType]):
 M = Mapped
 
 
+class _FieldMetadataDict(dict[str, Any]):
+    """This class is used to identify metadata returned by the `mapped_field()` function."""
+
+    pass
+
+
 def mapped_field(
     field: Optional[Field] = None,
     *,
     init: bool = True,
     default: Any = None,
     default_factory: Optional[Callable[[], Any]] = None,
+    exclude: bool = False,
     **kwargs: Any,
 ) -> Any:
     """Construct a field using dataclass behaviors
@@ -505,22 +542,25 @@ def mapped_field(
     options.
 
     :param field: The instance of ``Field`` to use for this field. If not provided,
-
+        an instance that is appropriate for the type given to the field is used.
     :param init: a value of ``True`` adds this field to the constructor, and a
-
+        value of ``False`` omits it from it. The default is ``True``.
     :param default: a default value to use for this field when one is not provided
-
+        explicitly.
     :param default_factory: a callable that returns a default value for the field,
-
-
+        when one isn't provided explicitly. Only one of ``factory`` and
+        ``default_factory`` can be used.
+    :param exclude: Set to ``True`` to exclude this field from the Elasticsearch
+        index.
     """
-    return {
-        "_field": field,
-        "init": init,
-        "default": default,
-        "default_factory": default_factory,
+    return _FieldMetadataDict(
+        _field=field,
+        init=init,
+        default=default,
+        default_factory=default_factory,
+        exclude=exclude,
         **kwargs,
-    }
+    )
 
 
 @dataclass_transform(field_specifiers=(mapped_field,))
```
elasticsearch/dsl/field.py
CHANGED

```diff
@@ -572,7 +572,11 @@ class Object(Field):
         if isinstance(data, collections.abc.Mapping):
             return data
 
-        return data.to_dict()
+        try:
+            return data.to_dict(skip_empty=skip_empty)
+        except TypeError:
+            # this would only happen if an AttrDict was given instead of an InnerDoc
+            return data.to_dict()
 
     def clean(self, data: Any) -> Any:
         data = super().clean(data)
@@ -3862,14 +3866,21 @@ class SemanticText(Field):
         by using the Update mapping API. Use the Create inference API to
         create the endpoint. If not specified, the inference endpoint
         defined by inference_id will be used at both index and query time.
+    :arg index_options: Settings for index_options that override any
+        defaults used by semantic_text, for example specific quantization
+        settings.
     :arg chunking_settings: Settings for chunking text into smaller
         passages. If specified, these will override the chunking settings
         sent in the inference endpoint associated with inference_id. If
         chunking settings are updated, they will not be applied to
         existing documents until they are reindexed.
+    :arg fields:
     """
 
     name = "semantic_text"
+    _param_defs = {
+        "fields": {"type": "field", "hash": True},
+    }
 
     def __init__(
         self,
@@ -3877,9 +3888,13 @@ class SemanticText(Field):
         meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
         inference_id: Union[str, "DefaultType"] = DEFAULT,
         search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+        index_options: Union[
+            "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+        ] = DEFAULT,
         chunking_settings: Union[
-            "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+            "types.ChunkingSettings", None, Dict[str, Any], "DefaultType"
         ] = DEFAULT,
+        fields: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
         **kwargs: Any,
     ):
         if meta is not DEFAULT:
@@ -3888,8 +3903,12 @@ class SemanticText(Field):
             kwargs["inference_id"] = inference_id
         if search_inference_id is not DEFAULT:
             kwargs["search_inference_id"] = search_inference_id
+        if index_options is not DEFAULT:
+            kwargs["index_options"] = index_options
         if chunking_settings is not DEFAULT:
            kwargs["chunking_settings"] = chunking_settings
+        if fields is not DEFAULT:
+            kwargs["fields"] = fields
         super().__init__(*args, **kwargs)
```
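A sketch of the new `SemanticText` options; the inference endpoint id and the `index_options` payload are assumptions for illustration, not taken from the diff:

```python
from elasticsearch.dsl import Document, SemanticText, Text


class Page(Document):
    body = SemanticText(
        inference_id="my-elser-endpoint",  # assumed endpoint name
        index_options={"dense_vector": {"type": "bbq_hnsw"}},  # assumed settings
        chunking_settings=None,  # None is now accepted by the type hint
        fields={"raw": Text()},  # multi-field, mirrors the new _param_defs entry
    )

    class Index:
        name = "pages"
```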
elasticsearch/dsl/pydantic.py
ADDED

```diff
@@ -0,0 +1,152 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type
+
+from pydantic import BaseModel, Field, PrivateAttr
+from typing_extensions import Annotated, Self, dataclass_transform
+
+from elasticsearch import dsl
+
+
+class ESMeta(BaseModel):
+    """Metadata items associated with Elasticsearch documents."""
+
+    id: str = ""
+    index: str = ""
+    primary_term: int = 0
+    seq_no: int = 0
+    version: int = 0
+    score: float = 0
+
+
+class _BaseModel(BaseModel):
+    meta: Annotated[ESMeta, dsl.mapped_field(exclude=True)] = Field(
+        default=ESMeta(),
+        init=False,
+    )
+
+
+class _BaseESModelMetaclass(type(BaseModel)):  # type: ignore[misc]
+    """Generic metaclass methods for BaseEsModel and AsyncBaseESModel."""
+
+    @staticmethod
+    def process_annotations(
+        metacls: Type["_BaseESModelMetaclass"], annotations: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Process Pydantic typing annotations and adapt them so that they can
+        be used to create the Elasticsearch document.
+        """
+        updated_annotations = {}
+        for var, ann in annotations.items():
+            if isinstance(ann, type(BaseModel)):
+                # an inner Pydantic model is transformed into an Object field
+                updated_annotations[var] = metacls.make_dsl_class(
+                    metacls, dsl.InnerDoc, ann
+                )
+            elif (
+                hasattr(ann, "__origin__")
+                and ann.__origin__ in [list, List]
+                and isinstance(ann.__args__[0], type(BaseModel))
+            ):
+                # an inner list of Pydantic models is transformed into a Nested field
+                updated_annotations[var] = List[  # type: ignore[assignment,misc]
+                    metacls.make_dsl_class(metacls, dsl.InnerDoc, ann.__args__[0])
+                ]
+            else:
+                updated_annotations[var] = ann
+        return updated_annotations
+
+    @staticmethod
+    def make_dsl_class(
+        metacls: Type["_BaseESModelMetaclass"],
+        dsl_class: type,
+        pydantic_model: type,
+        pydantic_attrs: Optional[Dict[str, Any]] = None,
+    ) -> type:
+        """Create a DSL document class dynamically, using the structure of a
+        Pydantic model."""
+        dsl_attrs = {
+            attr: value
+            for attr, value in dsl_class.__dict__.items()
+            if not attr.startswith("__")
+        }
+        pydantic_attrs = {
+            **(pydantic_attrs or {}),
+            "__annotations__": metacls.process_annotations(
+                metacls, pydantic_model.__annotations__
+            ),
+        }
+        return type(dsl_class)(
+            f"_ES{pydantic_model.__name__}",
+            (dsl_class,),
+            {
+                **pydantic_attrs,
+                **dsl_attrs,
+                "__qualname__": f"_ES{pydantic_model.__name__}",
+            },
+        )
+
+
+class BaseESModelMetaclass(_BaseESModelMetaclass):
+    """Metaclass for the BaseESModel class."""
+
+    def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]) -> Any:
+        model = super().__new__(cls, name, bases, attrs)
+        model._doc = cls.make_dsl_class(cls, dsl.Document, model, attrs)
+        return model
+
+
+class AsyncBaseESModelMetaclass(_BaseESModelMetaclass):
+    """Metaclass for the AsyncBaseESModel class."""
+
+    def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]) -> Any:
+        model = super().__new__(cls, name, bases, attrs)
+        model._doc = cls.make_dsl_class(cls, dsl.AsyncDocument, model, attrs)
+        return model
+
+
+@dataclass_transform(kw_only_default=True, field_specifiers=(Field, PrivateAttr))
+class BaseESModel(_BaseModel, metaclass=BaseESModelMetaclass):
+    _doc: ClassVar[Type[dsl.Document]]
+
+    def to_doc(self) -> dsl.Document:
+        """Convert this model to an Elasticsearch document."""
+        data = self.model_dump()
+        meta = {f"_{k}": v for k, v in data.pop("meta", {}).items() if v}
+        return self._doc(**meta, **data)
+
+    @classmethod
+    def from_doc(cls, dsl_obj: dsl.Document) -> Self:
+        """Create a model from the given Elasticsearch document."""
+        return cls(meta=ESMeta(**dsl_obj.meta.to_dict()), **dsl_obj.to_dict())
+
+
+@dataclass_transform(kw_only_default=True, field_specifiers=(Field, PrivateAttr))
+class AsyncBaseESModel(_BaseModel, metaclass=AsyncBaseESModelMetaclass):
+    _doc: ClassVar[Type[dsl.AsyncDocument]]
+
+    def to_doc(self) -> dsl.AsyncDocument:
+        """Convert this model to an Elasticsearch document."""
+        data = self.model_dump()
+        meta = {f"_{k}": v for k, v in data.pop("meta", {}).items() if v}
+        return self._doc(**meta, **data)
+
+    @classmethod
+    def from_doc(cls, dsl_obj: dsl.AsyncDocument) -> Self:
+        """Create a model from the given Elasticsearch document."""
+        return cls(meta=ESMeta(**dsl_obj.meta.to_dict()), **dsl_obj.to_dict())
```