elasticsearch 8.19.1__py3-none-any.whl → 8.19.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/_async/client/__init__.py +27 -49
- elasticsearch/_async/client/cat.py +481 -25
- elasticsearch/_async/client/connector.py +3 -3
- elasticsearch/_async/client/fleet.py +1 -5
- elasticsearch/_async/client/graph.py +1 -5
- elasticsearch/_async/client/ilm.py +2 -10
- elasticsearch/_async/client/indices.py +158 -31
- elasticsearch/_async/client/inference.py +35 -121
- elasticsearch/_async/client/nodes.py +2 -2
- elasticsearch/_async/client/shutdown.py +5 -15
- elasticsearch/_async/client/slm.py +1 -5
- elasticsearch/_async/client/streams.py +185 -0
- elasticsearch/_async/client/watcher.py +1 -5
- elasticsearch/_async/helpers.py +58 -9
- elasticsearch/_sync/client/__init__.py +27 -49
- elasticsearch/_sync/client/cat.py +481 -25
- elasticsearch/_sync/client/connector.py +3 -3
- elasticsearch/_sync/client/fleet.py +1 -5
- elasticsearch/_sync/client/graph.py +1 -5
- elasticsearch/_sync/client/ilm.py +2 -10
- elasticsearch/_sync/client/indices.py +158 -31
- elasticsearch/_sync/client/inference.py +35 -121
- elasticsearch/_sync/client/nodes.py +2 -2
- elasticsearch/_sync/client/shutdown.py +5 -15
- elasticsearch/_sync/client/slm.py +1 -5
- elasticsearch/_sync/client/streams.py +185 -0
- elasticsearch/_sync/client/watcher.py +1 -5
- elasticsearch/_version.py +2 -1
- elasticsearch/client.py +2 -0
- elasticsearch/compat.py +45 -1
- elasticsearch/dsl/__init__.py +28 -0
- elasticsearch/dsl/aggs.py +97 -0
- elasticsearch/dsl/document_base.py +16 -1
- elasticsearch/dsl/field.py +12 -1
- elasticsearch/dsl/query.py +1 -1
- elasticsearch/dsl/response/__init__.py +3 -0
- elasticsearch/dsl/types.py +185 -9
- elasticsearch/helpers/__init__.py +10 -1
- elasticsearch/helpers/actions.py +106 -33
- {elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/METADATA +2 -2
- {elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/RECORD +44 -42
- {elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/WHEEL +0 -0
- {elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/licenses/NOTICE +0 -0
elasticsearch/dsl/types.py
CHANGED

@@ -2323,9 +2323,7 @@ class LikeDocument(AttrDict[Any]):
     per_field_analyzer: Union[Mapping[Union[str, InstrumentedField], str], DefaultType]
     routing: Union[str, DefaultType]
     version: Union[int, DefaultType]
-    version_type: Union[
-        Literal["internal", "external", "external_gte", "force"], DefaultType
-    ]
+    version_type: Union[Literal["internal", "external", "external_gte"], DefaultType]
 
     def __init__(
         self,

@@ -2340,7 +2338,7 @@ class LikeDocument(AttrDict[Any]):
         routing: Union[str, DefaultType] = DEFAULT,
         version: Union[int, DefaultType] = DEFAULT,
         version_type: Union[
-            Literal["internal", "external", "external_gte", "force"], DefaultType
+            Literal["internal", "external", "external_gte"], DefaultType
         ] = DEFAULT,
         **kwargs: Any,
     ):

@@ -2771,6 +2769,31 @@ class NumericFielddata(AttrDict[Any]):
         super().__init__(kwargs)
 
 
+class PValueHeuristic(AttrDict[Any]):
+    """
+    :arg background_is_superset:
+    :arg normalize_above: Should the results be normalized when above the
+        given value. Allows for consistent significance results at various
+        scales. Note: `0` is a special value which means no normalization
+    """
+
+    background_is_superset: Union[bool, DefaultType]
+    normalize_above: Union[int, DefaultType]
+
+    def __init__(
+        self,
+        *,
+        background_is_superset: Union[bool, DefaultType] = DEFAULT,
+        normalize_above: Union[int, DefaultType] = DEFAULT,
+        **kwargs: Any,
+    ):
+        if background_is_superset is not DEFAULT:
+            kwargs["background_is_superset"] = background_is_superset
+        if normalize_above is not DEFAULT:
+            kwargs["normalize_above"] = normalize_above
+        super().__init__(kwargs)
+
+
 class PercentageScoreHeuristic(AttrDict[Any]):
     pass
 
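
The new `PValueHeuristic` type mirrors the `p_value` significance heuristic accepted by `significant_terms` aggregations. A hedged sketch of selecting it through the DSL; the index and field names are illustrative, not part of this release:

```python
from elasticsearch.dsl import Search

# Illustrative index/field; `p_value` takes the options modeled above.
s = Search(index="logs")
s.aggs.bucket(
    "suspicious_terms",
    "significant_terms",
    field="message.keyword",
    p_value={"background_is_superset": False, "normalize_above": 1000},
)
response = s.execute()  # requires a configured default connection
```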

@@ -4024,24 +4047,25 @@ class TestPopulation(AttrDict[Any]):
 
 class TextEmbedding(AttrDict[Any]):
     """
-    :arg model_id: (required)
     :arg model_text: (required)
+    :arg model_id: Model ID is required for all dense_vector fields but
+        may be inferred for semantic_text fields
     """
 
-    model_id: Union[str, DefaultType]
     model_text: Union[str, DefaultType]
+    model_id: Union[str, DefaultType]
 
     def __init__(
         self,
         *,
-        model_id: Union[str, DefaultType] = DEFAULT,
         model_text: Union[str, DefaultType] = DEFAULT,
+        model_id: Union[str, DefaultType] = DEFAULT,
         **kwargs: Any,
     ):
-        if model_id is not DEFAULT:
-            kwargs["model_id"] = model_id
         if model_text is not DEFAULT:
             kwargs["model_text"] = model_text
+        if model_id is not DEFAULT:
+            kwargs["model_id"] = model_id
         super().__init__(kwargs)
 
 
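
The reorder tracks the relaxed requirement described in the new docstring: only `model_text` is always mandatory, since the model can be inferred for `semantic_text` fields. A hedged sketch of a kNN search that omits `model_id` (the connection, index, and field names are illustrative):

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # illustrative connection

resp = client.search(
    index="articles",
    knn={
        "field": "content_embedding",
        "k": 10,
        "num_candidates": 100,
        "query_vector_builder": {
            # model_id omitted: inferred when the target is a semantic_text field
            "text_embedding": {"model_text": "bulk indexing with timed flushes"}
        },
    },
)
```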

@@ -4672,6 +4696,82 @@ class CardinalityAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class CartesianBoundsAggregate(AttrDict[Any]):
+    """
+    :arg bounds:
+    :arg meta:
+    """
+
+    bounds: "TopLeftBottomRightGeoBounds"
+    meta: Mapping[str, Any]
+
+
+class CartesianCentroidAggregate(AttrDict[Any]):
+    """
+    :arg count: (required)
+    :arg location:
+    :arg meta:
+    """
+
+    count: int
+    location: "CartesianPoint"
+    meta: Mapping[str, Any]
+
+
+class CartesianPoint(AttrDict[Any]):
+    """
+    :arg x: (required)
+    :arg y: (required)
+    """
+
+    x: float
+    y: float
+
+
+class ChangePointAggregate(AttrDict[Any]):
+    """
+    :arg type: (required)
+    :arg bucket:
+    :arg meta:
+    """
+
+    type: "ChangeType"
+    bucket: "ChangePointBucket"
+    meta: Mapping[str, Any]
+
+
+class ChangePointBucket(AttrDict[Any]):
+    """
+    :arg key: (required)
+    :arg doc_count: (required)
+    """
+
+    key: Union[int, float, str, bool, None, Any]
+    doc_count: int
+
+
+class ChangeType(AttrDict[Any]):
+    """
+    :arg dip:
+    :arg distribution_change:
+    :arg indeterminable:
+    :arg non_stationary:
+    :arg spike:
+    :arg stationary:
+    :arg step_change:
+    :arg trend_change:
+    """
+
+    dip: "Dip"
+    distribution_change: "DistributionChange"
+    indeterminable: "Indeterminable"
+    non_stationary: "NonStationary"
+    spike: "Spike"
+    stationary: "Stationary"
+    step_change: "StepChange"
+    trend_change: "TrendChange"
+
+
 class ChildrenAggregate(AttrDict[Any]):
     """
     :arg doc_count: (required)
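
These classes type the response of the `change_point` pipeline aggregation; `ChangeType` carries one attribute per kind of change the detector can report. A hedged sketch of requesting and reading one, reusing the illustrative `client` from above (index, field, and aggregation names are made up):

```python
resp = client.search(
    index="metrics",
    size=0,
    aggs={
        "per_minute": {
            "date_histogram": {"field": "@timestamp", "fixed_interval": "1m"},
            "aggs": {"avg_latency": {"avg": {"field": "latency_ms"}}},
        },
        "latency_change": {"change_point": {"buckets_path": "per_minute>avg_latency"}},
    },
)
change = resp["aggregations"]["latency_change"]
print(change["type"])  # e.g. {"spike": {"p_value": 0.0001, "change_point": 17}}
```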

@@ -4949,6 +5049,26 @@ class DfsStatisticsProfile(AttrDict[Any]):
     children: Sequence["DfsStatisticsProfile"]
 
 
+class Dip(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
+class DistributionChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class DoubleTermsAggregate(AttrDict[Any]):
     """
     Result of a `terms` aggregation when the field is some kind of decimal

@@ -5512,6 +5632,14 @@ class HitsMetadata(AttrDict[Any]):
     max_score: Union[float, None]
 
 
+class Indeterminable(AttrDict[Any]):
+    """
+    :arg reason: (required)
+    """
+
+    reason: str
+
+
 class InferenceAggregate(AttrDict[Any]):
     """
     :arg value:

@@ -5914,6 +6042,18 @@ class NestedIdentity(AttrDict[Any]):
     _nested: "NestedIdentity"
 
 
+class NonStationary(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg r_value: (required)
+    :arg trend: (required)
+    """
+
+    p_value: float
+    r_value: float
+    trend: str
+
+
 class ParentAggregate(AttrDict[Any]):
     """
     :arg doc_count: (required)

@@ -6271,6 +6411,16 @@ class SimpleValueAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class Spike(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class StandardDeviationBounds(AttrDict[Any]):
     """
     :arg upper: (required)

@@ -6307,6 +6457,10 @@ class StandardDeviationBoundsAsString(AttrDict[Any]):
     lower_sampling: str
 
 
+class Stationary(AttrDict[Any]):
+    pass
+
+
 class StatsAggregate(AttrDict[Any]):
     """
     Statistics aggregation result. `min`, `max` and `avg` are missing if

@@ -6362,6 +6516,16 @@ class StatsBucketAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class StepChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class StringRareTermsAggregate(AttrDict[Any]):
     """
     Result of the `rare_terms` aggregation when the field is a string.

@@ -6593,6 +6757,18 @@ class TotalHits(AttrDict[Any]):
     value: int
 
 
+class TrendChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg r_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    r_value: float
+    change_point: int
+
+
 class UnmappedRareTermsAggregate(AttrDict[Any]):
     """
     Result of a `rare_terms` aggregation when the field is unmapped.

elasticsearch/helpers/__init__.py
CHANGED

@@ -19,12 +19,21 @@ from .._async.helpers import async_bulk, async_reindex, async_scan, async_streaming_bulk
 from .._utils import fixup_module_metadata
 from .actions import _chunk_actions  # noqa: F401
 from .actions import _process_bulk_chunk  # noqa: F401
-from .actions import bulk, expand_action, parallel_bulk, reindex, scan, streaming_bulk
+from .actions import (
+    BULK_FLUSH,
+    bulk,
+    expand_action,
+    parallel_bulk,
+    reindex,
+    scan,
+    streaming_bulk,
+)
 from .errors import BulkIndexError, ScanError
 
 __all__ = [
     "BulkIndexError",
     "ScanError",
+    "BULK_FLUSH",
     "expand_action",
     "streaming_bulk",
     "bulk",
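
With this change the flush sentinel is importable straight from the helpers package:

```python
from elasticsearch.helpers import BULK_FLUSH, streaming_bulk
```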
elasticsearch/helpers/actions.py
CHANGED

@@ -16,9 +16,10 @@
 # under the License.
 
 import logging
+import queue
 import time
+from enum import Enum
 from operator import methodcaller
-from queue import Queue
 from typing import (
     Any,
     Callable,

@@ -37,13 +38,21 @@ from typing import (
 from elastic_transport import OpenTelemetrySpan
 
 from .. import Elasticsearch
-from ..compat import to_bytes
+from ..compat import safe_thread, to_bytes
 from ..exceptions import ApiError, NotFoundError, TransportError
 from ..serializer import Serializer
 from .errors import BulkIndexError, ScanError
 
 logger = logging.getLogger("elasticsearch.helpers")
 
+
+class BulkMeta(Enum):
+    flush = 1
+    done = 2
+
+
+BULK_FLUSH = BulkMeta.flush
+
 _TYPE_BULK_ACTION = Union[bytes, str, Dict[str, Any]]
 _TYPE_BULK_ACTION_HEADER = Dict[str, Any]
 _TYPE_BULK_ACTION_BODY = Union[None, bytes, Dict[str, Any]]
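
`BulkMeta.flush`, exported as `BULK_FLUSH`, is a sentinel that an action iterable can yield between documents to push out whatever is currently buffered. A hedged sketch; the batch source is hypothetical:

```python
from elasticsearch.helpers import BULK_FLUSH

def generate_actions(batches):
    # `batches` is a hypothetical iterable of lists of documents
    for batch in batches:
        for doc in batch:
            yield {"_index": "events", "_source": doc}
        # force the buffered chunk to be sent at each batch boundary
        yield BULK_FLUSH
```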

@@ -51,6 +60,13 @@ _TYPE_BULK_ACTION_HEADER_AND_BODY = Tuple[
     _TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY
 ]
 
+_TYPE_BULK_ACTION_WITH_META = Union[bytes, str, Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META = Union[Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY = Union[
+    Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
+    Tuple[BulkMeta, Any],
+]
+
 
 def expand_action(data: _TYPE_BULK_ACTION) -> _TYPE_BULK_ACTION_HEADER_AND_BODY:
     """

@@ -139,7 +155,9 @@ class _ActionChunker:
     ] = []
 
     def feed(
-        self, action: _TYPE_BULK_ACTION_HEADER, data: _TYPE_BULK_ACTION_BODY
+        self,
+        action: _TYPE_BULK_ACTION_HEADER_WITH_META,
+        data: _TYPE_BULK_ACTION_BODY,
     ) -> Optional[
         Tuple[
             List[

@@ -152,23 +170,25 @@ class _ActionChunker:
         ]
     ]:
         ret = None
-
-
-
-
-
-
-
-
-
-
-
-
+        action_bytes = b""
+        data_bytes: Optional[bytes] = None
+        cur_size = 0
+        if not isinstance(action, BulkMeta):
+            action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
+            # +1 to account for the trailing new line character
+            cur_size = len(action_bytes) + 1
+
+            if data is not None:
+                data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
+                cur_size += len(data_bytes) + 1
+            else:
+                data_bytes = None
 
         # full chunk, send it and start a new one
         if self.bulk_actions and (
             self.size + cur_size > self.max_chunk_bytes
             or self.action_count == self.chunk_size
+            or (action == BulkMeta.flush and self.bulk_actions)
         ):
             ret = (self.bulk_data, self.bulk_actions)
             self.bulk_actions = []

@@ -176,15 +196,16 @@
             self.size = 0
             self.action_count = 0
 
-
-
-
-
-
-
+        if not isinstance(action, BulkMeta):
+            self.bulk_actions.append(action_bytes)
+            if data_bytes is not None:
+                self.bulk_actions.append(data_bytes)
+                self.bulk_data.append((action, data))
+            else:
+                self.bulk_data.append((action,))
 
-
-
+        self.size += cur_size
+        self.action_count += 1
         return ret
 
     def flush(

@@ -209,9 +230,10 @@
 
 
 def _chunk_actions(
-    actions: Iterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
+    actions: Iterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
     chunk_size: int,
     max_chunk_bytes: int,
+    flush_after_seconds: Optional[float],
     serializer: Serializer,
 ) -> Iterable[
     Tuple[

@@ -231,10 +253,41 @@ def _chunk_actions(
     chunker = _ActionChunker(
         chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
     )
-    for action, data in actions:
-        ret = chunker.feed(action, data)
-        if ret:
-            yield ret
+
+    if not flush_after_seconds:
+        for action, data in actions:
+            ret = chunker.feed(action, data)
+            if ret:
+                yield ret
+    else:
+        item_queue: queue.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
+            queue.Queue()
+        )
+
+        def get_items() -> None:
+            try:
+                for item in actions:
+                    item_queue.put(item)
+            finally:
+                # make sure we signal the end even if there is an exception
+                item_queue.put((BulkMeta.done, None))
+
+        with safe_thread(get_items):
+            timeout: Optional[float] = flush_after_seconds
+            while True:
+                try:
+                    action, data = item_queue.get(timeout=timeout)
+                    timeout = flush_after_seconds
+                except queue.Empty:
+                    action, data = BulkMeta.flush, None
+                    timeout = None
+
+                if action is BulkMeta.done:
+                    break
+                ret = chunker.feed(action, data)
+                if ret:
+                    yield ret
+
     ret = chunker.flush()
     if ret:
         yield ret
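
The timed flush works by moving the producer onto a helper thread (via the new `safe_thread` context manager in `elasticsearch.compat`) so the consumer can block on `queue.Queue.get` with a timeout and translate `queue.Empty` into a flush marker. A minimal standalone illustration of the same pattern, independent of the client:

```python
import queue
import threading
import time

def timed_consume(items, flush_after_seconds):
    q: "queue.Queue" = queue.Queue()
    done = object()  # end-of-stream sentinel

    def feeder():
        try:
            for item in items:
                q.put(item)
        finally:
            q.put(done)  # signal the end even on exceptions

    threading.Thread(target=feeder, daemon=True).start()
    timeout = flush_after_seconds
    while True:
        try:
            item = q.get(timeout=timeout)
            timeout = flush_after_seconds
        except queue.Empty:
            yield "flush"   # stand-in for emitting a partial chunk
            timeout = None  # then wait indefinitely for the next item
            continue
        if item is done:
            break
        yield item

def slow_source():
    yield "doc-1"
    time.sleep(0.3)
    yield "doc-2"

print(list(timed_consume(slow_source(), flush_after_seconds=0.1)))
# ['doc-1', 'flush', 'doc-2']
```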

@@ -361,9 +414,10 @@ def _process_bulk_chunk(
 
 def streaming_bulk(
     client: Elasticsearch,
-    actions: Iterable[_TYPE_BULK_ACTION],
+    actions: Iterable[_TYPE_BULK_ACTION_WITH_META],
     chunk_size: int = 500,
     max_chunk_bytes: int = 100 * 1024 * 1024,
+    flush_after_seconds: Optional[float] = None,
     raise_on_error: bool = True,
     expand_action_callback: Callable[
         [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY

@@ -397,6 +451,9 @@
     :arg actions: iterable containing the actions to be executed
     :arg chunk_size: number of docs in one chunk sent to es (default: 500)
     :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to disable
+        the timeout-based flush. (default: 0)
     :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
         from the execution of the last chunk when some occur. By default we raise.
     :arg raise_on_exception: if ``False`` then don't propagate exceptions from
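
A hedged usage sketch of the new parameter, reusing the illustrative `client` and `generate_actions` from the sketches above:

```python
for ok, item in streaming_bulk(
    client,
    generate_actions(batches),  # `batches` is still hypothetical
    chunk_size=500,
    flush_after_seconds=5.0,  # emit partial chunks after 5s without new actions
):
    if not ok:
        print("failed:", item)
```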

@@ -425,6 +482,13 @@
 
     serializer = client.transport.serializers.get_serializer("application/json")
 
+    def expand_action_with_meta(
+        data: _TYPE_BULK_ACTION_WITH_META,
+    ) -> _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY:
+        if isinstance(data, BulkMeta):
+            return data, None
+        return expand_action_callback(data)
+
     bulk_data: List[
         Union[
             Tuple[_TYPE_BULK_ACTION_HEADER],

@@ -433,9 +497,10 @@
     ]
     bulk_actions: List[bytes]
     for bulk_data, bulk_actions in _chunk_actions(
-        map(expand_action_callback, actions),
+        map(expand_action_with_meta, actions),
         chunk_size,
         max_chunk_bytes,
+        flush_after_seconds,
         serializer,
     ):
         for attempt in range(max_retries + 1):

@@ -557,6 +622,7 @@
     thread_count: int = 4,
     chunk_size: int = 500,
     max_chunk_bytes: int = 100 * 1024 * 1024,
+    flush_after_seconds: Optional[float] = None,
     queue_size: int = 4,
     expand_action_callback: Callable[
         [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY

@@ -573,6 +639,9 @@
     :arg thread_count: size of the threadpool to use for the bulk requests
     :arg chunk_size: number of docs in one chunk sent to es (default: 500)
     :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to disable
+        the timeout-based flush. (default: 0)
     :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
         from the execution of the last chunk when some occur. By default we raise.
     :arg raise_on_exception: if ``False`` then don't propagate exceptions from
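
The same knob on `parallel_bulk`, again as a hedged sketch with the illustrative names from above:

```python
from elasticsearch.helpers import parallel_bulk

for ok, item in parallel_bulk(
    client,
    generate_actions(batches),  # `batches` is still hypothetical
    thread_count=4,
    flush_after_seconds=5.0,
):
    if not ok:
        print("failed:", item)
```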

@@ -596,7 +665,7 @@
         super()._setup_queues()  # type: ignore[misc]
         # The queue must be at least the size of the number of threads to
         # prevent hanging when inserting sentinel values during teardown.
-        self._inqueue: Queue[
+        self._inqueue: queue.Queue[
             Tuple[
                 List[
                     Union[

@@ -605,7 +674,7 @@
                 ],
                 List[bytes],
             ]
-        ] = Queue(max(queue_size, thread_count))
+        ] = queue.Queue(max(queue_size, thread_count))
         self._quick_put = self._inqueue.put
 
     with client._otel.helpers_span("helpers.parallel_bulk") as otel_span:

@@ -625,7 +694,11 @@
                 )
             ),
             _chunk_actions(
-                expanded_actions, chunk_size, max_chunk_bytes, serializer
+                expanded_actions,
+                chunk_size,
+                max_chunk_bytes,
+                flush_after_seconds,
+                serializer,
             ),
         ):
             yield from result

{elasticsearch-8.19.1.dist-info → elasticsearch-8.19.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: elasticsearch
-Version: 8.19.1
+Version: 8.19.2
 Summary: Python client for Elasticsearch
 Project-URL: Documentation, https://elasticsearch-py.readthedocs.io/
 Project-URL: Homepage, https://github.com/elastic/elasticsearch-py

@@ -45,7 +45,7 @@ Requires-Dist: nox; extra == 'dev'
 Requires-Dist: numpy; extra == 'dev'
 Requires-Dist: orjson; extra == 'dev'
 Requires-Dist: pandas; extra == 'dev'
-Requires-Dist: pyarrow; extra == 'dev'
+Requires-Dist: pyarrow; (python_version < '3.14') and extra == 'dev'
 Requires-Dist: pyright; extra == 'dev'
 Requires-Dist: pytest; extra == 'dev'
 Requires-Dist: pytest-asyncio; extra == 'dev'