elasticsearch 9.1.1__py3-none-any.whl → 9.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. elasticsearch/_async/client/__init__.py +2 -0
  2. elasticsearch/_async/client/cat.py +481 -25
  3. elasticsearch/_async/client/connector.py +3 -3
  4. elasticsearch/_async/client/indices.py +23 -9
  5. elasticsearch/_async/client/inference.py +11 -1
  6. elasticsearch/_async/client/logstash.py +3 -1
  7. elasticsearch/_async/client/nodes.py +2 -2
  8. elasticsearch/_async/client/shutdown.py +5 -15
  9. elasticsearch/_async/client/streams.py +186 -0
  10. elasticsearch/_async/client/watcher.py +1 -5
  11. elasticsearch/_async/helpers.py +58 -9
  12. elasticsearch/_sync/client/__init__.py +2 -0
  13. elasticsearch/_sync/client/cat.py +481 -25
  14. elasticsearch/_sync/client/connector.py +3 -3
  15. elasticsearch/_sync/client/indices.py +23 -9
  16. elasticsearch/_sync/client/inference.py +11 -1
  17. elasticsearch/_sync/client/logstash.py +3 -1
  18. elasticsearch/_sync/client/nodes.py +2 -2
  19. elasticsearch/_sync/client/shutdown.py +5 -15
  20. elasticsearch/_sync/client/streams.py +186 -0
  21. elasticsearch/_sync/client/watcher.py +1 -5
  22. elasticsearch/_version.py +2 -1
  23. elasticsearch/client.py +2 -0
  24. elasticsearch/compat.py +43 -1
  25. elasticsearch/dsl/__init__.py +28 -0
  26. elasticsearch/dsl/aggs.py +97 -0
  27. elasticsearch/dsl/document_base.py +15 -0
  28. elasticsearch/dsl/field.py +21 -2
  29. elasticsearch/dsl/query.py +5 -1
  30. elasticsearch/dsl/response/__init__.py +3 -0
  31. elasticsearch/dsl/types.py +226 -14
  32. elasticsearch/helpers/__init__.py +10 -1
  33. elasticsearch/helpers/actions.py +106 -33
  34. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.2.dist-info}/METADATA +2 -2
  35. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.2.dist-info}/RECORD +38 -36
  36. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.2.dist-info}/WHEEL +0 -0
  37. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.2.dist-info}/licenses/LICENSE +0 -0
  38. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.2.dist-info}/licenses/NOTICE +0 -0
@@ -103,7 +103,7 @@ class ConnectorClient(NamespacedClient):
103
103
 
104
104
  :param connector_id: The unique identifier of the connector to be deleted
105
105
  :param delete_sync_jobs: A flag indicating if associated sync jobs should be
106
- also removed. Defaults to false.
106
+ also removed.
107
107
  :param hard: A flag indicating if the connector should be hard deleted.
108
108
  """
109
109
  if connector_id in SKIP_IN_PATH:
@@ -360,7 +360,7 @@ class ConnectorClient(NamespacedClient):
360
360
 
361
361
  :param connector_name: A comma-separated list of connector names to fetch connector
362
362
  documents for
363
- :param from_: Starting offset (default: 0)
363
+ :param from_: Starting offset
364
364
  :param include_deleted: A flag to indicate if the desired connector should be
365
365
  fetched, even if it was soft-deleted.
366
366
  :param index_name: A comma-separated list of connector index names to fetch connector
@@ -955,7 +955,7 @@ class ConnectorClient(NamespacedClient):
955
955
  `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-connector-sync-job-list>`_
956
956
 
957
957
  :param connector_id: A connector id to fetch connector sync jobs for
958
- :param from_: Starting offset (default: 0)
958
+ :param from_: Starting offset
959
959
  :param job_type: A comma-separated list of job types to fetch the sync jobs for
960
960
  :param size: Specifies a max number of results to get
961
961
  :param status: A sync job status to fetch connector sync jobs for
@@ -812,11 +812,7 @@ class IndicesClient(NamespacedClient):
812
812
  raise ValueError("Empty value passed for parameter 'source'")
813
813
  if dest in SKIP_IN_PATH:
814
814
  raise ValueError("Empty value passed for parameter 'dest'")
815
- if create_from is None and body is None:
816
- raise ValueError(
817
- "Empty value passed for parameters 'create_from' and 'body', one of them should be set."
818
- )
819
- elif create_from is not None and body is not None:
815
+ if create_from is not None and body is not None:
820
816
  raise ValueError("Cannot set both 'create_from' and 'body'")
821
817
  __path_parts: t.Dict[str, str] = {
822
818
  "source": _quote(source),
@@ -833,7 +829,11 @@ class IndicesClient(NamespacedClient):
833
829
  if pretty is not None:
834
830
  __query["pretty"] = pretty
835
831
  __body = create_from if create_from is not None else body
836
- __headers = {"accept": "application/json", "content-type": "application/json"}
832
+ if not __body:
833
+ __body = None
834
+ __headers = {"accept": "application/json"}
835
+ if __body is not None:
836
+ __headers["content-type"] = "application/json"
837
837
  return await self.perform_request( # type: ignore[return-value]
838
838
  "PUT",
839
839
  __path,
@@ -4549,6 +4549,7 @@ class IndicesClient(NamespacedClient):
4549
4549
  For data streams, the API runs the refresh operation on the stream’s backing indices.</p>
4550
4550
  <p>By default, Elasticsearch periodically refreshes indices every second, but only on indices that have received one search request or more in the last 30 seconds.
4551
4551
  You can change this default interval with the <code>index.refresh_interval</code> setting.</p>
4552
+ <p>In Elastic Cloud Serverless, the default refresh interval is 5 seconds across all indices.</p>
4552
4553
  <p>Refresh requests are synchronous and do not return a response until the refresh operation completes.</p>
4553
4554
  <p>Refreshes are resource-intensive.
4554
4555
  To ensure good cluster performance, it's recommended to wait for Elasticsearch's periodic refresh rather than performing an explicit refresh when possible.</p>
@@ -5414,7 +5415,9 @@ class IndicesClient(NamespacedClient):
5414
5415
  path_parts=__path_parts,
5415
5416
  )
5416
5417
 
5417
- @_rewrite_parameters()
5418
+ @_rewrite_parameters(
5419
+ body_name="index_template",
5420
+ )
5418
5421
  async def simulate_index_template(
5419
5422
  self,
5420
5423
  *,
@@ -5425,6 +5428,8 @@ class IndicesClient(NamespacedClient):
5425
5428
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
5426
5429
  human: t.Optional[bool] = None,
5427
5430
  include_defaults: t.Optional[bool] = None,
5431
+ index_template: t.Optional[t.Mapping[str, t.Any]] = None,
5432
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
5428
5433
  master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
5429
5434
  pretty: t.Optional[bool] = None,
5430
5435
  ) -> ObjectApiResponse[t.Any]:
@@ -5444,12 +5449,15 @@ class IndicesClient(NamespacedClient):
5444
5449
  only be dry-run added if new or can also replace an existing one
5445
5450
  :param include_defaults: If true, returns all relevant default configurations
5446
5451
  for the index template.
5452
+ :param index_template:
5447
5453
  :param master_timeout: Period to wait for a connection to the master node. If
5448
5454
  no response is received before the timeout expires, the request fails and
5449
5455
  returns an error.
5450
5456
  """
5451
5457
  if name in SKIP_IN_PATH:
5452
5458
  raise ValueError("Empty value passed for parameter 'name'")
5459
+ if index_template is not None and body is not None:
5460
+ raise ValueError("Cannot set both 'index_template' and 'body'")
5453
5461
  __path_parts: t.Dict[str, str] = {"name": _quote(name)}
5454
5462
  __path = f'/_index_template/_simulate_index/{__path_parts["name"]}'
5455
5463
  __query: t.Dict[str, t.Any] = {}
@@ -5469,12 +5477,18 @@ class IndicesClient(NamespacedClient):
5469
5477
  __query["master_timeout"] = master_timeout
5470
5478
  if pretty is not None:
5471
5479
  __query["pretty"] = pretty
5480
+ __body = index_template if index_template is not None else body
5481
+ if not __body:
5482
+ __body = None
5472
5483
  __headers = {"accept": "application/json"}
5484
+ if __body is not None:
5485
+ __headers["content-type"] = "application/json"
5473
5486
  return await self.perform_request( # type: ignore[return-value]
5474
5487
  "POST",
5475
5488
  __path,
5476
5489
  params=__query,
5477
5490
  headers=__headers,
5491
+ body=__body,
5478
5492
  endpoint_id="indices.simulate_index_template",
5479
5493
  path_parts=__path_parts,
5480
5494
  )
@@ -5823,8 +5837,8 @@ class IndicesClient(NamespacedClient):
5823
5837
  are requested).
5824
5838
  :param include_unloaded_segments: If true, the response includes information
5825
5839
  from segments that are not loaded into memory.
5826
- :param level: Indicates whether statistics are aggregated at the cluster, index,
5827
- or shard level.
5840
+ :param level: Indicates whether statistics are aggregated at the cluster, indices,
5841
+ or shards level.
5828
5842
  """
5829
5843
  __path_parts: t.Dict[str, str]
5830
5844
  if index not in SKIP_IN_PATH and metric not in SKIP_IN_PATH:
@@ -2504,7 +2504,7 @@ class InferenceClient(NamespacedClient):
2504
2504
  )
2505
2505
 
2506
2506
  @_rewrite_parameters(
2507
- body_fields=("input", "task_settings"),
2507
+ body_fields=("input", "input_type", "task_settings"),
2508
2508
  )
2509
2509
  async def text_embedding(
2510
2510
  self,
@@ -2514,6 +2514,7 @@ class InferenceClient(NamespacedClient):
2514
2514
  error_trace: t.Optional[bool] = None,
2515
2515
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
2516
2516
  human: t.Optional[bool] = None,
2517
+ input_type: t.Optional[str] = None,
2517
2518
  pretty: t.Optional[bool] = None,
2518
2519
  task_settings: t.Optional[t.Any] = None,
2519
2520
  timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -2529,6 +2530,13 @@ class InferenceClient(NamespacedClient):
2529
2530
 
2530
2531
  :param inference_id: The inference Id
2531
2532
  :param input: Inference input. Either a string or an array of strings.
2533
+ :param input_type: The input data type for the text embedding model. Possible
2534
+ values include: * `SEARCH` * `INGEST` * `CLASSIFICATION` * `CLUSTERING` Not
2535
+ all services support all values. Unsupported values will trigger a validation
2536
+ exception. Accepted values depend on the configured inference service, refer
2537
+ to the relevant service-specific documentation for more info. > info > The
2538
+ `input_type` parameter specified on the root level of the request body will
2539
+ take precedence over the `input_type` parameter specified in `task_settings`.
2532
2540
  :param task_settings: Optional task settings
2533
2541
  :param timeout: Specifies the amount of time to wait for the inference request
2534
2542
  to complete.
@@ -2554,6 +2562,8 @@ class InferenceClient(NamespacedClient):
2554
2562
  if not __body:
2555
2563
  if input is not None:
2556
2564
  __body["input"] = input
2565
+ if input_type is not None:
2566
+ __body["input_type"] = input_type
2557
2567
  if task_settings is not None:
2558
2568
  __body["task_settings"] = task_settings
2559
2569
  if not __body:
@@ -141,7 +141,9 @@ class LogstashClient(NamespacedClient):
141
141
 
142
142
  `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-logstash-put-pipeline>`_
143
143
 
144
- :param id: An identifier for the pipeline.
144
+ :param id: An identifier for the pipeline. Pipeline IDs must begin with a letter
145
+ or underscore and contain only letters, underscores, dashes, hyphens and
146
+ numbers.
145
147
  :param pipeline:
146
148
  """
147
149
  if id in SKIP_IN_PATH:
@@ -404,8 +404,8 @@ class NodesClient(NamespacedClient):
404
404
  are requested).
405
405
  :param include_unloaded_segments: If `true`, the response includes information
406
406
  from segments that are not loaded into memory.
407
- :param level: Indicates whether statistics are aggregated at the cluster, index,
408
- or shard level.
407
+ :param level: Indicates whether statistics are aggregated at the node, indices,
408
+ or shards level.
409
409
  :param timeout: Period to wait for a response. If no response is received before
410
410
  the timeout expires, the request fails and returns an error.
411
411
  :param types: A comma-separated list of document types for the indexing index
@@ -33,13 +33,9 @@ class ShutdownClient(NamespacedClient):
33
33
  error_trace: t.Optional[bool] = None,
34
34
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
35
35
  human: t.Optional[bool] = None,
36
- master_timeout: t.Optional[
37
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
38
- ] = None,
36
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
39
37
  pretty: t.Optional[bool] = None,
40
- timeout: t.Optional[
41
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
42
- ] = None,
38
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
43
39
  ) -> ObjectApiResponse[t.Any]:
44
40
  """
45
41
  .. raw:: html
@@ -97,9 +93,7 @@ class ShutdownClient(NamespacedClient):
97
93
  error_trace: t.Optional[bool] = None,
98
94
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
99
95
  human: t.Optional[bool] = None,
100
- master_timeout: t.Optional[
101
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
102
- ] = None,
96
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
103
97
  pretty: t.Optional[bool] = None,
104
98
  ) -> ObjectApiResponse[t.Any]:
105
99
  """
@@ -162,14 +156,10 @@ class ShutdownClient(NamespacedClient):
162
156
  error_trace: t.Optional[bool] = None,
163
157
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
164
158
  human: t.Optional[bool] = None,
165
- master_timeout: t.Optional[
166
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
167
- ] = None,
159
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
168
160
  pretty: t.Optional[bool] = None,
169
161
  target_node_name: t.Optional[str] = None,
170
- timeout: t.Optional[
171
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
172
- ] = None,
162
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
173
163
  body: t.Optional[t.Dict[str, t.Any]] = None,
174
164
  ) -> ObjectApiResponse[t.Any]:
175
165
  """
@@ -0,0 +1,186 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ import typing as t
19
+
20
+ from elastic_transport import ObjectApiResponse, TextApiResponse
21
+
22
+ from ._base import NamespacedClient
23
+ from .utils import (
24
+ Stability,
25
+ _rewrite_parameters,
26
+ _stability_warning,
27
+ )
28
+
29
+
30
+ class StreamsClient(NamespacedClient):
31
+
32
+ @_rewrite_parameters()
33
+ @_stability_warning(Stability.EXPERIMENTAL)
34
+ async def logs_disable(
35
+ self,
36
+ *,
37
+ error_trace: t.Optional[bool] = None,
38
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
39
+ human: t.Optional[bool] = None,
40
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
41
+ pretty: t.Optional[bool] = None,
42
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
43
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
44
+ """
45
+ .. raw:: html
46
+
47
+ <p>Disable logs stream.</p>
48
+ <p>Turn off the logs stream feature for this cluster.</p>
49
+
50
+
51
+ `<https://www.elastic.co/docs/api/doc/elasticsearch#TODO>`_
52
+
53
+ :param master_timeout: The period to wait for a connection to the master node.
54
+ If no response is received before the timeout expires, the request fails
55
+ and returns an error.
56
+ :param timeout: The period to wait for a response. If no response is received
57
+ before the timeout expires, the request fails and returns an error.
58
+ """
59
+ __path_parts: t.Dict[str, str] = {}
60
+ __path = "/_streams/logs/_disable"
61
+ __query: t.Dict[str, t.Any] = {}
62
+ if error_trace is not None:
63
+ __query["error_trace"] = error_trace
64
+ if filter_path is not None:
65
+ __query["filter_path"] = filter_path
66
+ if human is not None:
67
+ __query["human"] = human
68
+ if master_timeout is not None:
69
+ __query["master_timeout"] = master_timeout
70
+ if pretty is not None:
71
+ __query["pretty"] = pretty
72
+ if timeout is not None:
73
+ __query["timeout"] = timeout
74
+ __headers = {"accept": "application/json,text/plain"}
75
+ return await self.perform_request( # type: ignore[return-value]
76
+ "POST",
77
+ __path,
78
+ params=__query,
79
+ headers=__headers,
80
+ endpoint_id="streams.logs_disable",
81
+ path_parts=__path_parts,
82
+ )
83
+
84
+ @_rewrite_parameters()
85
+ @_stability_warning(Stability.EXPERIMENTAL)
86
+ async def logs_enable(
87
+ self,
88
+ *,
89
+ error_trace: t.Optional[bool] = None,
90
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
91
+ human: t.Optional[bool] = None,
92
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
93
+ pretty: t.Optional[bool] = None,
94
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
95
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
96
+ """
97
+ .. raw:: html
98
+
99
+ <p>Enable logs stream.</p>
100
+ <p>Turn on the logs stream feature for this cluster.</p>
101
+ <p>NOTE: To protect existing data, this feature can be turned on only if the
102
+ cluster does not have existing indices or data streams that match the pattern <code>logs|logs.*</code>.
103
+ If those indices or data streams exist, a <code>409 - Conflict</code> response and error is returned.</p>
104
+
105
+
106
+ `<https://www.elastic.co/docs/api/doc/elasticsearch#TODO>`_
107
+
108
+ :param master_timeout: The period to wait for a connection to the master node.
109
+ If no response is received before the timeout expires, the request fails
110
+ and returns an error.
111
+ :param timeout: The period to wait for a response. If no response is received
112
+ before the timeout expires, the request fails and returns an error.
113
+ """
114
+ __path_parts: t.Dict[str, str] = {}
115
+ __path = "/_streams/logs/_enable"
116
+ __query: t.Dict[str, t.Any] = {}
117
+ if error_trace is not None:
118
+ __query["error_trace"] = error_trace
119
+ if filter_path is not None:
120
+ __query["filter_path"] = filter_path
121
+ if human is not None:
122
+ __query["human"] = human
123
+ if master_timeout is not None:
124
+ __query["master_timeout"] = master_timeout
125
+ if pretty is not None:
126
+ __query["pretty"] = pretty
127
+ if timeout is not None:
128
+ __query["timeout"] = timeout
129
+ __headers = {"accept": "application/json,text/plain"}
130
+ return await self.perform_request( # type: ignore[return-value]
131
+ "POST",
132
+ __path,
133
+ params=__query,
134
+ headers=__headers,
135
+ endpoint_id="streams.logs_enable",
136
+ path_parts=__path_parts,
137
+ )
138
+
139
+ @_rewrite_parameters()
140
+ @_stability_warning(Stability.EXPERIMENTAL)
141
+ async def status(
142
+ self,
143
+ *,
144
+ error_trace: t.Optional[bool] = None,
145
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
146
+ human: t.Optional[bool] = None,
147
+ master_timeout: t.Optional[
148
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
149
+ ] = None,
150
+ pretty: t.Optional[bool] = None,
151
+ ) -> ObjectApiResponse[t.Any]:
152
+ """
153
+ .. raw:: html
154
+
155
+ <p>Get the status of streams.</p>
156
+ <p>Get the current status for all types of streams.</p>
157
+
158
+
159
+ `<https://www.elastic.co/docs/api/doc/elasticsearch#TODO>`_
160
+
161
+ :param master_timeout: Period to wait for a connection to the master node. If
162
+ no response is received before the timeout expires, the request fails and
163
+ returns an error.
164
+ """
165
+ __path_parts: t.Dict[str, str] = {}
166
+ __path = "/_streams/status"
167
+ __query: t.Dict[str, t.Any] = {}
168
+ if error_trace is not None:
169
+ __query["error_trace"] = error_trace
170
+ if filter_path is not None:
171
+ __query["filter_path"] = filter_path
172
+ if human is not None:
173
+ __query["human"] = human
174
+ if master_timeout is not None:
175
+ __query["master_timeout"] = master_timeout
176
+ if pretty is not None:
177
+ __query["pretty"] = pretty
178
+ __headers = {"accept": "application/json"}
179
+ return await self.perform_request( # type: ignore[return-value]
180
+ "GET",
181
+ __path,
182
+ params=__query,
183
+ headers=__headers,
184
+ endpoint_id="streams.status",
185
+ path_parts=__path_parts,
186
+ )
@@ -552,11 +552,7 @@ class WatcherClient(NamespacedClient):
552
552
  __body["transform"] = transform
553
553
  if trigger is not None:
554
554
  __body["trigger"] = trigger
555
- if not __body:
556
- __body = None # type: ignore[assignment]
557
- __headers = {"accept": "application/json"}
558
- if __body is not None:
559
- __headers["content-type"] = "application/json"
555
+ __headers = {"accept": "application/json", "content-type": "application/json"}
560
556
  return await self.perform_request( # type: ignore[return-value]
561
557
  "PUT",
562
558
  __path,
@@ -33,12 +33,16 @@ from typing import (
33
33
  Union,
34
34
  )
35
35
 
36
+ from ..compat import safe_task
36
37
  from ..exceptions import ApiError, NotFoundError, TransportError
37
38
  from ..helpers.actions import (
38
39
  _TYPE_BULK_ACTION,
39
40
  _TYPE_BULK_ACTION_BODY,
40
41
  _TYPE_BULK_ACTION_HEADER,
41
42
  _TYPE_BULK_ACTION_HEADER_AND_BODY,
43
+ _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY,
44
+ _TYPE_BULK_ACTION_WITH_META,
45
+ BulkMeta,
42
46
  _ActionChunker,
43
47
  _process_bulk_chunk_error,
44
48
  _process_bulk_chunk_success,
@@ -54,9 +58,10 @@ T = TypeVar("T")
54
58
 
55
59
 
56
60
  async def _chunk_actions(
57
- actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
61
+ actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
58
62
  chunk_size: int,
59
63
  max_chunk_bytes: int,
64
+ flush_after_seconds: Optional[float],
60
65
  serializer: Serializer,
61
66
  ) -> AsyncIterable[
62
67
  Tuple[
@@ -76,10 +81,42 @@ async def _chunk_actions(
76
81
  chunker = _ActionChunker(
77
82
  chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
78
83
  )
79
- async for action, data in actions:
80
- ret = chunker.feed(action, data)
81
- if ret:
82
- yield ret
84
+
85
+ if not flush_after_seconds:
86
+ async for action, data in actions:
87
+ ret = chunker.feed(action, data)
88
+ if ret:
89
+ yield ret
90
+ else:
91
+ item_queue: asyncio.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
92
+ asyncio.Queue()
93
+ )
94
+
95
+ async def get_items() -> None:
96
+ try:
97
+ async for item in actions:
98
+ await item_queue.put(item)
99
+ finally:
100
+ await item_queue.put((BulkMeta.done, None))
101
+
102
+ async with safe_task(get_items()):
103
+ timeout: Optional[float] = flush_after_seconds
104
+ while True:
105
+ try:
106
+ action, data = await asyncio.wait_for(
107
+ item_queue.get(), timeout=timeout
108
+ )
109
+ timeout = flush_after_seconds
110
+ except asyncio.TimeoutError:
111
+ action, data = BulkMeta.flush, None
112
+ timeout = None
113
+
114
+ if action is BulkMeta.done:
115
+ break
116
+ ret = chunker.feed(action, data)
117
+ if ret:
118
+ yield ret
119
+
83
120
  ret = chunker.flush()
84
121
  if ret:
85
122
  yield ret
@@ -159,9 +196,13 @@ async def azip(
159
196
 
160
197
  async def async_streaming_bulk(
161
198
  client: AsyncElasticsearch,
162
- actions: Union[Iterable[_TYPE_BULK_ACTION], AsyncIterable[_TYPE_BULK_ACTION]],
199
+ actions: Union[
200
+ Iterable[_TYPE_BULK_ACTION_WITH_META],
201
+ AsyncIterable[_TYPE_BULK_ACTION_WITH_META],
202
+ ],
163
203
  chunk_size: int = 500,
164
204
  max_chunk_bytes: int = 100 * 1024 * 1024,
205
+ flush_after_seconds: Optional[float] = None,
165
206
  raise_on_error: bool = True,
166
207
  expand_action_callback: Callable[
167
208
  [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -194,6 +235,9 @@ async def async_streaming_bulk(
194
235
  :arg actions: iterable or async iterable containing the actions to be executed
195
236
  :arg chunk_size: number of docs in one chunk sent to es (default: 500)
196
237
  :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
238
+ :arg flush_after_seconds: time in seconds after which a chunk is written even
239
+ if hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to not use a
240
+ timeout-based flush. (default: 0)
197
241
  :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
198
242
  from the execution of the last chunk when some occur. By default we raise.
199
243
  :arg raise_on_exception: if ``False`` then don't propagate exceptions from
@@ -220,9 +264,14 @@ async def async_streaming_bulk(
220
264
  if isinstance(retry_on_status, int):
221
265
  retry_on_status = (retry_on_status,)
222
266
 
223
- async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]:
267
+ async def map_actions() -> (
268
+ AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY]
269
+ ):
224
270
  async for item in aiter(actions):
225
- yield expand_action_callback(item)
271
+ if isinstance(item, BulkMeta):
272
+ yield item, None
273
+ else:
274
+ yield expand_action_callback(item)
226
275
 
227
276
  serializer = client.transport.serializers.get_serializer("application/json")
228
277
 
@@ -234,7 +283,7 @@ async def async_streaming_bulk(
234
283
  ]
235
284
  bulk_actions: List[bytes]
236
285
  async for bulk_data, bulk_actions in _chunk_actions(
237
- map_actions(), chunk_size, max_chunk_bytes, serializer
286
+ map_actions(), chunk_size, max_chunk_bytes, flush_after_seconds, serializer
238
287
  ):
239
288
  for attempt in range(max_retries + 1):
240
289
  to_retry: List[bytes] = []
@@ -74,6 +74,7 @@ from .slm import SlmClient
74
74
  from .snapshot import SnapshotClient
75
75
  from .sql import SqlClient
76
76
  from .ssl import SslClient
77
+ from .streams import StreamsClient
77
78
  from .synonyms import SynonymsClient
78
79
  from .tasks import TasksClient
79
80
  from .text_structure import TextStructureClient
@@ -378,6 +379,7 @@ class Elasticsearch(BaseClient):
378
379
  self.shutdown = ShutdownClient(self)
379
380
  self.sql = SqlClient(self)
380
381
  self.ssl = SslClient(self)
382
+ self.streams = StreamsClient(self)
381
383
  self.synonyms = SynonymsClient(self)
382
384
  self.text_structure = TextStructureClient(self)
383
385
  self.transform = TransformClient(self)