elasticsearch9 9.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. elasticsearch9/__init__.py +98 -0
  2. elasticsearch9/_async/__init__.py +16 -0
  3. elasticsearch9/_async/client/__init__.py +6531 -0
  4. elasticsearch9/_async/client/_base.py +430 -0
  5. elasticsearch9/_async/client/async_search.py +637 -0
  6. elasticsearch9/_async/client/autoscaling.py +252 -0
  7. elasticsearch9/_async/client/cat.py +2995 -0
  8. elasticsearch9/_async/client/ccr.py +1043 -0
  9. elasticsearch9/_async/client/cluster.py +1223 -0
  10. elasticsearch9/_async/client/connector.py +1978 -0
  11. elasticsearch9/_async/client/dangling_indices.py +192 -0
  12. elasticsearch9/_async/client/enrich.py +291 -0
  13. elasticsearch9/_async/client/eql.py +359 -0
  14. elasticsearch9/_async/client/esql.py +490 -0
  15. elasticsearch9/_async/client/features.py +130 -0
  16. elasticsearch9/_async/client/fleet.py +658 -0
  17. elasticsearch9/_async/client/graph.py +113 -0
  18. elasticsearch9/_async/client/ilm.py +668 -0
  19. elasticsearch9/_async/client/indices.py +5582 -0
  20. elasticsearch9/_async/client/inference.py +2247 -0
  21. elasticsearch9/_async/client/ingest.py +766 -0
  22. elasticsearch9/_async/client/license.py +400 -0
  23. elasticsearch9/_async/client/logstash.py +176 -0
  24. elasticsearch9/_async/client/migration.py +160 -0
  25. elasticsearch9/_async/client/ml.py +5835 -0
  26. elasticsearch9/_async/client/monitoring.py +100 -0
  27. elasticsearch9/_async/client/nodes.py +543 -0
  28. elasticsearch9/_async/client/query_rules.py +485 -0
  29. elasticsearch9/_async/client/rollup.py +616 -0
  30. elasticsearch9/_async/client/search_application.py +574 -0
  31. elasticsearch9/_async/client/searchable_snapshots.py +313 -0
  32. elasticsearch9/_async/client/security.py +4688 -0
  33. elasticsearch9/_async/client/shutdown.py +268 -0
  34. elasticsearch9/_async/client/simulate.py +145 -0
  35. elasticsearch9/_async/client/slm.py +559 -0
  36. elasticsearch9/_async/client/snapshot.py +1338 -0
  37. elasticsearch9/_async/client/sql.py +469 -0
  38. elasticsearch9/_async/client/ssl.py +76 -0
  39. elasticsearch9/_async/client/synonyms.py +413 -0
  40. elasticsearch9/_async/client/tasks.py +295 -0
  41. elasticsearch9/_async/client/text_structure.py +664 -0
  42. elasticsearch9/_async/client/transform.py +922 -0
  43. elasticsearch9/_async/client/utils.py +48 -0
  44. elasticsearch9/_async/client/watcher.py +894 -0
  45. elasticsearch9/_async/client/xpack.py +134 -0
  46. elasticsearch9/_async/helpers.py +596 -0
  47. elasticsearch9/_otel.py +110 -0
  48. elasticsearch9/_sync/__init__.py +16 -0
  49. elasticsearch9/_sync/client/__init__.py +6529 -0
  50. elasticsearch9/_sync/client/_base.py +430 -0
  51. elasticsearch9/_sync/client/async_search.py +637 -0
  52. elasticsearch9/_sync/client/autoscaling.py +252 -0
  53. elasticsearch9/_sync/client/cat.py +2995 -0
  54. elasticsearch9/_sync/client/ccr.py +1043 -0
  55. elasticsearch9/_sync/client/cluster.py +1223 -0
  56. elasticsearch9/_sync/client/connector.py +1978 -0
  57. elasticsearch9/_sync/client/dangling_indices.py +192 -0
  58. elasticsearch9/_sync/client/enrich.py +291 -0
  59. elasticsearch9/_sync/client/eql.py +359 -0
  60. elasticsearch9/_sync/client/esql.py +490 -0
  61. elasticsearch9/_sync/client/features.py +130 -0
  62. elasticsearch9/_sync/client/fleet.py +658 -0
  63. elasticsearch9/_sync/client/graph.py +113 -0
  64. elasticsearch9/_sync/client/ilm.py +668 -0
  65. elasticsearch9/_sync/client/indices.py +5582 -0
  66. elasticsearch9/_sync/client/inference.py +2247 -0
  67. elasticsearch9/_sync/client/ingest.py +766 -0
  68. elasticsearch9/_sync/client/license.py +400 -0
  69. elasticsearch9/_sync/client/logstash.py +176 -0
  70. elasticsearch9/_sync/client/migration.py +160 -0
  71. elasticsearch9/_sync/client/ml.py +5835 -0
  72. elasticsearch9/_sync/client/monitoring.py +100 -0
  73. elasticsearch9/_sync/client/nodes.py +543 -0
  74. elasticsearch9/_sync/client/query_rules.py +485 -0
  75. elasticsearch9/_sync/client/rollup.py +616 -0
  76. elasticsearch9/_sync/client/search_application.py +574 -0
  77. elasticsearch9/_sync/client/searchable_snapshots.py +313 -0
  78. elasticsearch9/_sync/client/security.py +4688 -0
  79. elasticsearch9/_sync/client/shutdown.py +268 -0
  80. elasticsearch9/_sync/client/simulate.py +145 -0
  81. elasticsearch9/_sync/client/slm.py +559 -0
  82. elasticsearch9/_sync/client/snapshot.py +1338 -0
  83. elasticsearch9/_sync/client/sql.py +469 -0
  84. elasticsearch9/_sync/client/ssl.py +76 -0
  85. elasticsearch9/_sync/client/synonyms.py +413 -0
  86. elasticsearch9/_sync/client/tasks.py +295 -0
  87. elasticsearch9/_sync/client/text_structure.py +664 -0
  88. elasticsearch9/_sync/client/transform.py +922 -0
  89. elasticsearch9/_sync/client/utils.py +475 -0
  90. elasticsearch9/_sync/client/watcher.py +894 -0
  91. elasticsearch9/_sync/client/xpack.py +134 -0
  92. elasticsearch9/_utils.py +34 -0
  93. elasticsearch9/_version.py +18 -0
  94. elasticsearch9/client.py +126 -0
  95. elasticsearch9/compat.py +79 -0
  96. elasticsearch9/dsl/__init__.py +203 -0
  97. elasticsearch9/dsl/_async/__init__.py +16 -0
  98. elasticsearch9/dsl/_async/document.py +522 -0
  99. elasticsearch9/dsl/_async/faceted_search.py +50 -0
  100. elasticsearch9/dsl/_async/index.py +639 -0
  101. elasticsearch9/dsl/_async/mapping.py +49 -0
  102. elasticsearch9/dsl/_async/search.py +237 -0
  103. elasticsearch9/dsl/_async/update_by_query.py +47 -0
  104. elasticsearch9/dsl/_sync/__init__.py +16 -0
  105. elasticsearch9/dsl/_sync/document.py +514 -0
  106. elasticsearch9/dsl/_sync/faceted_search.py +50 -0
  107. elasticsearch9/dsl/_sync/index.py +597 -0
  108. elasticsearch9/dsl/_sync/mapping.py +49 -0
  109. elasticsearch9/dsl/_sync/search.py +230 -0
  110. elasticsearch9/dsl/_sync/update_by_query.py +45 -0
  111. elasticsearch9/dsl/aggs.py +3734 -0
  112. elasticsearch9/dsl/analysis.py +341 -0
  113. elasticsearch9/dsl/async_connections.py +37 -0
  114. elasticsearch9/dsl/connections.py +142 -0
  115. elasticsearch9/dsl/document.py +20 -0
  116. elasticsearch9/dsl/document_base.py +444 -0
  117. elasticsearch9/dsl/exceptions.py +32 -0
  118. elasticsearch9/dsl/faceted_search.py +28 -0
  119. elasticsearch9/dsl/faceted_search_base.py +489 -0
  120. elasticsearch9/dsl/field.py +4392 -0
  121. elasticsearch9/dsl/function.py +180 -0
  122. elasticsearch9/dsl/index.py +23 -0
  123. elasticsearch9/dsl/index_base.py +178 -0
  124. elasticsearch9/dsl/mapping.py +19 -0
  125. elasticsearch9/dsl/mapping_base.py +219 -0
  126. elasticsearch9/dsl/query.py +2822 -0
  127. elasticsearch9/dsl/response/__init__.py +388 -0
  128. elasticsearch9/dsl/response/aggs.py +100 -0
  129. elasticsearch9/dsl/response/hit.py +53 -0
  130. elasticsearch9/dsl/search.py +20 -0
  131. elasticsearch9/dsl/search_base.py +1053 -0
  132. elasticsearch9/dsl/serializer.py +34 -0
  133. elasticsearch9/dsl/types.py +6453 -0
  134. elasticsearch9/dsl/update_by_query.py +19 -0
  135. elasticsearch9/dsl/update_by_query_base.py +149 -0
  136. elasticsearch9/dsl/utils.py +687 -0
  137. elasticsearch9/dsl/wrappers.py +144 -0
  138. elasticsearch9/exceptions.py +133 -0
  139. elasticsearch9/helpers/__init__.py +41 -0
  140. elasticsearch9/helpers/actions.py +875 -0
  141. elasticsearch9/helpers/errors.py +40 -0
  142. elasticsearch9/helpers/vectorstore/__init__.py +62 -0
  143. elasticsearch9/helpers/vectorstore/_async/__init__.py +16 -0
  144. elasticsearch9/helpers/vectorstore/_async/_utils.py +39 -0
  145. elasticsearch9/helpers/vectorstore/_async/embedding_service.py +89 -0
  146. elasticsearch9/helpers/vectorstore/_async/strategies.py +487 -0
  147. elasticsearch9/helpers/vectorstore/_async/vectorstore.py +421 -0
  148. elasticsearch9/helpers/vectorstore/_sync/__init__.py +16 -0
  149. elasticsearch9/helpers/vectorstore/_sync/_utils.py +39 -0
  150. elasticsearch9/helpers/vectorstore/_sync/embedding_service.py +89 -0
  151. elasticsearch9/helpers/vectorstore/_sync/strategies.py +487 -0
  152. elasticsearch9/helpers/vectorstore/_sync/vectorstore.py +421 -0
  153. elasticsearch9/helpers/vectorstore/_utils.py +116 -0
  154. elasticsearch9/py.typed +0 -0
  155. elasticsearch9/serializer.py +250 -0
  156. elasticsearch9-9.0.0.dist-info/METADATA +175 -0
  157. elasticsearch9-9.0.0.dist-info/RECORD +160 -0
  158. elasticsearch9-9.0.0.dist-info/WHEEL +4 -0
  159. elasticsearch9-9.0.0.dist-info/licenses/LICENSE +176 -0
  160. elasticsearch9-9.0.0.dist-info/licenses/NOTICE +2 -0
@@ -0,0 +1,596 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ import asyncio
19
+ import logging
20
+ from typing import (
21
+ Any,
22
+ AsyncIterable,
23
+ AsyncIterator,
24
+ Callable,
25
+ Collection,
26
+ Dict,
27
+ Iterable,
28
+ List,
29
+ MutableMapping,
30
+ Optional,
31
+ Tuple,
32
+ TypeVar,
33
+ Union,
34
+ )
35
+
36
+ from ..exceptions import ApiError, NotFoundError, TransportError
37
+ from ..helpers.actions import (
38
+ _TYPE_BULK_ACTION,
39
+ _TYPE_BULK_ACTION_BODY,
40
+ _TYPE_BULK_ACTION_HEADER,
41
+ _TYPE_BULK_ACTION_HEADER_AND_BODY,
42
+ _ActionChunker,
43
+ _process_bulk_chunk_error,
44
+ _process_bulk_chunk_success,
45
+ expand_action,
46
+ )
47
+ from ..helpers.errors import ScanError
48
+ from ..serializer import Serializer
49
+ from .client import AsyncElasticsearch # noqa
50
+
51
# Shared logger for all async bulk/scan helpers in this module; the name
# matches the sync helpers so log configuration covers both.
logger = logging.getLogger("elasticsearch.helpers")

# Element type variable used by the aiter()/azip() adapters below.
T = TypeVar("T")
54
+
55
+
56
async def _chunk_actions(
    actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
    chunk_size: int,
    max_chunk_bytes: int,
    serializer: Serializer,
) -> AsyncIterable[
    Tuple[
        List[
            Union[
                Tuple[_TYPE_BULK_ACTION_HEADER],
                Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
            ]
        ],
        List[bytes],
    ]
]:
    """Group bulk actions into chunks bounded by item count and byte size.

    Each action is serialized as it is consumed. A chunk is yielded whenever
    the shared ``_ActionChunker`` reports one is full, and any remainder is
    flushed once the input is exhausted.
    """
    chunker = _ActionChunker(
        chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
    )
    async for raw_action, raw_data in actions:
        full_chunk = chunker.feed(raw_action, raw_data)
        if full_chunk:
            yield full_chunk
    # Emit whatever is left after the input ends.
    trailing_chunk = chunker.flush()
    if trailing_chunk:
        yield trailing_chunk
86
+
87
+
88
async def _process_bulk_chunk(
    client: AsyncElasticsearch,
    bulk_actions: List[bytes],
    bulk_data: List[
        Union[
            Tuple[_TYPE_BULK_ACTION_HEADER],
            Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
        ]
    ],
    raise_on_exception: bool = True,
    raise_on_error: bool = True,
    ignore_status: Union[int, Collection[int]] = (),
    *args: Any,
    **kwargs: Any,
) -> AsyncIterable[Tuple[bool, Dict[str, Any]]]:
    """Send one serialized chunk to the ``bulk`` API and yield per-item results.

    Yields ``(ok, info)`` tuples; interpretation of the response (or of an
    ``ApiError``) is delegated to the shared chunk-processing helpers.
    """
    # Normalize a single status code into a one-element collection.
    if isinstance(ignore_status, int):
        ignore_status = (ignore_status,)

    try:
        # Send the actual request.
        resp = await client.bulk(*args, operations=bulk_actions, **kwargs)  # type: ignore[arg-type]
    except ApiError as api_error:
        item_results = _process_bulk_chunk_error(
            error=api_error,
            bulk_data=bulk_data,
            ignore_status=ignore_status,
            raise_on_exception=raise_on_exception,
            raise_on_error=raise_on_error,
        )
    else:
        item_results = _process_bulk_chunk_success(
            resp=resp.body,
            bulk_data=bulk_data,
            ignore_status=ignore_status,
            raise_on_error=raise_on_error,
        )
    for item_result in item_results:
        yield item_result
129
+
130
+
131
def aiter(x: Union[Iterable[T], AsyncIterable[T]]) -> AsyncIterator[T]:
    """Adapt *x* into an async iterator.

    Async iterators are returned unchanged and async iterables are asked for
    their iterator; unlike the builtin ``aiter``, plain (sync) iterables are
    also accepted and wrapped in an async generator.
    """
    if hasattr(x, "__anext__"):
        # Already an async iterator.
        return x  # type: ignore[return-value]
    if hasattr(x, "__aiter__"):
        return x.__aiter__()

    async def iterate_sync() -> AsyncIterable[T]:
        sync_iterable: Iterable[T] = x
        for element in sync_iterable:
            yield element

    return iterate_sync().__aiter__()
144
+
145
+
146
+ async def azip(
147
+ *iterables: Union[Iterable[T], AsyncIterable[T]]
148
+ ) -> AsyncIterable[Tuple[T, ...]]:
149
+ """Zips async iterables and iterables into an async iterator
150
+ with the same behavior as zip()
151
+ """
152
+ aiters = [aiter(x) for x in iterables]
153
+ try:
154
+ while True:
155
+ yield tuple([await x.__anext__() for x in aiters])
156
+ except StopAsyncIteration:
157
+ pass
158
+
159
+
160
async def async_streaming_bulk(
    client: AsyncElasticsearch,
    actions: Union[Iterable[_TYPE_BULK_ACTION], AsyncIterable[_TYPE_BULK_ACTION]],
    chunk_size: int = 500,
    max_chunk_bytes: int = 100 * 1024 * 1024,
    raise_on_error: bool = True,
    expand_action_callback: Callable[
        [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
    ] = expand_action,
    raise_on_exception: bool = True,
    max_retries: int = 0,
    initial_backoff: float = 2,
    max_backoff: float = 600,
    yield_ok: bool = True,
    ignore_status: Union[int, Collection[int]] = (),
    retry_on_status: Union[int, Collection[int]] = (429,),
    *args: Any,
    **kwargs: Any,
) -> AsyncIterable[Tuple[bool, Dict[str, Any]]]:
    """
    Streaming bulk consumes actions from the iterable passed in and yields
    results per action. For non-streaming usecases use
    :func:`~elasticsearch.helpers.async_bulk` which is a wrapper around streaming
    bulk that returns summary information about the bulk operation once the
    entire input is consumed and sent.

    If you specify ``max_retries`` it will also retry any documents that were
    rejected with a ``429`` status code. Use ``retry_on_status`` to
    configure which status codes will be retried. To do this it will wait
    (**by calling asyncio.sleep which will block**) for ``initial_backoff`` seconds
    and then, every subsequent rejection for the same chunk, for double the time
    every time up to ``max_backoff`` seconds.

    :arg client: instance of :class:`~elasticsearch.AsyncElasticsearch` to use
    :arg actions: iterable or async iterable containing the actions to be executed
    :arg chunk_size: number of docs in one chunk sent to es (default: 500)
    :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
    :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
        from the execution of the last chunk when some occur. By default we raise.
    :arg raise_on_exception: if ``False`` then don't propagate exceptions from
        call to ``bulk`` and just report the items that failed as failed.
    :arg expand_action_callback: callback executed on each action passed in,
        should return a tuple containing the action line and the data line
        (`None` if data line should be omitted).
    :arg retry_on_status: HTTP status code that will trigger a retry.
        (if `None` is specified only status 429 will retry).
    :arg max_retries: maximum number of times a document will be retried when
        retry_on_status (defaulting to ``429``) is received,
        set to 0 (default) for no retries
    :arg initial_backoff: number of seconds we should wait before the first
        retry. Any subsequent retries will be powers of ``initial_backoff *
        2**retry_number``
    :arg max_backoff: maximum number of seconds a retry will wait
    :arg yield_ok: if set to False will skip successful documents in the output
    :arg ignore_status: list of HTTP status code that you want to ignore
    """

    # Derive a per-helper client and tag its metadata so requests are
    # attributed to the bulk helper ("h"="helper", "bp"="bulk-parallel" tag).
    client = client.options()
    client._client_meta = (("h", "bp"),)

    # Normalize a single retryable status code into a collection.
    if isinstance(retry_on_status, int):
        retry_on_status = (retry_on_status,)

    # Expand every incoming action into an (action header, body) pair.
    async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]:
        async for item in aiter(actions):
            yield expand_action_callback(item)

    serializer = client.transport.serializers.get_serializer("application/json")

    bulk_data: List[
        Union[
            Tuple[_TYPE_BULK_ACTION_HEADER],
            Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
        ]
    ]
    bulk_actions: List[bytes]
    async for bulk_data, bulk_actions in _chunk_actions(
        map_actions(), chunk_size, max_chunk_bytes, serializer
    ):
        # Attempt 0 is the initial send; attempts 1..max_retries are retries.
        for attempt in range(max_retries + 1):
            to_retry: List[bytes] = []
            to_retry_data: List[
                Union[
                    Tuple[_TYPE_BULK_ACTION_HEADER],
                    Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
                ]
            ] = []
            if attempt:
                # Exponential backoff, capped at max_backoff, before retrying.
                await asyncio.sleep(
                    min(max_backoff, initial_backoff * 2 ** (attempt - 1))
                )

            try:
                data: Union[
                    Tuple[_TYPE_BULK_ACTION_HEADER],
                    Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
                ]
                ok: bool
                info: Dict[str, Any]
                # Pair each original action with its per-item bulk result.
                async for data, (ok, info) in azip(  # type: ignore[assignment, misc]
                    bulk_data,
                    _process_bulk_chunk(
                        client,
                        bulk_actions,
                        bulk_data,
                        raise_on_exception,
                        raise_on_error,
                        ignore_status,
                        *args,
                        **kwargs,
                    ),
                ):
                    if not ok:
                        # info is {op_name: item_result}; split it apart.
                        action, info = info.popitem()
                        # retry if retries enabled, we are not in the last attempt,
                        # and status in retry_on_status (defaulting to 429)
                        if (
                            max_retries
                            and info["status"] in retry_on_status
                            and (attempt + 1) <= max_retries
                        ):
                            # _process_bulk_chunk expects strings so we need to
                            # re-serialize the data
                            to_retry.extend(map(serializer.dumps, data))
                            to_retry_data.append(data)
                        else:
                            yield ok, {action: info}
                    elif yield_ok:
                        yield ok, info

            except ApiError as e:
                # suppress any status in retry_on_status (429 by default)
                # since we will retry them
                if attempt == max_retries or e.status_code not in retry_on_status:
                    raise
            else:
                if not to_retry:
                    break
                # retry only subset of documents that didn't succeed
                bulk_actions, bulk_data = to_retry, to_retry_data
300
+
301
+
302
async def async_bulk(
    client: AsyncElasticsearch,
    actions: Union[Iterable[_TYPE_BULK_ACTION], AsyncIterable[_TYPE_BULK_ACTION]],
    stats_only: bool = False,
    ignore_status: Union[int, Collection[int]] = (),
    *args: Any,
    **kwargs: Any,
) -> Tuple[int, Union[int, List[Any]]]:
    """
    Human-friendly wrapper around the
    :meth:`~elasticsearch.AsyncElasticsearch.bulk` api. It consumes an
    iterator of actions, sends them to elasticsearch in chunks and returns a
    summary tuple: the number of successfully executed actions, and either a
    list of errors or the number of errors when ``stats_only`` is ``True``.
    Note that by default a ``BulkIndexError`` is raised when an error is
    encountered, so options like ``stats_only`` only apply when
    ``raise_on_error`` is set to ``False``.

    When errors are being collected, the original document data is included
    in the error dictionary, which can lead to extra high memory usage. If
    you need to process a lot of data and want to ignore/collect errors,
    please consider using the
    :func:`~elasticsearch.helpers.async_streaming_bulk` helper, which will
    just return the errors and not store them in memory.

    :arg client: instance of :class:`~elasticsearch.AsyncElasticsearch` to use
    :arg actions: iterator containing the actions
    :arg stats_only: if `True` only report number of successful/failed
        operations instead of just number of successful and a list of error responses
    :arg ignore_status: list of HTTP status code that you want to ignore

    Any additional keyword arguments will be passed to
    :func:`~elasticsearch.helpers.async_streaming_bulk` which is used to execute
    the operation, see :func:`~elasticsearch.helpers.async_streaming_bulk` for more
    accepted parameters.
    """
    success = 0
    failed = 0
    # Errors are collected only when stats_only is False.
    errors: List[Any] = []

    # Force streaming_bulk to yield successful items so they can be counted.
    kwargs["yield_ok"] = True
    async for ok, item in async_streaming_bulk(
        client, actions, ignore_status=ignore_status, *args, **kwargs  # type: ignore[misc]
    ):
        # Inspect each request-response pair and tally failures.
        if ok:
            success += 1
        else:
            failed += 1
            if not stats_only:
                errors.append(item)

    return success, failed if stats_only else errors
357
+
358
+
359
async def async_scan(
    client: AsyncElasticsearch,
    query: Optional[Any] = None,
    scroll: str = "5m",
    raise_on_error: bool = True,
    preserve_order: bool = False,
    size: int = 1000,
    request_timeout: Optional[float] = None,
    clear_scroll: bool = True,
    scroll_kwargs: Optional[MutableMapping[str, Any]] = None,
    **kwargs: Any,
) -> AsyncIterable[Dict[str, Any]]:
    """
    Simple abstraction on top of the
    :meth:`~elasticsearch.AsyncElasticsearch.scroll` api - a simple iterator that
    yields all hits as returned by underlining scroll requests.

    By default scan does not return results in any pre-determined order. To
    have a standard order in the returned documents (either by score or
    explicit sort definition) when scrolling, use ``preserve_order=True``. This
    may be an expensive operation and will negate the performance benefits of
    using ``scan``.

    :arg client: instance of :class:`~elasticsearch.AsyncElasticsearch` to use
    :arg query: body for the :meth:`~elasticsearch.AsyncElasticsearch.search` api
    :arg scroll: Specify how long a consistent view of the index should be
        maintained for scrolled search
    :arg raise_on_error: raises an exception (``ScanError``) if an error is
        encountered (some shards fail to execute). By default we raise.
    :arg preserve_order: don't set the ``search_type`` to ``scan`` - this will
        cause the scroll to paginate with preserving the order. Note that this
        can be an extremely expensive operation and can easily lead to
        unpredictable results, use with caution.
    :arg size: size (per shard) of the batch send at each iteration.
    :arg request_timeout: explicit timeout for each call to ``scan``
    :arg clear_scroll: explicitly calls delete on the scroll id via the clear
        scroll API at the end of the method on completion or error, defaults
        to true.
    :arg scroll_kwargs: additional kwargs to be passed to
        :meth:`~elasticsearch.AsyncElasticsearch.scroll`

    Any additional keyword arguments will be passed to the initial
    :meth:`~elasticsearch.AsyncElasticsearch.search` call:

    .. code-block:: python

        async_scan(
            client,
            query={"query": {"match": {"title": "python"}}},
            index="orders-*"
        )
    """
    scroll_kwargs = scroll_kwargs or {}

    if not preserve_order:
        # Copy before mutating so the caller's query dict is left untouched,
        # then sort by '_doc' (no scoring/ordering needed for a plain scan).
        query = query.copy() if query else {}
        query["sort"] = "_doc"

    def pop_transport_kwargs(kw: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        # Grab options that should be propagated to every
        # API call within this helper instead of just 'search()'
        transport_kwargs = {}
        for key in ("headers", "api_key", "http_auth", "basic_auth", "bearer_auth"):
            try:
                value = kw.pop(key)
                # Legacy 'http_auth' is forwarded under its new name.
                if key == "http_auth":
                    key = "basic_auth"
                transport_kwargs[key] = value
            except KeyError:
                pass
        return transport_kwargs

    client = client.options(
        request_timeout=request_timeout, **pop_transport_kwargs(kwargs)
    )
    # Tag requests so they are attributed to the scan helper.
    client._client_meta = (("h", "s"),)

    # Setting query={"from": ...} would make 'from' be used
    # as a keyword argument instead of 'from_'. We handle that here.
    def normalize_from_keyword(kw: MutableMapping[str, Any]) -> None:
        if "from" in kw:
            kw["from_"] = kw.pop("from")

    normalize_from_keyword(kwargs)
    try:
        # Preferred call style: query keys expanded as keyword arguments.
        search_kwargs = query.copy() if query else {}
        normalize_from_keyword(search_kwargs)
        search_kwargs.update(kwargs)
        search_kwargs["scroll"] = scroll
        search_kwargs["size"] = size
        resp = await client.search(**search_kwargs)

    # Try the old deprecated way if we fail immediately on parameters.
    except TypeError:
        search_kwargs = kwargs.copy()
        search_kwargs["scroll"] = scroll
        search_kwargs["size"] = size
        resp = await client.search(body=query, **search_kwargs)

    scroll_id: Optional[str] = resp.get("_scroll_id")
    # Scroll calls may carry their own transport options.
    scroll_transport_kwargs = pop_transport_kwargs(scroll_kwargs)
    if scroll_transport_kwargs:
        scroll_client = client.options(**scroll_transport_kwargs)
    else:
        scroll_client = client

    try:
        # Keep paging until the server stops returning hits.
        while scroll_id and resp["hits"]["hits"]:
            for hit in resp["hits"]["hits"]:
                yield hit

            # Default to 0 if the value isn't included in the response
            shards_info: Dict[str, int] = resp["_shards"]
            shards_successful = shards_info.get("successful", 0)
            shards_skipped = shards_info.get("skipped", 0)
            shards_total = shards_info.get("total", 0)

            # check if we have any errors
            if (shards_successful + shards_skipped) < shards_total:
                shards_message = "Scroll request has only succeeded on %d (+%d skipped) shards out of %d."
                logger.warning(
                    shards_message,
                    shards_successful,
                    shards_skipped,
                    shards_total,
                )
                if raise_on_error:
                    raise ScanError(
                        scroll_id,
                        shards_message
                        % (
                            shards_successful,
                            shards_skipped,
                            shards_total,
                        ),
                    )
            resp = await scroll_client.scroll(
                scroll_id=scroll_id, scroll=scroll, **scroll_kwargs
            )
            scroll_id = resp.get("_scroll_id")

    finally:
        # Free the server-side scroll context unless the caller opted out;
        # a 404 here just means the context is already gone.
        if scroll_id and clear_scroll:
            await client.options(ignore_status=404).clear_scroll(scroll_id=scroll_id)
503
+
504
+
505
async def async_reindex(
    client: AsyncElasticsearch,
    source_index: Union[str, Collection[str]],
    target_index: str,
    query: Any = None,
    target_client: Optional[AsyncElasticsearch] = None,
    chunk_size: int = 500,
    scroll: str = "5m",
    op_type: Optional[str] = None,
    scan_kwargs: Optional[MutableMapping[str, Any]] = None,
    bulk_kwargs: Optional[MutableMapping[str, Any]] = None,
) -> Tuple[int, Union[int, List[Any]]]:
    """
    Reindex all documents from one index that satisfy a given query
    to another, potentially (if `target_client` is specified) on a different cluster.
    If you don't specify the query you will reindex all the documents.

    Since ``2.3`` a :meth:`~elasticsearch.AsyncElasticsearch.reindex` api is
    available as part of elasticsearch itself. It is recommended to use the api
    instead of this helper wherever possible. The helper is here mostly for
    backwards compatibility and for situations where more flexibility is
    needed.

    .. note::

        This helper doesn't transfer mappings, just the data.

    :arg client: instance of :class:`~elasticsearch.AsyncElasticsearch` to use (for
        read if `target_client` is specified as well)
    :arg source_index: index (or list of indices) to read documents from
    :arg target_index: name of the index in the target cluster to populate
    :arg query: body for the :meth:`~elasticsearch.AsyncElasticsearch.search` api
    :arg target_client: optional, is specified will be used for writing (thus
        enabling reindex between clusters)
    :arg chunk_size: number of docs in one chunk sent to es (default: 500)
    :arg scroll: Specify how long a consistent view of the index should be
        maintained for scrolled search
    :arg op_type: Explicit operation type. Defaults to '_index'. Data streams must
        be set to 'create'. If not specified, will auto-detect if target_index is a
        data stream.
    :arg scan_kwargs: additional kwargs to be passed to
        :func:`~elasticsearch.helpers.async_scan`
    :arg bulk_kwargs: additional kwargs to be passed to
        :func:`~elasticsearch.helpers.async_bulk`
    """
    # Use 'None' sentinels instead of mutable '{}' defaults (the classic
    # shared-mutable-default pitfall); 'None' now behaves exactly like
    # omitting the argument, which is backwards compatible.
    scan_kwargs = {} if scan_kwargs is None else scan_kwargs
    bulk_kwargs = {} if bulk_kwargs is None else bulk_kwargs

    target_client = client if target_client is None else target_client
    docs = async_scan(
        client, query=query, index=source_index, scroll=scroll, **scan_kwargs
    )

    async def _change_doc_index(
        hits: AsyncIterable[Dict[str, Any]],
        index: str,
        op_type: Optional[str],
    ) -> AsyncIterable[Dict[str, Any]]:
        # Rewrite each hit's metadata so it is indexed into the target.
        async for h in hits:
            h["_index"] = index
            if op_type is not None:
                h["_op_type"] = op_type
            # Flatten stored fields (if requested via scan_kwargs) into the hit.
            if "fields" in h:
                h.update(h.pop("fields"))
            yield h

    kwargs = {"stats_only": True}
    kwargs.update(bulk_kwargs)

    is_data_stream = False
    try:
        # Verify if the target_index is data stream or index
        data_streams = await target_client.indices.get_data_stream(
            name=target_index, expand_wildcards="all"
        )
        is_data_stream = any(
            data_stream["name"] == target_index
            for data_stream in data_streams["data_streams"]
        )
    except (TransportError, KeyError, NotFoundError):
        # If its not data stream, might be index
        pass

    if is_data_stream:
        # Data streams only accept the 'create' operation type.
        if op_type not in (None, "create"):
            raise ValueError("Data streams must have 'op_type' set to 'create'")
        else:
            op_type = "create"

    return await async_bulk(
        target_client,
        _change_doc_index(docs, target_index, op_type),
        chunk_size=chunk_size,
        **kwargs,
    )
@@ -0,0 +1,110 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from __future__ import annotations
19
+
20
+ import contextlib
21
+ import os
22
+ from typing import Generator, Literal, Mapping
23
+
24
+ try:
25
+ from opentelemetry import trace
26
+
27
+ _tracer: trace.Tracer | None = trace.get_tracer("elasticsearch-api")
28
+ except ImportError:
29
+ _tracer = None
30
+
31
+ from elastic_transport import OpenTelemetrySpan
32
+
33
# Valid values for the enabled config are 'true' and 'false'. Default is 'true'.
ENABLED_ENV_VAR = "OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_ENABLED"
# Describes how to handle search queries in the request body when assigned to
# a span attribute.
# Valid values are 'omit' and 'raw'.
# Default is 'omit' as 'raw' has security implications.
BODY_STRATEGY_ENV_VAR = "OTEL_PYTHON_INSTRUMENTATION_ELASTICSEARCH_CAPTURE_SEARCH_QUERY"
# Fallback used when BODY_STRATEGY_ENV_VAR is unset.
DEFAULT_BODY_STRATEGY = "omit"
41
+
42
+
43
class OpenTelemetry:
    """Emits OpenTelemetry spans for Elasticsearch API and helper calls.

    Instrumentation is active only when a tracer is available (i.e. the
    ``opentelemetry`` package imported successfully at module load) and it
    has not been disabled via constructor argument or environment variable.
    """

    def __init__(
        self,
        enabled: bool | None = None,
        tracer: trace.Tracer | None = None,
        body_strategy: Literal["omit", "raw"] | None = None,
    ):
        if enabled is None:
            # Any value other than the literal string "true" disables tracing.
            enabled = os.environ.get(ENABLED_ENV_VAR, "true") == "true"
        self.tracer = tracer or _tracer
        self.enabled = enabled and self.tracer is not None

        if body_strategy is None:
            # 'raw' records search bodies on spans and has security
            # implications, so it must be opted into via the environment.
            self.body_strategy = os.environ.get(
                BODY_STRATEGY_ENV_VAR, DEFAULT_BODY_STRATEGY
            )  # type: ignore[assignment]
        else:
            self.body_strategy = body_strategy
        assert self.body_strategy in ("omit", "raw")

    @contextlib.contextmanager
    def span(
        self,
        method: str,
        *,
        endpoint_id: str | None,
        path_parts: Mapping[str, str],
    ) -> Generator[OpenTelemetrySpan, None, None]:
        """Yield a span for a single API request, annotated with the
        request method, endpoint id and path parts."""
        if not self.enabled or self.tracer is None:
            # Tracing is off: hand out an inert span wrapper.
            yield OpenTelemetrySpan(None)
            return

        name = endpoint_id or method
        with self.tracer.start_as_current_span(name) as otel_span:
            otel_span.set_attribute("http.request.method", method)
            otel_span.set_attribute("db.system", "elasticsearch")
            if endpoint_id is not None:
                otel_span.set_attribute("db.operation", endpoint_id)
            for part, part_value in path_parts.items():
                otel_span.set_attribute(
                    f"db.elasticsearch.path_parts.{part}", part_value
                )

            yield OpenTelemetrySpan(
                otel_span,
                endpoint_id=endpoint_id,
                body_strategy=self.body_strategy,
            )

    @contextlib.contextmanager
    def helpers_span(self, span_name: str) -> Generator[OpenTelemetrySpan, None, None]:
        """Yield a span that covers an entire helper invocation."""
        if not self.enabled or self.tracer is None:
            yield OpenTelemetrySpan(None)
            return

        with self.tracer.start_as_current_span(span_name) as otel_span:
            otel_span.set_attribute("db.system", "elasticsearch")
            otel_span.set_attribute("db.operation", span_name)
            # Without a request method, Elastic APM does not display the traces
            otel_span.set_attribute("http.request.method", "null")
            yield OpenTelemetrySpan(otel_span)

    @contextlib.contextmanager
    def use_span(self, span: OpenTelemetrySpan) -> Generator[None, None, None]:
        """Make *span* the current span for the duration of the context."""
        if not self.enabled or self.tracer is None or span.otel_span is None:
            yield
            return

        with trace.use_span(span.otel_span):
            yield