elasticsearch 9.1.0__py3-none-any.whl → 9.1.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (54)
  1. elasticsearch/_async/client/__init__.py +21 -6
  2. elasticsearch/_async/client/cat.py +1091 -51
  3. elasticsearch/_async/client/cluster.py +7 -2
  4. elasticsearch/_async/client/connector.py +3 -3
  5. elasticsearch/_async/client/esql.py +20 -6
  6. elasticsearch/_async/client/indices.py +27 -13
  7. elasticsearch/_async/client/inference.py +16 -5
  8. elasticsearch/_async/client/logstash.py +3 -1
  9. elasticsearch/_async/client/nodes.py +2 -2
  10. elasticsearch/_async/client/shutdown.py +5 -15
  11. elasticsearch/_async/client/sql.py +1 -1
  12. elasticsearch/_async/client/streams.py +186 -0
  13. elasticsearch/_async/client/transform.py +60 -0
  14. elasticsearch/_async/client/watcher.py +1 -5
  15. elasticsearch/_async/helpers.py +58 -9
  16. elasticsearch/_sync/client/__init__.py +21 -6
  17. elasticsearch/_sync/client/cat.py +1091 -51
  18. elasticsearch/_sync/client/cluster.py +7 -2
  19. elasticsearch/_sync/client/connector.py +3 -3
  20. elasticsearch/_sync/client/esql.py +20 -6
  21. elasticsearch/_sync/client/indices.py +27 -13
  22. elasticsearch/_sync/client/inference.py +16 -5
  23. elasticsearch/_sync/client/logstash.py +3 -1
  24. elasticsearch/_sync/client/nodes.py +2 -2
  25. elasticsearch/_sync/client/shutdown.py +5 -15
  26. elasticsearch/_sync/client/sql.py +1 -1
  27. elasticsearch/_sync/client/streams.py +186 -0
  28. elasticsearch/_sync/client/transform.py +60 -0
  29. elasticsearch/_sync/client/watcher.py +1 -5
  30. elasticsearch/_version.py +2 -1
  31. elasticsearch/client.py +2 -0
  32. elasticsearch/compat.py +43 -1
  33. elasticsearch/dsl/__init__.py +28 -0
  34. elasticsearch/dsl/_async/document.py +84 -0
  35. elasticsearch/dsl/_sync/document.py +84 -0
  36. elasticsearch/dsl/aggs.py +97 -0
  37. elasticsearch/dsl/document_base.py +57 -0
  38. elasticsearch/dsl/field.py +43 -11
  39. elasticsearch/dsl/query.py +5 -1
  40. elasticsearch/dsl/response/__init__.py +3 -0
  41. elasticsearch/dsl/response/aggs.py +1 -1
  42. elasticsearch/dsl/types.py +273 -24
  43. elasticsearch/dsl/utils.py +1 -1
  44. elasticsearch/esql/__init__.py +2 -1
  45. elasticsearch/esql/esql.py +85 -34
  46. elasticsearch/esql/functions.py +37 -25
  47. elasticsearch/helpers/__init__.py +10 -1
  48. elasticsearch/helpers/actions.py +106 -33
  49. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/METADATA +2 -4
  50. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/RECORD +53 -52
  51. elasticsearch/esql/esql1.py1 +0 -307
  52. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/WHEEL +0 -0
  53. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/licenses/LICENSE +0 -0
  54. {elasticsearch-9.1.0.dist-info → elasticsearch-9.1.2.dist-info}/licenses/NOTICE +0 -0
elasticsearch/_async/client/watcher.py
@@ -552,11 +552,7 @@ class WatcherClient(NamespacedClient):
             __body["transform"] = transform
         if trigger is not None:
             __body["trigger"] = trigger
-        if not __body:
-            __body = None  # type: ignore[assignment]
-        __headers = {"accept": "application/json"}
-        if __body is not None:
-            __headers["content-type"] = "application/json"
+        __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
             "PUT",
             __path,
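The hunk above replaces the conditional body/header logic with unconditional JSON headers, so a watch update with no body fields now serializes as {} instead of omitting the body. A minimal sketch of the call this affects; the endpoint and watch id are placeholders, not values from this diff:

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
    # As of 9.1.2 this request always carries content-type: application/json,
    # even when no body fields (trigger, input, actions, ...) are supplied.
    await client.watcher.put_watch(id="my-watch")  # "my-watch" is a placeholder
    await client.close()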
elasticsearch/_async/helpers.py
@@ -33,12 +33,16 @@ from typing import (
     Union,
 )
 
+from ..compat import safe_task
 from ..exceptions import ApiError, NotFoundError, TransportError
 from ..helpers.actions import (
     _TYPE_BULK_ACTION,
     _TYPE_BULK_ACTION_BODY,
     _TYPE_BULK_ACTION_HEADER,
     _TYPE_BULK_ACTION_HEADER_AND_BODY,
+    _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY,
+    _TYPE_BULK_ACTION_WITH_META,
+    BulkMeta,
     _ActionChunker,
     _process_bulk_chunk_error,
     _process_bulk_chunk_success,
@@ -54,9 +58,10 @@ T = TypeVar("T")
 
 
 async def _chunk_actions(
-    actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
+    actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
     chunk_size: int,
     max_chunk_bytes: int,
+    flush_after_seconds: Optional[float],
     serializer: Serializer,
 ) -> AsyncIterable[
     Tuple[
@@ -76,10 +81,42 @@ async def _chunk_actions(
     chunker = _ActionChunker(
         chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
     )
-    async for action, data in actions:
-        ret = chunker.feed(action, data)
-        if ret:
-            yield ret
+
+    if not flush_after_seconds:
+        async for action, data in actions:
+            ret = chunker.feed(action, data)
+            if ret:
+                yield ret
+    else:
+        item_queue: asyncio.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
+            asyncio.Queue()
+        )
+
+        async def get_items() -> None:
+            try:
+                async for item in actions:
+                    await item_queue.put(item)
+            finally:
+                await item_queue.put((BulkMeta.done, None))
+
+        async with safe_task(get_items()):
+            timeout: Optional[float] = flush_after_seconds
+            while True:
+                try:
+                    action, data = await asyncio.wait_for(
+                        item_queue.get(), timeout=timeout
+                    )
+                    timeout = flush_after_seconds
+                except asyncio.TimeoutError:
+                    action, data = BulkMeta.flush, None
+                    timeout = None
+
+                if action is BulkMeta.done:
+                    break
+                ret = chunker.feed(action, data)
+                if ret:
+                    yield ret
+
     ret = chunker.flush()
     if ret:
         yield ret
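The new else branch decouples reading from flushing: a helper task pumps the source iterator into a queue while the consumer waits on that queue with a timeout; on timeout it injects a synthetic flush marker, then waits with no deadline until the producer delivers again. A standalone sketch of the same pattern under those assumptions; every name here (timed_batches, DONE, pump) is illustrative, not part of this package:

import asyncio
from typing import AsyncIterable, List, Optional

DONE = object()  # sentinel playing the role of BulkMeta.done


async def timed_batches(
    items: AsyncIterable[str], flush_after: float, max_batch: int = 500
) -> AsyncIterable[List[str]]:
    queue: "asyncio.Queue[object]" = asyncio.Queue()

    async def pump() -> None:
        # Drain the source into the queue; always signal completion.
        try:
            async for item in items:
                await queue.put(item)
        finally:
            await queue.put(DONE)

    task = asyncio.create_task(pump())
    batch: List[str] = []
    timeout: Optional[float] = flush_after
    try:
        while True:
            try:
                item = await asyncio.wait_for(queue.get(), timeout=timeout)
                timeout = flush_after  # got an item: re-arm the idle timer
            except asyncio.TimeoutError:
                # Idle too long: flush what we have, then wait with no
                # deadline until the producer wakes us up again.
                if batch:
                    yield batch
                    batch = []
                timeout = None
                continue
            if item is DONE:
                break
            batch.append(item)  # type: ignore[arg-type]
            if len(batch) >= max_batch:
                yield batch
                batch = []
        if batch:
            yield batch
    finally:
        task.cancel()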
@@ -159,9 +196,13 @@ async def azip(
 
 async def async_streaming_bulk(
     client: AsyncElasticsearch,
-    actions: Union[Iterable[_TYPE_BULK_ACTION], AsyncIterable[_TYPE_BULK_ACTION]],
+    actions: Union[
+        Iterable[_TYPE_BULK_ACTION_WITH_META],
+        AsyncIterable[_TYPE_BULK_ACTION_WITH_META],
+    ],
     chunk_size: int = 500,
     max_chunk_bytes: int = 100 * 1024 * 1024,
+    flush_after_seconds: Optional[float] = None,
     raise_on_error: bool = True,
     expand_action_callback: Callable[
         [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -194,6 +235,9 @@ async def async_streaming_bulk(
     :arg actions: iterable or async iterable containing the actions to be executed
     :arg chunk_size: number of docs in one chunk sent to es (default: 500)
     :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to disable
+        the timeout-based flush. (default: 0)
     :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
         from the execution of the last chunk when some occur. By default we raise.
     :arg raise_on_exception: if ``False`` then don't propagate exceptions from
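The new argument matters for slow or bursty action streams, where a partial chunk could otherwise sit unsent indefinitely. A usage sketch; the endpoint, index name, and document shape are placeholders:

import asyncio
from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_streaming_bulk


async def index_events(events) -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder endpoint
    async for ok, item in async_streaming_bulk(
        client,
        ({"_index": "events", "_source": e} for e in events),  # placeholder index
        chunk_size=500,
        flush_after_seconds=5.0,  # send a partial chunk after 5 idle seconds
    ):
        if not ok:
            print("failed:", item)
    await client.close()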
@@ -220,9 +264,14 @@ async def async_streaming_bulk(
     if isinstance(retry_on_status, int):
         retry_on_status = (retry_on_status,)
 
-    async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]:
+    async def map_actions() -> (
+        AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY]
+    ):
         async for item in aiter(actions):
-            yield expand_action_callback(item)
+            if isinstance(item, BulkMeta):
+                yield item, None
+            else:
+                yield expand_action_callback(item)
 
     serializer = client.transport.serializers.get_serializer("application/json")
 
@@ -234,7 +283,7 @@ async def async_streaming_bulk(
     ]
     bulk_actions: List[bytes]
     async for bulk_data, bulk_actions in _chunk_actions(
-        map_actions(), chunk_size, max_chunk_bytes, serializer
+        map_actions(), chunk_size, max_chunk_bytes, flush_after_seconds, serializer
     ):
         for attempt in range(max_retries + 1):
             to_retry: List[bytes] = []
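These two hunks thread flush_after_seconds through to _chunk_actions and let BulkMeta sentinels flow through map_actions unexpanded, which implies an action stream can also request a flush explicitly. A hedged sketch; the diff confirms BulkMeta lives in elasticsearch.helpers.actions, but whether it is re-exported elsewhere is not shown here, and the index name is a placeholder:

from elasticsearch.helpers.actions import BulkMeta  # defined per this diff


async def actions_with_manual_flush(batches):
    # map_actions() passes BulkMeta values through to the chunker, so a
    # stream can force the pending chunk out between logical batches.
    async for batch in batches:
        for doc in batch:
            yield {"_index": "events", "_source": doc}  # placeholder index
        yield BulkMeta.flush  # sentinel seen in the _chunk_actions hunk above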
elasticsearch/_sync/client/__init__.py
@@ -74,6 +74,7 @@ from .slm import SlmClient
 from .snapshot import SnapshotClient
 from .sql import SqlClient
 from .ssl import SslClient
+from .streams import StreamsClient
 from .synonyms import SynonymsClient
 from .tasks import TasksClient
 from .text_structure import TextStructureClient
@@ -378,6 +379,7 @@ class Elasticsearch(BaseClient):
         self.shutdown = ShutdownClient(self)
         self.sql = SqlClient(self)
         self.ssl = SslClient(self)
+        self.streams = StreamsClient(self)
         self.synonyms = SynonymsClient(self)
         self.text_structure = TextStructureClient(self)
         self.transform = TransformClient(self)
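These hunks register the new streams namespace on the client. The diff confirms only that client.streams exists; the method name below is an assumption based on the server's GET _streams/status endpoint in Elasticsearch 9.1, so verify it against the generated streams.py before relying on it:

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint
# Assumed method name; only the namespace itself is confirmed by this diff.
print(client.streams.status())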
@@ -606,6 +608,7 @@ class Elasticsearch(BaseClient):
         <li>JavaScript: Check out <code>client.helpers.*</code></li>
         <li>.NET: Check out <code>BulkAllObservable</code></li>
         <li>PHP: Check out bulk indexing.</li>
+        <li>Ruby: Check out <code>Elasticsearch::Helpers::BulkHelper</code></li>
         </ul>
         <p><strong>Submitting bulk requests with cURL</strong></p>
         <p>If you're providing text file input to <code>curl</code>, you must use the <code>--data-binary</code> flag instead of plain <code>-d</code>.
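The docstring list above points at each language client's bulk helper; for this package the equivalent is elasticsearch.helpers.bulk. A sketch, with endpoint, index, and document shape as placeholders:

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint
docs = ({"_index": "my-index", "_id": str(i), "value": i} for i in range(1000))
# bulk() batches the actions and wraps the _bulk endpoint described above;
# it returns the success count and a list of errors.
success, errors = bulk(client, docs)
print(success, errors)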
@@ -1324,7 +1327,7 @@ class Elasticsearch(BaseClient):
         )
 
     @_rewrite_parameters(
-        body_fields=("max_docs", "query", "slice"),
+        body_fields=("max_docs", "query", "slice", "sort"),
         parameter_aliases={"from": "from_"},
     )
     def delete_by_query(
@@ -1368,7 +1371,12 @@ class Elasticsearch(BaseClient):
         ] = None,
         slice: t.Optional[t.Mapping[str, t.Any]] = None,
         slices: t.Optional[t.Union[int, t.Union[str, t.Literal["auto"]]]] = None,
-        sort: t.Optional[t.Sequence[str]] = None,
+        sort: t.Optional[
+            t.Union[
+                t.Sequence[t.Union[str, t.Mapping[str, t.Any]]],
+                t.Union[str, t.Mapping[str, t.Any]],
+            ]
+        ] = None,
         stats: t.Optional[t.Sequence[str]] = None,
         terminate_after: t.Optional[int] = None,
         timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -1500,7 +1508,7 @@ class Elasticsearch(BaseClient):
         :param slice: Slice the request manually using the provided slice ID and total
             number of slices.
         :param slices: The number of slices this task should be divided into.
-        :param sort: A comma-separated list of `<field>:<direction>` pairs.
+        :param sort: A sort object that specifies the order of deleted documents.
         :param stats: The specific `tag` of the request for logging and statistical purposes.
         :param terminate_after: The maximum number of documents to collect for each shard.
             If a query reaches this limit, Elasticsearch terminates the query early.
@@ -1590,8 +1598,6 @@ class Elasticsearch(BaseClient):
             __query["search_type"] = search_type
         if slices is not None:
             __query["slices"] = slices
-        if sort is not None:
-            __query["sort"] = sort
         if stats is not None:
             __query["stats"] = stats
         if terminate_after is not None:
@@ -1611,6 +1617,8 @@ class Elasticsearch(BaseClient):
             __body["query"] = query
         if slice is not None:
             __body["slice"] = slice
+        if sort is not None:
+            __body["sort"] = sort
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return self.perform_request(  # type: ignore[return-value]
             "POST",
@@ -3868,6 +3876,13 @@ class Elasticsearch(BaseClient):
         In this case, the response includes a count of the version conflicts that were encountered.
         Note that the handling of other error types is unaffected by the <code>conflicts</code> property.
         Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than <code>max_docs</code> until it has successfully indexed <code>max_docs</code> documents into the target or it has gone through every document in the source query.</p>
+        <p>It's recommended to reindex on indices with a green status. Reindexing can fail when a node shuts down or crashes.</p>
+        <ul>
+        <li>When requested with <code>wait_for_completion=true</code> (default), the request fails if the node shuts down.</li>
+        <li>When requested with <code>wait_for_completion=false</code>, a task id is returned, for use with the task management APIs. The task may disappear or fail if the node shuts down.
+        When retrying a failed reindex operation, it might be necessary to set <code>conflicts=proceed</code> or to first delete the partial destination index.
+        Additionally, dry runs, checking disk space, and fetching index recovery information can help address the root cause.</li>
+        </ul>
         <p>Refer to the linked documentation for examples of how to reindex documents.</p>
 
 
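The added guidance pairs naturally with running reindex as a background task. A sketch of that flow under the documented API; endpoint and index names are placeholders:

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint
# With wait_for_completion=False a task id comes back instead of a blocking
# response; conflicts="proceed" helps when retrying a failed or partial run.
resp = client.reindex(
    source={"index": "old-index"},  # placeholder indices
    dest={"index": "new-index"},
    conflicts="proceed",
    wait_for_completion=False,
)
print(client.tasks.get(task_id=resp["task"]))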
@@ -5647,7 +5662,7 @@ class Elasticsearch(BaseClient):
         doc: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         field_statistics: t.Optional[bool] = None,
-        fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        fields: t.Optional[t.Sequence[str]] = None,
         filter: t.Optional[t.Mapping[str, t.Any]] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
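This hunk narrows the termvectors `fields` parameter to a sequence, so a bare string no longer type-checks. A sketch of the adjusted call; endpoint, index, document id, and field name are placeholders:

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder endpoint
# `fields` is now typed as a sequence only: pass a list even for one field.
tv = client.termvectors(
    index="my-index",
    id="1",
    fields=["title"],
    field_statistics=True,
)
print(tv["term_vectors"].keys())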