elasticsearch 8.18.0__py3-none-any.whl → 8.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. elasticsearch/_async/client/__init__.py +56 -76
  2. elasticsearch/_async/client/async_search.py +5 -9
  3. elasticsearch/_async/client/autoscaling.py +4 -4
  4. elasticsearch/_async/client/cat.py +620 -65
  5. elasticsearch/_async/client/ccr.py +13 -13
  6. elasticsearch/_async/client/cluster.py +33 -24
  7. elasticsearch/_async/client/connector.py +30 -30
  8. elasticsearch/_async/client/dangling_indices.py +3 -3
  9. elasticsearch/_async/client/enrich.py +5 -5
  10. elasticsearch/_async/client/eql.py +13 -5
  11. elasticsearch/_async/client/esql.py +38 -9
  12. elasticsearch/_async/client/features.py +2 -2
  13. elasticsearch/_async/client/fleet.py +13 -13
  14. elasticsearch/_async/client/graph.py +1 -1
  15. elasticsearch/_async/client/ilm.py +11 -11
  16. elasticsearch/_async/client/indices.py +131 -82
  17. elasticsearch/_async/client/inference.py +516 -110
  18. elasticsearch/_async/client/ingest.py +9 -16
  19. elasticsearch/_async/client/license.py +11 -11
  20. elasticsearch/_async/client/logstash.py +3 -3
  21. elasticsearch/_async/client/migration.py +3 -3
  22. elasticsearch/_async/client/ml.py +81 -93
  23. elasticsearch/_async/client/nodes.py +9 -8
  24. elasticsearch/_async/client/query_rules.py +8 -8
  25. elasticsearch/_async/client/rollup.py +8 -8
  26. elasticsearch/_async/client/search_application.py +10 -10
  27. elasticsearch/_async/client/searchable_snapshots.py +4 -4
  28. elasticsearch/_async/client/security.py +72 -80
  29. elasticsearch/_async/client/shutdown.py +3 -3
  30. elasticsearch/_async/client/simulate.py +1 -1
  31. elasticsearch/_async/client/slm.py +9 -9
  32. elasticsearch/_async/client/snapshot.py +19 -13
  33. elasticsearch/_async/client/sql.py +6 -6
  34. elasticsearch/_async/client/ssl.py +1 -1
  35. elasticsearch/_async/client/synonyms.py +7 -7
  36. elasticsearch/_async/client/tasks.py +3 -3
  37. elasticsearch/_async/client/text_structure.py +4 -4
  38. elasticsearch/_async/client/transform.py +11 -11
  39. elasticsearch/_async/client/watcher.py +13 -13
  40. elasticsearch/_async/client/xpack.py +2 -2
  41. elasticsearch/_sync/client/__init__.py +56 -76
  42. elasticsearch/_sync/client/async_search.py +5 -9
  43. elasticsearch/_sync/client/autoscaling.py +4 -4
  44. elasticsearch/_sync/client/cat.py +620 -65
  45. elasticsearch/_sync/client/ccr.py +13 -13
  46. elasticsearch/_sync/client/cluster.py +33 -24
  47. elasticsearch/_sync/client/connector.py +30 -30
  48. elasticsearch/_sync/client/dangling_indices.py +3 -3
  49. elasticsearch/_sync/client/enrich.py +5 -5
  50. elasticsearch/_sync/client/eql.py +13 -5
  51. elasticsearch/_sync/client/esql.py +38 -9
  52. elasticsearch/_sync/client/features.py +2 -2
  53. elasticsearch/_sync/client/fleet.py +13 -13
  54. elasticsearch/_sync/client/graph.py +1 -1
  55. elasticsearch/_sync/client/ilm.py +11 -11
  56. elasticsearch/_sync/client/indices.py +131 -82
  57. elasticsearch/_sync/client/inference.py +516 -110
  58. elasticsearch/_sync/client/ingest.py +9 -16
  59. elasticsearch/_sync/client/license.py +11 -11
  60. elasticsearch/_sync/client/logstash.py +3 -3
  61. elasticsearch/_sync/client/migration.py +3 -3
  62. elasticsearch/_sync/client/ml.py +81 -93
  63. elasticsearch/_sync/client/nodes.py +9 -8
  64. elasticsearch/_sync/client/query_rules.py +8 -8
  65. elasticsearch/_sync/client/rollup.py +8 -8
  66. elasticsearch/_sync/client/search_application.py +10 -10
  67. elasticsearch/_sync/client/searchable_snapshots.py +4 -4
  68. elasticsearch/_sync/client/security.py +72 -80
  69. elasticsearch/_sync/client/shutdown.py +3 -3
  70. elasticsearch/_sync/client/simulate.py +1 -1
  71. elasticsearch/_sync/client/slm.py +9 -9
  72. elasticsearch/_sync/client/snapshot.py +19 -13
  73. elasticsearch/_sync/client/sql.py +6 -6
  74. elasticsearch/_sync/client/ssl.py +1 -1
  75. elasticsearch/_sync/client/synonyms.py +7 -7
  76. elasticsearch/_sync/client/tasks.py +3 -3
  77. elasticsearch/_sync/client/text_structure.py +4 -4
  78. elasticsearch/_sync/client/transform.py +11 -11
  79. elasticsearch/_sync/client/watcher.py +13 -13
  80. elasticsearch/_sync/client/xpack.py +2 -2
  81. elasticsearch/_version.py +1 -1
  82. elasticsearch/compat.py +5 -0
  83. elasticsearch/dsl/__init__.py +2 -1
  84. elasticsearch/dsl/_async/document.py +1 -1
  85. elasticsearch/dsl/_sync/document.py +1 -1
  86. elasticsearch/dsl/aggs.py +2 -3
  87. elasticsearch/dsl/document_base.py +176 -16
  88. elasticsearch/dsl/field.py +361 -38
  89. elasticsearch/dsl/query.py +55 -4
  90. elasticsearch/dsl/types.py +151 -22
  91. elasticsearch/dsl/utils.py +1 -1
  92. elasticsearch/esql/__init__.py +18 -0
  93. elasticsearch/esql/esql.py +1105 -0
  94. elasticsearch/esql/functions.py +1738 -0
  95. elasticsearch/exceptions.py +2 -0
  96. {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/METADATA +1 -1
  97. elasticsearch-8.19.0.dist-info/RECORD +164 -0
  98. elasticsearch-8.18.0.dist-info/RECORD +0 -161
  99. {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/WHEEL +0 -0
  100. {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/licenses/LICENSE +0 -0
  101. {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/licenses/NOTICE +0 -0
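Most hunks below are mechanical documentation-URL bumps from 8.18 to 8.19, but two changes stand out in the file list: a brand-new `elasticsearch.esql` package (an ES|QL query builder spread across `esql.py`, `functions.py`, and its `__init__.py`) and a substantially expanded inference client, shown in detail below and mirrored across `_async` and `_sync`. A minimal sketch of the new query builder follows; the builder surface (`ESQL.from_`, chained `sort`/`limit`, rendering via `str()`) is assumed from the new package's exports and should be verified against the 8.19 release notes:

# Assumed API of the new elasticsearch.esql package (verify before use).
from elasticsearch import Elasticsearch
from elasticsearch.esql import ESQL

client = Elasticsearch("http://localhost:9200")  # placeholder URL

# Compose an ES|QL query programmatically instead of by string concatenation.
query = ESQL.from_("employees").sort("emp_no").limit(5)

# The builder renders to ES|QL source text, which the existing esql.query API accepts.
resp = client.esql.query(query=str(query))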
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform completion inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
           <p>Delete an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/delete-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_
 
         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
           <p>Get an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_
 
         :param task_type: The task type
         :param inference_id: The inference Id
@@ -234,6 +234,125 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=("input", "input_type", "query", "task_settings"),
+    )
+    async def inference(
+        self,
+        *,
+        inference_id: str,
+        input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        task_type: t.Optional[
+            t.Union[
+                str,
+                t.Literal[
+                    "chat_completion",
+                    "completion",
+                    "rerank",
+                    "sparse_embedding",
+                    "text_embedding",
+                ],
+            ]
+        ] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
+        pretty: t.Optional[bool] = None,
+        query: t.Optional[str] = None,
+        task_settings: t.Optional[t.Any] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Perform inference on the service.</p>
+          <p>This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
+          It returns a response with the results of the tasks.
+          The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.</p>
+          <p>For details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.</p>
+          <blockquote>
+          <p>info
+          The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          </blockquote>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
+
+        :param inference_id: The unique identifier for the inference endpoint.
+        :param input: The text on which you want to perform the inference task. It can
+            be a single string or an array. > info > Inference endpoints for the `completion`
+            task type currently only support a single string as input.
+        :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
+        :param query: The query input, which is required only for the `rerank` task.
+            It is not required for other tasks.
+        :param task_settings: Task settings for the individual inference request. These
+            settings are specific to the task type you specified and override the task
+            settings specified when initializing the service.
+        :param timeout: The amount of time to wait for the inference request to complete.
+        """
+        if inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'inference_id'")
+        if input is None and body is None:
+            raise ValueError("Empty value passed for parameter 'input'")
+        __path_parts: t.Dict[str, str]
+        if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
+            __path_parts = {
+                "task_type": _quote(task_type),
+                "inference_id": _quote(inference_id),
+            }
+            __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
+        elif inference_id not in SKIP_IN_PATH:
+            __path_parts = {"inference_id": _quote(inference_id)}
+            __path = f'/_inference/{__path_parts["inference_id"]}'
+        else:
+            raise ValueError("Couldn't find a path for the given parameters")
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if input is not None:
+                __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
+            if query is not None:
+                __body["query"] = query
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "POST",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.inference",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_name="inference_config",
     )
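The hunk above adds the generic `inference.inference()` method. A usage sketch against the async client; the endpoint IDs are hypothetical and the same method exists on the sync client:

from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL

# text_embedding: `input_type` is the new root-level field and, per the
# docstring, takes precedence over any input_type given in task_settings.
resp = await client.inference.inference(
    inference_id="my-e5-endpoint",      # hypothetical endpoint id
    task_type="text_embedding",
    input=["first passage", "second passage"],
    input_type="SEARCH",
)

# rerank is the only task that also takes `query`.
resp = await client.inference.inference(
    inference_id="my-rerank-endpoint",  # hypothetical endpoint id
    task_type="rerank",
    query="what is the capital of France?",
    input=["Paris is the capital of France.", "Berlin is in Germany."],
)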
@@ -259,26 +378,45 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
-          <p>Create an inference endpoint.
-          When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint.</p>
           <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
           For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+          <ul>
+          <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+          <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Anthropic (<code>completion</code>)</li>
+          <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+          <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+          <li>ELSER (<code>sparse_embedding</code>)</li>
+          <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+          <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          </ul>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -309,6 +447,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
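Continuing with the client from the earlier sketch, here is a hedged example of the updated `put()` call with the new `timeout` query parameter; the service and settings values are illustrative only, and any integration from the list above works the same way:

# Illustrative config: a Cohere text_embedding endpoint with a creation timeout.
resp = await client.inference.put(
    task_type="text_embedding",            # pick a task type from the integration list above
    inference_id="my-embedding-endpoint",  # hypothetical id
    inference_config={
        "service": "cohere",
        "service_settings": {
            "api_key": "<api-key>",            # placeholder secret
            "model_id": "embed-english-v3.0",  # illustrative model
        },
    },
    timeout="30s",  # new in 8.19: bounds how long to wait for endpoint creation
)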
@@ -344,6 +484,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -351,14 +492,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an AlibabaCloud AI Search inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -369,6 +505,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -395,6 +533,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -440,25 +580,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
           <p>Create an Amazon Bedrock inference endpoint.</p>
-          <p>Creates an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
           <blockquote>
           <p>info
           You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
           </blockquote>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -469,6 +605,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -495,6 +633,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -540,6 +680,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -547,14 +688,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an Anthropic inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
            is `completion`.
@@ -566,6 +702,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -592,6 +730,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -637,6 +777,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -644,14 +785,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an Azure AI studio inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -662,6 +798,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -688,6 +826,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -733,6 +873,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -746,14 +887,9 @@ class InferenceClient(NamespacedClient):
           <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
           </ul>
           <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -766,6 +902,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
        """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -792,6 +930,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -837,6 +977,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -844,14 +985,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create a Cohere inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -862,6 +998,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -886,6 +1024,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -910,6 +1050,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a custom inference endpoint.</p>
+          <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+          The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+          The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+          Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+          The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+          If the definition (key) is not found for a template, an error message is returned.
+          In case of an endpoint definition like the following:</p>
+          <pre><code>PUT _inference/text_embedding/test-text-embedding
+          {
+            &quot;service&quot;: &quot;custom&quot;,
+            &quot;service_settings&quot;: {
+              &quot;secret_parameters&quot;: {
+                &quot;api_key&quot;: &quot;&lt;some api key&gt;&quot;
+              },
+              &quot;url&quot;: &quot;...endpoints.huggingface.cloud/v1/embeddings&quot;,
+              &quot;headers&quot;: {
+                &quot;Authorization&quot;: &quot;Bearer ${api_key}&quot;,
+                &quot;Content-Type&quot;: &quot;application/json&quot;
+              },
+              &quot;request&quot;: &quot;{\\&quot;input\\&quot;: ${input}}&quot;,
+              &quot;response&quot;: {
+                &quot;json_parser&quot;: {
+                  &quot;text_embeddings&quot;:&quot;$.data[*].embedding[*]&quot;
+                }
+              }
+            }
+          }
+          </code></pre>
+          <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+          <blockquote>
+          <p>info
+          Templates should not be surrounded by quotes.</p>
+          </blockquote>
+          <p>Pre-defined templates:</p>
+          <ul>
+          <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+          <li><code>${input_type}</code> refers to the input type translation values.</li>
+          <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+          <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+          <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+          </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a DeepSeek inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
@@ -933,6 +1288,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -955,7 +1311,7 @@ class InferenceClient(NamespacedClient):
           Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param elasticsearch_inference_id: The unique identifier of the inference endpoint.
@@ -967,6 +1323,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1351,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1032,6 +1392,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1055,7 +1416,7 @@ class InferenceClient(NamespacedClient):
           Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elser.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param elser_inference_id: The unique identifier of the inference endpoint.
@@ -1064,6 +1425,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `elser` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1088,6 +1451,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1125,6 +1490,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1132,14 +1498,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an Google AI Studio inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-ai-studio.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param googleaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -1148,6 +1509,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googleaistudio` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1174,6 +1537,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1207,7 +1572,9 @@ class InferenceClient(NamespacedClient):
     async def put_googlevertexai(
         self,
         *,
-        task_type: t.Union[str, t.Literal["rerank", "text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         googlevertexai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1217,6 +1584,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1224,14 +1592,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create a Google Vertex AI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-vertex-ai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1242,6 +1605,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1268,6 +1633,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1293,12 +1660,19 @@ class InferenceClient(NamespacedClient):
1293
1660
  )
1294
1661
 
1295
1662
  @_rewrite_parameters(
1296
- body_fields=("service", "service_settings", "chunking_settings"),
1663
+ body_fields=(
1664
+ "service",
1665
+ "service_settings",
1666
+ "chunking_settings",
1667
+ "task_settings",
1668
+ ),
1297
1669
  )
1298
1670
  async def put_hugging_face(
1299
1671
  self,
1300
1672
  *,
1301
- task_type: t.Union[str, t.Literal["text_embedding"]],
1673
+ task_type: t.Union[
1674
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
1675
+ ],
1302
1676
  huggingface_inference_id: str,
1303
1677
  service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
1304
1678
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1307,17 +1681,22 @@ class InferenceClient(NamespacedClient):
1307
1681
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
1308
1682
  human: t.Optional[bool] = None,
1309
1683
  pretty: t.Optional[bool] = None,
1684
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
1685
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
1310
1686
  body: t.Optional[t.Dict[str, t.Any]] = None,
1311
1687
  ) -> ObjectApiResponse[t.Any]:
1312
1688
  """
1313
1689
  .. raw:: html
1314
1690
 
1315
1691
  <p>Create a Hugging Face inference endpoint.</p>
1316
- <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.</p>
1317
- <p>You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.
1318
- Select the model you want to use on the new endpoint creation page (for example <code>intfloat/e5-small-v2</code>), then select the sentence embeddings task under the advanced configuration section.
1319
- Create the endpoint and copy the URL after the endpoint initialization has been finished.</p>
1320
- <p>The following models are recommended for the Hugging Face service:</p>
1692
+ <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
1693
+ Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
1694
+ <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
1695
+ Select a model that supports the task you intend to use.</p>
1696
+ <p>For Elastic's <code>text_embedding</code> task:
1697
+ The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
1698
+ After the endpoint has initialized, copy the generated endpoint URL.
1699
+ Recommended models for <code>text_embedding</code> task:</p>
1321
1700
  <ul>
1322
1701
  <li><code>all-MiniLM-L6-v2</code></li>
1323
1702
  <li><code>all-MiniLM-L12-v2</code></li>
@@ -1327,14 +1706,27 @@ class InferenceClient(NamespacedClient):
           <li><code>multilingual-e5-base</code></li>
           <li><code>multilingual-e5-small</code></li>
           </ul>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+          Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+          HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-hugging-face.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1343,6 +1735,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1369,6 +1765,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1376,6 +1774,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
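Taken together, these hunks widen put_hugging_face from a text_embedding-only method to one accepting chat_completion, completion, and rerank, and thread the new task_settings body field and timeout query parameter through the request. A minimal sketch of the widened surface, using the sync client for brevity (the _async variant in this diff mirrors it); the endpoint name, URL, token, and the return_documents task setting are illustrative assumptions, not values taken from the diff:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # Register a Hugging Face rerank endpoint, a task type 8.18 did not accept.
    resp = client.inference.put_hugging_face(
        task_type="rerank",
        huggingface_inference_id="hf-rerank-demo",                 # assumed endpoint name
        service="hugging_face",
        service_settings={
            "url": "https://example.endpoints.huggingface.cloud",  # assumed dedicated endpoint URL
            "api_key": "hf_...",                                   # assumed Hugging Face token
        },
        task_settings={"return_documents": True},                  # assumed rerank task setting
        timeout="30s",  # new in 8.19: bounds the wait for endpoint creation
    )
    print(resp)

Note that the t.Literal union on task_type only guides type checkers; the server still validates the value at request time.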
@@ -1412,6 +1812,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1421,14 +1822,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
           <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
           To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-jinaai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1439,6 +1835,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1463,6 +1861,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
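put_jinaai gains only the timeout plumbing in this release. A short sketch of the parameter in context, assuming an illustrative model ID and API key:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # JinaAI text_embedding endpoint; give up if creation takes longer than 10s.
    client.inference.put_jinaai(
        task_type="text_embedding",
        jinaai_inference_id="jina-embeddings-demo",  # assumed endpoint name
        service="jinaai",
        service_settings={
            "api_key": "jina_...",             # assumed JinaAI API key
            "model_id": "jina-embeddings-v3",  # assumed model from https://jina.ai/embeddings/
        },
        timeout="10s",  # new in 8.19
    )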
@@ -1493,7 +1893,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[str, t.Literal["text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1502,30 +1904,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create a Mistral inference endpoint.</p>
-          <p>Creates an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/{brnach}/infer-service-mistral.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_

-        :param task_type: The task type. The only valid task type for the model to perform
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1550,6 +1949,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
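With chat_completion and completion added to put_mistral's accepted task types, a Mistral chat model can now be registered directly. A sketch under assumed service_settings (the model and api_key keys are illustrative, not taken from this diff):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # Mistral chat_completion endpoint, a task type new to put_mistral in 8.19.
    client.inference.put_mistral(
        task_type="chat_completion",
        mistral_inference_id="mistral-chat-demo",  # assumed endpoint name
        service="mistral",
        service_settings={
            "api_key": "...",                 # assumed Mistral API key
            "model": "mistral-small-latest",  # assumed model identifier
        },
        timeout="30s",
    )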
@@ -1595,6 +1996,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1602,14 +2004,9 @@ class InferenceClient(NamespacedClient):

           <p>Create an OpenAI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1622,6 +2019,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1646,6 +2045,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
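put_openai picks up the same timeout query parameter; per the new signature it accepts a duration string or the literals -1 and 0. A hedged sketch with assumed endpoint and model values:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # OpenAI text_embedding endpoint with the 8.19 creation timeout.
    client.inference.put_openai(
        task_type="text_embedding",
        openai_inference_id="openai-embeddings-demo",  # assumed endpoint name
        service="openai",
        service_settings={
            "api_key": "sk-...",                   # assumed OpenAI API key
            "model_id": "text-embedding-3-small",  # assumed model
        },
        task_settings={"user": "search-app"},  # assumed optional attribution setting
        timeout="45s",
    )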
@@ -1691,6 +2092,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1701,7 +2103,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-voyageai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1712,6 +2114,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1736,6 +2140,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
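put_voyageai likewise gains only timeout. A sketch with assumed service_settings keys; consult the linked 8.19 reference for the exact schema:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # VoyageAI text_embedding endpoint; only the timeout handling is new here.
    client.inference.put_voyageai(
        task_type="text_embedding",
        voyageai_inference_id="voyage-embeddings-demo",  # assumed endpoint name
        service="voyageai",
        service_settings={
            "api_key": "pa-...",           # assumed VoyageAI API key
            "model_id": "voyage-3-large",  # assumed model
        },
        timeout="20s",
    )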
@@ -1774,6 +2180,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1783,14 +2190,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
           You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `text_embedding`.
@@ -1799,6 +2201,8 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1823,6 +2227,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
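put_watsonx follows the same pattern, with text_embedding still the only documented task type. The service_settings keys below are assumptions drawn from the watsonxai service documentation, not from this diff:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    client.inference.put_watsonx(
        task_type="text_embedding",
        watsonx_inference_id="watsonx-embeddings-demo",  # assumed endpoint name
        service="watsonxai",
        service_settings={
            "api_key": "...",                            # assumed IBM Cloud API key
            "url": "https://us-south.ml.cloud.ibm.com",  # assumed region URL
            "model_id": "ibm/slate-30m-english-rtrvr",   # assumed model
            "project_id": "...",                         # assumed watsonx.ai project
            "api_version": "2024-05-02",                 # assumed API version date
        },
        timeout="60s",  # new in 8.19
    )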
@@ -1863,10 +2269,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform rereanking inference on the service</p>
+          <p>Perform reranking inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
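Besides repointing the doc link to 8.19, this hunk fixes the "rereanking" typo in the rerank docstring. Usage is unchanged; a minimal sketch, with the query parameter assumed from the rerank API shape (it is not visible in this hunk):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # Order candidate passages by relevance to the query.
    resp = client.inference.rerank(
        inference_id="hf-rerank-demo",  # assumed: the rerank endpoint sketched above
        query="how do I tune garbage collection?",
        input=[
            "GC tuning guide for the JVM",
            "Recipe for sourdough bread",
            "Heap sizing recommendations",
        ],
    )
    print(resp)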
@@ -1942,7 +2348,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
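Only the reference URL changes for sparse_embedding. For orientation, a minimal call against an assumed existing sparse_embedding endpoint:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # Sparse (term/weight) embedding for a single string.
    resp = client.inference.sparse_embedding(
        inference_id="my-elser-endpoint",  # assumed endpoint name
        input="There is a sale in the store today",
    )
    print(resp["sparse_embedding"])  # response key assumed from the task-type-keyed format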
@@ -2010,7 +2416,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
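text_embedding receives the same URL-only touch. A minimal batch call; the response key is assumed from the inference API's task-type-keyed format:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # One dense embedding per input string.
    resp = client.inference.text_embedding(
        inference_id="openai-embeddings-demo",  # assumed: the endpoint sketched earlier
        input=["first passage", "second passage"],
    )
    print(len(resp["text_embedding"]))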
@@ -2092,7 +2498,7 @@ class InferenceClient(NamespacedClient):
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/update-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config:
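The update method likewise only repoints its doc link. A sketch of a typical use, rotating a credential in place; the inference_config fields shown are assumptions based on the update API documentation, not values from this diff:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("https://localhost:9200", api_key="...")  # placeholder connection details

    # Swap the stored API key on an existing endpoint without recreating it.
    client.inference.update(
        inference_id="openai-embeddings-demo",  # assumed existing endpoint
        inference_config={
            "service_settings": {"api_key": "sk-new-..."},  # assumed updatable field
        },
    )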