elasticsearch 9.0.1__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. elasticsearch/__init__.py +3 -3
  2. elasticsearch/_async/client/__init__.py +93 -249
  3. elasticsearch/_async/client/async_search.py +4 -4
  4. elasticsearch/_async/client/autoscaling.py +4 -4
  5. elasticsearch/_async/client/cat.py +620 -58
  6. elasticsearch/_async/client/ccr.py +13 -13
  7. elasticsearch/_async/client/cluster.py +32 -22
  8. elasticsearch/_async/client/connector.py +30 -30
  9. elasticsearch/_async/client/dangling_indices.py +3 -3
  10. elasticsearch/_async/client/enrich.py +5 -5
  11. elasticsearch/_async/client/eql.py +14 -6
  12. elasticsearch/_async/client/esql.py +125 -9
  13. elasticsearch/_async/client/features.py +2 -2
  14. elasticsearch/_async/client/fleet.py +3 -3
  15. elasticsearch/_async/client/graph.py +1 -1
  16. elasticsearch/_async/client/ilm.py +11 -11
  17. elasticsearch/_async/client/indices.py +605 -117
  18. elasticsearch/_async/client/inference.py +523 -116
  19. elasticsearch/_async/client/ingest.py +9 -16
  20. elasticsearch/_async/client/license.py +11 -11
  21. elasticsearch/_async/client/logstash.py +3 -3
  22. elasticsearch/_async/client/migration.py +3 -3
  23. elasticsearch/_async/client/ml.py +75 -87
  24. elasticsearch/_async/client/monitoring.py +1 -1
  25. elasticsearch/_async/client/nodes.py +7 -7
  26. elasticsearch/_async/client/query_rules.py +8 -8
  27. elasticsearch/_async/client/rollup.py +9 -30
  28. elasticsearch/_async/client/search_application.py +10 -10
  29. elasticsearch/_async/client/searchable_snapshots.py +4 -4
  30. elasticsearch/_async/client/security.py +79 -81
  31. elasticsearch/_async/client/shutdown.py +3 -3
  32. elasticsearch/_async/client/simulate.py +1 -1
  33. elasticsearch/_async/client/slm.py +9 -9
  34. elasticsearch/_async/client/snapshot.py +64 -21
  35. elasticsearch/_async/client/sql.py +6 -6
  36. elasticsearch/_async/client/ssl.py +1 -1
  37. elasticsearch/_async/client/synonyms.py +26 -7
  38. elasticsearch/_async/client/tasks.py +4 -4
  39. elasticsearch/_async/client/text_structure.py +4 -4
  40. elasticsearch/_async/client/transform.py +11 -11
  41. elasticsearch/_async/client/watcher.py +17 -15
  42. elasticsearch/_async/client/xpack.py +2 -2
  43. elasticsearch/_otel.py +8 -8
  44. elasticsearch/_sync/client/__init__.py +93 -249
  45. elasticsearch/_sync/client/async_search.py +4 -4
  46. elasticsearch/_sync/client/autoscaling.py +4 -4
  47. elasticsearch/_sync/client/cat.py +620 -58
  48. elasticsearch/_sync/client/ccr.py +13 -13
  49. elasticsearch/_sync/client/cluster.py +32 -22
  50. elasticsearch/_sync/client/connector.py +30 -30
  51. elasticsearch/_sync/client/dangling_indices.py +3 -3
  52. elasticsearch/_sync/client/enrich.py +5 -5
  53. elasticsearch/_sync/client/eql.py +14 -6
  54. elasticsearch/_sync/client/esql.py +125 -9
  55. elasticsearch/_sync/client/features.py +2 -2
  56. elasticsearch/_sync/client/fleet.py +3 -3
  57. elasticsearch/_sync/client/graph.py +1 -1
  58. elasticsearch/_sync/client/ilm.py +11 -11
  59. elasticsearch/_sync/client/indices.py +605 -117
  60. elasticsearch/_sync/client/inference.py +523 -116
  61. elasticsearch/_sync/client/ingest.py +9 -16
  62. elasticsearch/_sync/client/license.py +11 -11
  63. elasticsearch/_sync/client/logstash.py +3 -3
  64. elasticsearch/_sync/client/migration.py +3 -3
  65. elasticsearch/_sync/client/ml.py +75 -87
  66. elasticsearch/_sync/client/monitoring.py +1 -1
  67. elasticsearch/_sync/client/nodes.py +7 -7
  68. elasticsearch/_sync/client/query_rules.py +8 -8
  69. elasticsearch/_sync/client/rollup.py +9 -30
  70. elasticsearch/_sync/client/search_application.py +10 -10
  71. elasticsearch/_sync/client/searchable_snapshots.py +4 -4
  72. elasticsearch/_sync/client/security.py +79 -81
  73. elasticsearch/_sync/client/shutdown.py +3 -3
  74. elasticsearch/_sync/client/simulate.py +1 -1
  75. elasticsearch/_sync/client/slm.py +9 -9
  76. elasticsearch/_sync/client/snapshot.py +64 -21
  77. elasticsearch/_sync/client/sql.py +6 -6
  78. elasticsearch/_sync/client/ssl.py +1 -1
  79. elasticsearch/_sync/client/synonyms.py +26 -7
  80. elasticsearch/_sync/client/tasks.py +4 -4
  81. elasticsearch/_sync/client/text_structure.py +4 -4
  82. elasticsearch/_sync/client/transform.py +11 -11
  83. elasticsearch/_sync/client/watcher.py +17 -15
  84. elasticsearch/_sync/client/xpack.py +2 -2
  85. elasticsearch/_version.py +1 -1
  86. elasticsearch/compat.py +5 -0
  87. elasticsearch/dsl/__init__.py +2 -1
  88. elasticsearch/dsl/_async/document.py +1 -1
  89. elasticsearch/dsl/_sync/document.py +1 -1
  90. elasticsearch/dsl/aggs.py +20 -0
  91. elasticsearch/dsl/document_base.py +177 -17
  92. elasticsearch/dsl/field.py +241 -38
  93. elasticsearch/dsl/query.py +50 -5
  94. elasticsearch/dsl/response/__init__.py +1 -1
  95. elasticsearch/dsl/types.py +245 -21
  96. elasticsearch/dsl/utils.py +1 -1
  97. elasticsearch/esql/__init__.py +18 -0
  98. elasticsearch/esql/esql.py +1105 -0
  99. elasticsearch/esql/esql1.py1 +307 -0
  100. elasticsearch/esql/functions.py +1738 -0
  101. {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/METADATA +3 -6
  102. elasticsearch-9.1.0.dist-info/RECORD +164 -0
  103. elasticsearch-9.0.1.dist-info/RECORD +0 -162
  104. elasticsearch-9.0.1.dist-info/licenses/LICENSE.txt +0 -175
  105. elasticsearch-9.0.1.dist-info/licenses/NOTICE.txt +0 -559
  106. {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/WHEEL +0 -0
  107. {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/licenses/LICENSE +0 -0
  108. {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/licenses/NOTICE +0 -0
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform completion inference on the service</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
  <p>Delete an inference endpoint</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-delete>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete>`_

  :param inference_id: The inference identifier.
  :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
  <p>Get an inference endpoint</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-get>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get>`_

  :param task_type: The task type
  :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
  )

  @_rewrite_parameters(
- body_fields=("input", "query", "task_settings"),
+ body_fields=("input", "input_type", "query", "task_settings"),
  )
  async def inference(
  self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
  error_trace: t.Optional[bool] = None,
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
+ input_type: t.Optional[str] = None,
  pretty: t.Optional[bool] = None,
  query: t.Optional[str] = None,
  task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
  </blockquote>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

  :param inference_id: The unique identifier for the inference endpoint.
  :param input: The text on which you want to perform the inference task. It can
  be a single string or an array. > info > Inference endpoints for the `completion`
  task type currently only support a single string as input.
  :param task_type: The type of inference task that the model performs.
+ :param input_type: Specifies the input data type for the text embedding model.
+ The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+ task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+ * `CLUSTERING` Not all services support all values. Unsupported values will
+ trigger a validation exception. Accepted values depend on the configured
+ inference service, refer to the relevant service-specific documentation for
+ more info. > info > The `input_type` parameter specified on the root level
+ of the request body will take precedence over the `input_type` parameter
+ specified in `task_settings`.
  :param query: The query input, which is required only for the `rerank` task.
  It is not required for other tasks.
  :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
  if not __body:
  if input is not None:
  __body["input"] = input
+ if input_type is not None:
+ __body["input_type"] = input_type
  if query is not None:
  __body["query"] = query
  if task_settings is not None:
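
Taken together, these hunks add a root-level `input_type` to the generic `inference` API. A minimal usage sketch against the async client; the endpoint id and chosen value are assumptions, and accepted values depend on the configured service:

    import asyncio

    from elasticsearch import AsyncElasticsearch

    async def main() -> None:
        # Assumed local cluster and a pre-existing text_embedding endpoint.
        client = AsyncElasticsearch("http://localhost:9200")
        resp = await client.inference.inference(
            inference_id="my-text-embedding-endpoint",  # hypothetical endpoint id
            input=["first passage", "second passage"],
            # New in 9.1.0: sent at the root of the request body; per the
            # docstring above it takes precedence over task_settings["input_type"].
            input_type="SEARCH",
        )
        print(resp)
        await client.close()

    asyncio.run(main())
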
@@ -366,26 +378,46 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

- <p>Create an inference endpoint.
- When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>Create an inference endpoint.</p>
  <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
  For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
  However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+ <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+ <ul>
+ <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+ <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+ <li>Anthropic (<code>completion</code>)</li>
+ <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+ <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+ <li>ELSER (<code>sparse_embedding</code>)</li>
+ <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+ <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+ <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+ <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+ <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+ </ul>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put>`_

  :param inference_id: The inference Id
  :param inference_config:
- :param task_type: The task type
+ :param task_type: The task type. Refer to the integration list in the API description
+ for the available task types.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if inference_id in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +448,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  __body = inference_config if inference_config is not None else body
  __headers = {"accept": "application/json", "content-type": "application/json"}
  return await self.perform_request( # type: ignore[return-value]
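
The `timeout` query parameter added here (and to the service-specific `put_*` methods below) bounds how long endpoint creation waits. A hedged sketch continuing the `client` from above; the endpoint id, service, and settings are illustrative assumptions, not values taken from this diff:

    # Sketch only: create an endpoint through the generic put API with the new
    # timeout parameter ("30s" uses Elasticsearch's time-unit string format).
    resp = await client.inference.put(
        inference_id="my-embedding-endpoint",  # hypothetical id
        task_type="text_embedding",
        inference_config={
            "service": "openai",  # any integration from the list above
            "service_settings": {
                "api_key": "<api key>",                # placeholder
                "model_id": "text-embedding-3-small",  # assumed model name
            },
        },
        timeout="30s",  # new in 9.1.0; the signature also accepts the literals -1 and 0
    )
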
@@ -451,6 +485,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -458,14 +493,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an AlibabaCloud AI Search inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-alibabacloud>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud>`_

  :param task_type: The type of the inference task that the model will perform.
  :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +506,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +534,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -547,25 +581,21 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

  <p>Create an Amazon Bedrock inference endpoint.</p>
- <p>Creates an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
  <blockquote>
  <p>info
  You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
  </blockquote>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-amazonbedrock>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock>`_

  :param task_type: The type of the inference task that the model will perform.
  :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +606,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +634,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -626,6 +660,112 @@ class InferenceClient(NamespacedClient):
  path_parts=__path_parts,
  )

+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ async def put_amazonsagemaker(
+ self,
+ *,
+ task_type: t.Union[
+ str,
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
+ ],
+ amazonsagemaker_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ <p>Create an Amazon SageMaker inference endpoint.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `amazon_sagemaker`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+ you specified.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type and `service_settings.api` you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if amazonsagemaker_inference_id in SKIP_IN_PATH:
+ raise ValueError(
+ "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+ )
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_amazonsagemaker",
+ path_parts=__path_parts,
+ )
+
  @_rewrite_parameters(
  body_fields=(
  "service",
@@ -647,6 +787,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -654,14 +795,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Anthropic inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-anthropic>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic>`_

  :param task_type: The task type. The only valid task type for the model to perform
  is `completion`.
@@ -673,6 +809,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +837,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -744,6 +884,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -751,14 +892,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Azure AI studio inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-azureaistudio>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio>`_

  :param task_type: The type of the inference task that the model will perform.
  :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +905,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +933,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -840,6 +980,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -853,14 +994,9 @@ class InferenceClient(NamespacedClient):
  <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
  </ul>
  <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-azureopenai>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai>`_

  :param task_type: The type of the inference task that the model will perform.
  NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +1009,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +1037,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -944,6 +1084,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -951,14 +1092,9 @@ class InferenceClient(NamespacedClient):

  <p>Create a Cohere inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-cohere>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere>`_

  :param task_type: The type of the inference task that the model will perform.
  :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +1105,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1131,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1017,6 +1157,221 @@ class InferenceClient(NamespacedClient):
  path_parts=__path_parts,
  )

+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ async def put_custom(
+ self,
+ *,
+ task_type: t.Union[
+ str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+ ],
+ custom_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ <p>Create a custom inference endpoint.</p>
+ <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+ The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+ The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+ Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+ The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+ If the definition (key) is not found for a template, an error message is returned.
+ In case of an endpoint definition like the following:</p>
+ <pre><code>PUT _inference/text_embedding/test-text-embedding
+ {
+ &quot;service&quot;: &quot;custom&quot;,
+ &quot;service_settings&quot;: {
+ &quot;secret_parameters&quot;: {
+ &quot;api_key&quot;: &quot;&lt;some api key&gt;&quot;
+ },
+ &quot;url&quot;: &quot;...endpoints.huggingface.cloud/v1/embeddings&quot;,
+ &quot;headers&quot;: {
+ &quot;Authorization&quot;: &quot;Bearer ${api_key}&quot;,
+ &quot;Content-Type&quot;: &quot;application/json&quot;
+ },
+ &quot;request&quot;: &quot;{\\&quot;input\\&quot;: ${input}}&quot;,
+ &quot;response&quot;: {
+ &quot;json_parser&quot;: {
+ &quot;text_embeddings&quot;:&quot;$.data[*].embedding[*]&quot;
+ }
+ }
+ }
+ }
+ </code></pre>
+ <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+ <blockquote>
+ <p>info
+ Templates should not be surrounded by quotes.</p>
+ </blockquote>
+ <p>Pre-defined templates:</p>
+ <ul>
+ <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+ <li><code>${input_type}</code> refers to the input type translation values.</li>
+ <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+ <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+ <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+ </ul>
+
+
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param custom_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `custom`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `custom` service.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type you specified.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if custom_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "custom_inference_id": _quote(custom_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_custom",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters(
+ body_fields=("service", "service_settings", "chunking_settings"),
+ )
+ async def put_deepseek(
+ self,
+ *,
+ task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+ deepseek_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ <p>Create a DeepSeek inference endpoint.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek>`_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param deepseek_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `deepseek`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `deepseek` service.
+ :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if deepseek_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "deepseek_inference_id": _quote(deepseek_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_deepseek",
+ path_parts=__path_parts,
+ )
+
  @_rewrite_parameters(
  body_fields=(
  "service",
@@ -1040,6 +1395,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1062,7 +1418,7 @@ class InferenceClient(NamespacedClient):
  Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-elasticsearch>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch>`_

  :param task_type: The type of the inference task that the model will perform.
  :param elasticsearch_inference_id: The unique identifier of the inference endpoint.
@@ -1074,6 +1430,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1100,6 +1458,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1139,6 +1499,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1162,7 +1523,7 @@ class InferenceClient(NamespacedClient):
  Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-elser>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser>`_

  :param task_type: The type of the inference task that the model will perform.
  :param elser_inference_id: The unique identifier of the inference endpoint.
@@ -1171,6 +1532,8 @@ class InferenceClient(NamespacedClient):
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `elser` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1195,6 +1558,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1232,6 +1597,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1239,14 +1605,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Google AI Studio inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-googleaistudio>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio>`_

  :param task_type: The type of the inference task that the model will perform.
  :param googleaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -1255,6 +1616,8 @@ class InferenceClient(NamespacedClient):
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `googleaistudio` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1281,6 +1644,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1314,7 +1679,9 @@ class InferenceClient(NamespacedClient):
  async def put_googlevertexai(
  self,
  *,
- task_type: t.Union[str, t.Literal["rerank", "text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+ ],
  googlevertexai_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1324,6 +1691,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1331,14 +1699,9 @@ class InferenceClient(NamespacedClient):

  <p>Create a Google Vertex AI inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-googlevertexai>`_
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai>`_

  :param task_type: The type of the inference task that the model will perform.
  :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1349,6 +1712,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1740,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1400,12 +1767,19 @@ class InferenceClient(NamespacedClient):
  )

  @_rewrite_parameters(
- body_fields=("service", "service_settings", "chunking_settings"),
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
  )
  async def put_hugging_face(
  self,
  *,
- task_type: t.Union[str, t.Literal["text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+ ],
  huggingface_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1414,17 +1788,22 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

  <p>Create a Hugging Face inference endpoint.</p>
- <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.</p>
- <p>You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.
- Select the model you want to use on the new endpoint creation page (for example <code>intfloat/e5-small-v2</code>), then select the sentence embeddings task under the advanced configuration section.
- Create the endpoint and copy the URL after the endpoint initialization has been finished.</p>
- <p>The following models are recommended for the Hugging Face service:</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+ Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+ <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+ Select a model that supports the task you intend to use.</p>
+ <p>For Elastic's <code>text_embedding</code> task:
+ The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+ After the endpoint has initialized, copy the generated endpoint URL.
+ Recommended models for <code>text_embedding</code> task:</p>
  <ul>
  <li><code>all-MiniLM-L6-v2</code></li>
  <li><code>all-MiniLM-L12-v2</code></li>
@@ -1434,14 +1813,27 @@ class InferenceClient(NamespacedClient):
1434
1813
  <li><code>multilingual-e5-base</code></li>
1435
1814
  <li><code>multilingual-e5-small</code></li>
1436
1815
  </ul>
1437
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
1438
- After creating the endpoint, wait for the model deployment to complete before using it.
1439
- To verify the deployment status, use the get trained model statistics API.
1440
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
1441
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
1816
+ <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
1817
+ The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
1818
+ After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
1819
+ Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
1820
+ <ul>
1821
+ <li><code>Mistral-7B-Instruct-v0.2</code></li>
1822
+ <li><code>QwQ-32B</code></li>
1823
+ <li><code>Phi-3-mini-128k-instruct</code></li>
1824
+ </ul>
1825
+ <p>For Elastic's <code>rerank</code> task:
1826
+ The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
1827
+ HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
1828
+ After the endpoint is initialized, copy the full endpoint URL for use.
1829
+ Tested models for <code>rerank</code> task:</p>
1830
+ <ul>
1831
+ <li><code>bge-reranker-base</code></li>
1832
+ <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
1833
+ </ul>
1442
1834
 
1443
1835
 
1444
- `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-hugging-face>`_
1836
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face>`_
1445
1837
 
1446
1838
  :param task_type: The type of the inference task that the model will perform.
1447
1839
  :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1450,6 +1842,10 @@ class InferenceClient(NamespacedClient):
1450
1842
  :param service_settings: Settings used to install the inference model. These
1451
1843
  settings are specific to the `hugging_face` service.
1452
1844
  :param chunking_settings: The chunking configuration object.
1845
+ :param task_settings: Settings to configure the inference task. These settings
1846
+ are specific to the task type you specified.
1847
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
1848
+ to be created.
1453
1849
  """
1454
1850
  if task_type in SKIP_IN_PATH:
1455
1851
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1872,8 @@ class InferenceClient(NamespacedClient):
1476
1872
  __query["human"] = human
1477
1873
  if pretty is not None:
1478
1874
  __query["pretty"] = pretty
1875
+ if timeout is not None:
1876
+ __query["timeout"] = timeout
1479
1877
  if not __body:
1480
1878
  if service is not None:
1481
1879
  __body["service"] = service
@@ -1483,6 +1881,8 @@ class InferenceClient(NamespacedClient):
1483
1881
  __body["service_settings"] = service_settings
1484
1882
  if chunking_settings is not None:
1485
1883
  __body["chunking_settings"] = chunking_settings
1884
+ if task_settings is not None:
1885
+ __body["task_settings"] = task_settings
1486
1886
  if not __body:
1487
1887
  __body = None # type: ignore[assignment]
1488
1888
  __headers = {"accept": "application/json"}
@@ -1519,6 +1919,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1528,14 +1929,9 @@ class InferenceClient(NamespacedClient):
             <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
             <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
             To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-jinaai>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1942,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1968,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1600,7 +2000,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[str, t.Literal["text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +2011,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

             <p>Create a Mistral inference endpoint.</p>
-            <p>Creates an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-mistral>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral>`_

-        :param task_type: The task type. The only valid task type for the model to perform
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +2056,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1702,21 +2103,17 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

             <p>Create an OpenAI inference endpoint.</p>
-            <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-openai>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2126,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2152,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1798,6 +2199,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1808,7 +2210,7 @@ class InferenceClient(NamespacedClient):
             <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-voyageai>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2221,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2247,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1873,7 +2279,9 @@ class InferenceClient(NamespacedClient):
     async def put_watsonx(
         self,
         *,
-        task_type: t.Union[str, t.Literal["text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         watsonx_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1881,6 +2289,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1890,22 +2299,18 @@ class InferenceClient(NamespacedClient):
             <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
             You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
             You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-watsonx>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx>`_

-        :param task_type: The task type. The only valid task type for the model to perform
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param watsonx_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2335,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1970,10 +2377,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-            <p>Perform rereanking inference on the service</p>
+            <p>Perform reranking inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
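Beyond the docstring typo fix ("rereanking" to "reranking"), the `rerank` method itself is unchanged. For context, a minimal call sketch (the endpoint ID and inputs are illustrative):

```python
import asyncio

from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")  # hypothetical cluster URL


async def main() -> None:
    # Reorder candidate documents by relevance to the query.
    resp = await client.inference.rerank(
        inference_id="hf-reranker",  # illustrative; any rerank endpoint works
        query="What is Elasticsearch?",
        input=[
            "Elasticsearch is a distributed search and analytics engine.",
            "The capital of France is Paris.",
        ],
    )
    print(resp)


asyncio.run(main())
```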
@@ -2049,7 +2456,7 @@ class InferenceClient(NamespacedClient):
             <p>Perform sparse embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
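Only the docs URL changes here (dropping the `v9` path segment). For reference, a minimal `sparse_embedding` call sketch (the endpoint ID is illustrative):

```python
import asyncio

from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")  # hypothetical cluster URL


async def main() -> None:
    # ELSER-style endpoints return a weighted token dictionary per input.
    resp = await client.inference.sparse_embedding(
        inference_id="my-elser-endpoint",  # illustrative ID
        input="The quick brown fox jumps over the lazy dog.",
    )
    print(resp)


asyncio.run(main())
```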
@@ -2117,7 +2524,7 @@ class InferenceClient(NamespacedClient):
             <p>Perform text embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
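Again a URL-only change. A minimal `text_embedding` call sketch (the endpoint ID and inputs are illustrative):

```python
import asyncio

from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")  # hypothetical cluster URL


async def main() -> None:
    # One dense vector is returned per input string.
    resp = await client.inference.text_embedding(
        inference_id="voyage-embeddings",  # illustrative; any text_embedding endpoint
        input=["first sentence", "second sentence"],
    )
    print(resp)


asyncio.run(main())
```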
@@ -2199,7 +2606,7 @@ class InferenceClient(NamespacedClient):
             However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-update>`_
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config:
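The `update` method's signature is untouched by this release; only its docs URL changes. A minimal sketch of how it is typically used (IDs and config values are illustrative):

```python
import asyncio

from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")  # hypothetical cluster URL


async def main() -> None:
    # Rotate a credential in place instead of deleting and recreating the endpoint.
    resp = await client.inference.update(
        inference_id="local-openai-compat",  # illustrative ID
        inference_config={
            "service_settings": {"api_key": "<rotated-api-key>"},  # placeholder
        },
    )
    print(resp)


asyncio.run(main())
```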