elasticsearch 8.18.1__py3-none-any.whl → 8.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. elasticsearch/_async/client/__init__.py +68 -82
  2. elasticsearch/_async/client/async_search.py +5 -9
  3. elasticsearch/_async/client/autoscaling.py +4 -4
  4. elasticsearch/_async/client/cat.py +744 -75
  5. elasticsearch/_async/client/ccr.py +13 -13
  6. elasticsearch/_async/client/cluster.py +38 -24
  7. elasticsearch/_async/client/connector.py +30 -30
  8. elasticsearch/_async/client/dangling_indices.py +3 -3
  9. elasticsearch/_async/client/enrich.py +5 -5
  10. elasticsearch/_async/client/eql.py +13 -5
  11. elasticsearch/_async/client/esql.py +54 -15
  12. elasticsearch/_async/client/features.py +2 -2
  13. elasticsearch/_async/client/fleet.py +13 -13
  14. elasticsearch/_async/client/graph.py +1 -1
  15. elasticsearch/_async/client/ilm.py +11 -11
  16. elasticsearch/_async/client/indices.py +132 -83
  17. elasticsearch/_async/client/inference.py +519 -112
  18. elasticsearch/_async/client/ingest.py +9 -16
  19. elasticsearch/_async/client/license.py +10 -10
  20. elasticsearch/_async/client/logstash.py +3 -3
  21. elasticsearch/_async/client/migration.py +3 -3
  22. elasticsearch/_async/client/ml.py +76 -88
  23. elasticsearch/_async/client/nodes.py +9 -8
  24. elasticsearch/_async/client/query_rules.py +8 -8
  25. elasticsearch/_async/client/rollup.py +8 -8
  26. elasticsearch/_async/client/search_application.py +10 -10
  27. elasticsearch/_async/client/searchable_snapshots.py +4 -4
  28. elasticsearch/_async/client/security.py +72 -80
  29. elasticsearch/_async/client/shutdown.py +3 -3
  30. elasticsearch/_async/client/simulate.py +1 -1
  31. elasticsearch/_async/client/slm.py +9 -9
  32. elasticsearch/_async/client/snapshot.py +286 -130
  33. elasticsearch/_async/client/sql.py +7 -7
  34. elasticsearch/_async/client/ssl.py +1 -1
  35. elasticsearch/_async/client/synonyms.py +7 -7
  36. elasticsearch/_async/client/tasks.py +3 -3
  37. elasticsearch/_async/client/text_structure.py +4 -4
  38. elasticsearch/_async/client/transform.py +69 -9
  39. elasticsearch/_async/client/xpack.py +1 -1
  40. elasticsearch/_sync/client/__init__.py +68 -82
  41. elasticsearch/_sync/client/async_search.py +5 -9
  42. elasticsearch/_sync/client/autoscaling.py +4 -4
  43. elasticsearch/_sync/client/cat.py +744 -75
  44. elasticsearch/_sync/client/ccr.py +13 -13
  45. elasticsearch/_sync/client/cluster.py +38 -24
  46. elasticsearch/_sync/client/connector.py +30 -30
  47. elasticsearch/_sync/client/dangling_indices.py +3 -3
  48. elasticsearch/_sync/client/enrich.py +5 -5
  49. elasticsearch/_sync/client/eql.py +13 -5
  50. elasticsearch/_sync/client/esql.py +54 -15
  51. elasticsearch/_sync/client/features.py +2 -2
  52. elasticsearch/_sync/client/fleet.py +13 -13
  53. elasticsearch/_sync/client/graph.py +1 -1
  54. elasticsearch/_sync/client/ilm.py +11 -11
  55. elasticsearch/_sync/client/indices.py +132 -83
  56. elasticsearch/_sync/client/inference.py +519 -112
  57. elasticsearch/_sync/client/ingest.py +9 -16
  58. elasticsearch/_sync/client/license.py +10 -10
  59. elasticsearch/_sync/client/logstash.py +3 -3
  60. elasticsearch/_sync/client/migration.py +3 -3
  61. elasticsearch/_sync/client/ml.py +76 -88
  62. elasticsearch/_sync/client/nodes.py +9 -8
  63. elasticsearch/_sync/client/query_rules.py +8 -8
  64. elasticsearch/_sync/client/rollup.py +8 -8
  65. elasticsearch/_sync/client/search_application.py +10 -10
  66. elasticsearch/_sync/client/searchable_snapshots.py +4 -4
  67. elasticsearch/_sync/client/security.py +72 -80
  68. elasticsearch/_sync/client/shutdown.py +3 -3
  69. elasticsearch/_sync/client/simulate.py +1 -1
  70. elasticsearch/_sync/client/slm.py +9 -9
  71. elasticsearch/_sync/client/snapshot.py +286 -130
  72. elasticsearch/_sync/client/sql.py +7 -7
  73. elasticsearch/_sync/client/ssl.py +1 -1
  74. elasticsearch/_sync/client/synonyms.py +7 -7
  75. elasticsearch/_sync/client/tasks.py +3 -3
  76. elasticsearch/_sync/client/text_structure.py +4 -4
  77. elasticsearch/_sync/client/transform.py +69 -9
  78. elasticsearch/_sync/client/xpack.py +1 -1
  79. elasticsearch/_version.py +1 -1
  80. elasticsearch/compat.py +5 -0
  81. elasticsearch/dsl/__init__.py +2 -1
  82. elasticsearch/dsl/_async/document.py +85 -1
  83. elasticsearch/dsl/_sync/document.py +85 -1
  84. elasticsearch/dsl/aggs.py +22 -3
  85. elasticsearch/dsl/document_base.py +219 -16
  86. elasticsearch/dsl/field.py +272 -48
  87. elasticsearch/dsl/query.py +49 -4
  88. elasticsearch/dsl/response/aggs.py +1 -1
  89. elasticsearch/dsl/types.py +247 -27
  90. elasticsearch/dsl/utils.py +2 -2
  91. elasticsearch/esql/__init__.py +19 -0
  92. elasticsearch/esql/esql.py +1156 -0
  93. elasticsearch/esql/functions.py +1750 -0
  94. elasticsearch/exceptions.py +2 -0
  95. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/METADATA +1 -5
  96. elasticsearch-8.19.1.dist-info/RECORD +164 -0
  97. elasticsearch-8.18.1.dist-info/RECORD +0 -163
  98. elasticsearch-8.18.1.dist-info/licenses/LICENSE.txt +0 -175
  99. elasticsearch-8.18.1.dist-info/licenses/NOTICE.txt +0 -559
  100. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/WHEEL +0 -0
  101. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/licenses/LICENSE +0 -0
  102. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/licenses/NOTICE +0 -0
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
         <p>Perform completion inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
         <p>Delete an inference endpoint</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/delete-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_

         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
         <p>Get an inference endpoint</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_

         :param task_type: The task type
         :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
         </blockquote>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
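Note: the new root-level `input_type` lands in the request body exactly as the hunk above shows. A minimal sketch of calling it (the endpoint name is a placeholder for an existing `text_embedding` endpoint, and the accepted `input_type` values are service-specific):

    import asyncio
    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def embed_query() -> None:
        resp = await client.inference.inference(
            inference_id="my-embedding-endpoint",  # placeholder endpoint
            input=["how do I tune bm25?"],
            # New in 8.19: takes precedence over input_type in task_settings.
            input_type="SEARCH",
        )
        print(resp)

    asyncio.run(embed_query())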
@@ -366,26 +378,47 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

-        <p>Create an inference endpoint.
-        When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+        <p>Create an inference endpoint.</p>
         <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
         For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
         However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+        <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+        <ul>
+        <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+        <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+        <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+        <li>Anthropic (<code>completion</code>)</li>
+        <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+        <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+        <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+        <li>DeepSeek (<code>chat_completion</code>, <code>completion</code>)</li>
+        <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+        <li>ELSER (<code>sparse_embedding</code>)</li>
+        <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+        <li>Google Vertex AI (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+        <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+        <li>JinaAI (<code>rerank</code>, <code>text_embedding</code>)</li>
+        <li>Llama (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+        <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+        <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+        <li>VoyageAI (<code>rerank</code>, <code>text_embedding</code>)</li>
+        <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+        </ul>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_

         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +449,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
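Note: `put`, and each service-specific helper changed below, now forwards an optional `timeout` to the query string. A hedged sketch of creating an ELSER endpoint with it (the endpoint name and service settings are illustrative):

    import asyncio
    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def create_endpoint() -> None:
        resp = await client.inference.put(
            task_type="sparse_embedding",
            inference_id="my-elser-endpoint",
            inference_config={
                "service": "elser",
                "service_settings": {"num_allocations": 1, "num_threads": 1},
            },
            timeout="60s",  # new in 8.19: how long to wait for creation
        )
        print(resp)

    asyncio.run(create_endpoint())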
@@ -451,6 +486,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -458,14 +494,9 @@ class InferenceClient(NamespacedClient):

         <p>Create an AlibabaCloud AI Search inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +507,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +535,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -547,25 +582,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

         <p>Create an Amazon Bedrock inference endpoint.</p>
-        <p>Creates an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
+        <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
         <blockquote>
         <p>info
         You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
         </blockquote>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +607,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +635,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
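Note: the Amazon Bedrock docstring above stresses that access and secret keys are write-once. A hedged sketch of creating such an endpoint (the `service_settings` keys shown are illustrative placeholders; consult the linked service reference for the exact set):

    import asyncio
    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def create_bedrock_endpoint() -> None:
        resp = await client.inference.put_amazonbedrock(
            task_type="text_embedding",
            amazonbedrock_inference_id="my-bedrock-embeddings",
            service="amazonbedrock",
            service_settings={
                # Illustrative keys; the key pair cannot be changed later.
                "access_key": "<aws access key>",
                "secret_key": "<aws secret key>",
                "region": "us-east-1",
                "provider": "amazontitan",
                "model": "amazon.titan-embed-text-v2:0",
            },
            timeout="30s",  # new in 8.19
        )
        print(resp)

    asyncio.run(create_bedrock_endpoint())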
@@ -626,6 +661,112 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+        <p>Create an Amazon SageMaker inference endpoint.</p>
+        <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
@@ -647,6 +788,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -654,14 +796,9 @@ class InferenceClient(NamespacedClient):

         <p>Create an Anthropic inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -673,6 +810,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +838,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -744,6 +885,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -751,14 +893,9 @@ class InferenceClient(NamespacedClient):

         <p>Create an Azure AI studio inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +906,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +934,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -840,6 +981,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -853,14 +995,9 @@ class InferenceClient(NamespacedClient):
         <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
         </ul>
         <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +1010,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +1038,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -944,6 +1085,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -951,14 +1093,9 @@ class InferenceClient(NamespacedClient):

         <p>Create a Cohere inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +1106,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1132,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1017,6 +1158,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+        <p>Create a custom inference endpoint.</p>
+        <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+        The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+        The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+        Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+        The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+        If the definition (key) is not found for a template, an error message is returned.
+        In case of an endpoint definition like the following:</p>
+        <pre><code>PUT _inference/text_embedding/test-text-embedding
+        {
+          &quot;service&quot;: &quot;custom&quot;,
+          &quot;service_settings&quot;: {
+            &quot;secret_parameters&quot;: {
+              &quot;api_key&quot;: &quot;&lt;some api key&gt;&quot;
+            },
+            &quot;url&quot;: &quot;...endpoints.huggingface.cloud/v1/embeddings&quot;,
+            &quot;headers&quot;: {
+              &quot;Authorization&quot;: &quot;Bearer ${api_key}&quot;,
+              &quot;Content-Type&quot;: &quot;application/json&quot;
+            },
+            &quot;request&quot;: &quot;{\\&quot;input\\&quot;: ${input}}&quot;,
+            &quot;response&quot;: {
+              &quot;json_parser&quot;: {
+                &quot;text_embeddings&quot;:&quot;$.data[*].embedding[*]&quot;
+              }
+            }
+          }
+        }
+        </code></pre>
+        <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+        <blockquote>
+        <p>info
+        Templates should not be surrounded by quotes.</p>
+        </blockquote>
+        <p>Pre-defined templates:</p>
+        <ul>
+        <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+        <li><code>${input_type}</code> refers to the input type translation values.</li>
+        <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+        <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+        <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+        </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+        <p>Create a DeepSeek inference endpoint.</p>
+        <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
@@ -1040,6 +1396,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1062,7 +1419,7 @@ class InferenceClient(NamespacedClient):
         Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param elasticsearch_inference_id: The unique identifier of the inference endpoint.
@@ -1074,6 +1431,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1100,6 +1459,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1139,6 +1500,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1162,7 +1524,7 @@ class InferenceClient(NamespacedClient):
         Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elser.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param elser_inference_id: The unique identifier of the inference endpoint.
@@ -1171,6 +1533,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `elser` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1195,6 +1559,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1232,6 +1598,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1239,14 +1606,9 @@ class InferenceClient(NamespacedClient):

         <p>Create an Google AI Studio inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-ai-studio.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param googleaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -1255,6 +1617,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googleaistudio` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1281,6 +1645,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1314,7 +1680,9 @@ class InferenceClient(NamespacedClient):
     async def put_googlevertexai(
         self,
         *,
-        task_type: t.Union[str, t.Literal["rerank", "text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         googlevertexai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1324,6 +1692,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1331,14 +1700,9 @@ class InferenceClient(NamespacedClient):

         <p>Create a Google Vertex AI inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-vertex-ai.html>`_
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1349,6 +1713,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1741,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
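Note: as the hunks above show, `put_googlevertexai` now also accepts the `chat_completion` and `completion` task types. A hedged sketch using one of the new types (the `service_settings` keys are illustrative placeholders; see the linked service reference):

    import asyncio
    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def create_vertex_completion() -> None:
        resp = await client.inference.put_googlevertexai(
            task_type="completion",  # newly accepted in 8.19
            googlevertexai_inference_id="my-vertex-completion",
            service="googlevertexai",
            service_settings={
                # Illustrative placeholders.
                "service_account_json": "<service account json>",
                "model_id": "gemini-1.5-flash",
                "location": "us-central1",
                "project_id": "<gcp project>",
            },
            timeout="30s",
        )
        print(resp)

    asyncio.run(create_vertex_completion())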
@@ -1400,12 +1768,19 @@ class InferenceClient(NamespacedClient):
1400
1768
  )
1401
1769
 
1402
1770
  @_rewrite_parameters(
1403
- body_fields=("service", "service_settings", "chunking_settings"),
1771
+ body_fields=(
1772
+ "service",
1773
+ "service_settings",
1774
+ "chunking_settings",
1775
+ "task_settings",
1776
+ ),
1404
1777
  )
1405
1778
  async def put_hugging_face(
1406
1779
  self,
1407
1780
  *,
1408
- task_type: t.Union[str, t.Literal["text_embedding"]],
1781
+ task_type: t.Union[
1782
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
1783
+ ],
1409
1784
  huggingface_inference_id: str,
1410
1785
  service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
1411
1786
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1414,17 +1789,22 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html
 
  <p>Create a Hugging Face inference endpoint.</p>
- <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.</p>
- <p>You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.
- Select the model you want to use on the new endpoint creation page (for example <code>intfloat/e5-small-v2</code>), then select the sentence embeddings task under the advanced configuration section.
- Create the endpoint and copy the URL after the endpoint initialization has been finished.</p>
- <p>The following models are recommended for the Hugging Face service:</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+ Supported tasks include: <code>text_embedding</code>, <code>completion</code>, <code>chat_completion</code>, and <code>rerank</code>.</p>
+ <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+ Select a model that supports the task you intend to use.</p>
+ <p>For Elastic's <code>text_embedding</code> task:
+ The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+ After the endpoint has initialized, copy the generated endpoint URL.
+ Recommended models for the <code>text_embedding</code> task:</p>
  <ul>
  <li><code>all-MiniLM-L6-v2</code></li>
  <li><code>all-MiniLM-L12-v2</code></li>
@@ -1434,14 +1814,27 @@ class InferenceClient(NamespacedClient):
  <li><code>multilingual-e5-base</code></li>
  <li><code>multilingual-e5-small</code></li>
  </ul>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+ The selected model must support the <code>Text Generation</code> task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating a dedicated endpoint, select the <code>Text Generation</code> task.
+ After the endpoint is initialized (for dedicated) or ready (for serverless), ensure that it supports the OpenAI API and that its URL includes the <code>/v1/chat/completions</code> path. Then copy the full endpoint URL for use.
+ Recommended models for the <code>chat_completion</code> and <code>completion</code> tasks:</p>
+ <ul>
+ <li><code>Mistral-7B-Instruct-v0.2</code></li>
+ <li><code>QwQ-32B</code></li>
+ <li><code>Phi-3-mini-128k-instruct</code></li>
+ </ul>
+ <p>For Elastic's <code>rerank</code> task:
+ The selected model must support the <code>sentence-ranking</code> task and expose the OpenAI API.
+ Hugging Face supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+ After the endpoint is initialized, copy the full endpoint URL for use.
+ Tested models for the <code>rerank</code> task:</p>
+ <ul>
+ <li><code>bge-reranker-base</code></li>
+ <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+ </ul>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-hugging-face.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_
 
  :param task_type: The type of the inference task that the model will perform.
  :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1450,6 +1843,10 @@ class InferenceClient(NamespacedClient):
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `hugging_face` service.
  :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1873,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1483,6 +1882,8 @@ class InferenceClient(NamespacedClient):
  __body["service_settings"] = service_settings
  if chunking_settings is not None:
  __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
  if not __body:
  __body = None # type: ignore[assignment]
  __headers = {"accept": "application/json"}
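A minimal sketch of the widened `put_hugging_face` surface, exercising the new `chat_completion` task type and the new `timeout` parameter. The endpoint URL and token are placeholders, and the `url`/`api_key` keys inside `service_settings` are assumptions based on the linked reference page:

import asyncio

from elasticsearch import AsyncElasticsearch

async def main() -> None:
    async with AsyncElasticsearch("http://localhost:9200") as client:
        resp = await client.inference.put_hugging_face(
            task_type="chat_completion",  # now accepted alongside completion/rerank
            huggingface_inference_id="my-hf-chat",  # placeholder ID
            service="hugging_face",
            service_settings={
                "api_key": "hf_...",  # Hugging Face access token (placeholder)
                # Dedicated/serverless endpoint URL; must expose the OpenAI API:
                "url": "https://example.endpoints.huggingface.cloud/v1/chat/completions",
            },
            timeout="30s",  # new in 8.19
        )
        print(resp)

asyncio.run(main())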
@@ -1519,6 +1920,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1528,14 +1930,9 @@ class InferenceClient(NamespacedClient):
  <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
  <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
  To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-jinaai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_
 
  :param task_type: The type of the inference task that the model will perform.
  :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1943,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1969,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1600,7 +2001,9 @@ class InferenceClient(NamespacedClient):
  async def put_mistral(
  self,
  *,
- task_type: t.Union[str, t.Literal["text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "text_embedding"]
+ ],
  mistral_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +2012,27 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html
 
  <p>Create a Mistral inference endpoint.</p>
- <p>Creates an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/{brnach}/infer-service-mistral.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_
 
- :param task_type: The task type. The only valid task type for the model to perform
- is `text_embedding`.
+ :param task_type: The type of the inference task that the model will perform.
  :param mistral_inference_id: The unique identifier of the inference endpoint.
  :param service: The type of service supported for the specified task type. In
  this case, `mistral`.
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `mistral` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +2057,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1702,6 +2104,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1709,14 +2112,9 @@ class InferenceClient(NamespacedClient):
 
  <p>Create an OpenAI inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_
 
  :param task_type: The type of the inference task that the model will perform.
  NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2127,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2153,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1798,6 +2200,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1808,7 +2211,7 @@ class InferenceClient(NamespacedClient):
  <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-voyageai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_
 
  :param task_type: The type of the inference task that the model will perform.
  :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2222,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2248,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1881,6 +2288,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1890,14 +2298,9 @@ class InferenceClient(NamespacedClient):
  <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
  You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
  You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_
 
  :param task_type: The task type. The only valid task type for the model to perform
  is `text_embedding`.
@@ -1906,6 +2309,8 @@ class InferenceClient(NamespacedClient):
  this case, `watsonxai`.
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `watsonxai` service.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2335,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1970,10 +2377,10 @@ class InferenceClient(NamespacedClient):
  """
  .. raw:: html
 
- <p>Perform rereanking inference on the service</p>
+ <p>Perform reranking inference on the service</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
  :param inference_id: The unique identifier for the inference endpoint.
  :param input: The text on which you want to perform the inference task. It can
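A minimal call sketch against a rerank-capable endpoint. The endpoint ID and documents are placeholders; the `query` argument accompanies `input` for this task:

import asyncio

from elasticsearch import AsyncElasticsearch

async def main() -> None:
    async with AsyncElasticsearch("http://localhost:9200") as client:
        resp = await client.inference.rerank(
            inference_id="my-jina-rerank",  # placeholder rerank endpoint
            query="What is Elasticsearch?",
            input=[
                "Elasticsearch is a distributed search and analytics engine.",
                "Lucene is a Java search library.",
            ],
        )
        print(resp)

asyncio.run(main())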
@@ -2049,7 +2456,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform sparse embedding inference on the service</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
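A minimal sparse-embedding call sketch; the endpoint ID is a placeholder for any sparse-embedding-capable endpoint:

import asyncio

from elasticsearch import AsyncElasticsearch

async def main() -> None:
    async with AsyncElasticsearch("http://localhost:9200") as client:
        resp = await client.inference.sparse_embedding(
            inference_id="my-elser-endpoint",  # placeholder sparse endpoint
            input="The quick brown fox jumps over the lazy dog",
        )
        print(resp)

asyncio.run(main())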
@@ -2117,7 +2524,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform text embedding inference on the service</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
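A minimal text-embedding call sketch, passing an array of inputs; the endpoint ID is a placeholder:

import asyncio

from elasticsearch import AsyncElasticsearch

async def main() -> None:
    async with AsyncElasticsearch("http://localhost:9200") as client:
        resp = await client.inference.text_embedding(
            inference_id="my-hf-embeddings",  # placeholder embedding endpoint
            input=["first passage to embed", "second passage to embed"],
        )
        print(resp)

asyncio.run(main())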
@@ -2199,7 +2606,7 @@ class InferenceClient(NamespacedClient):
  However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
 
 
- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/update-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_
 
  :param inference_id: The unique identifier of the inference endpoint.
  :param inference_config:
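A sketch of updating an existing endpoint in place, for example to rotate a secret. The endpoint ID is a placeholder, and the shape of the `inference_config` payload is an assumption from the linked update API page:

import asyncio

from elasticsearch import AsyncElasticsearch

async def main() -> None:
    async with AsyncElasticsearch("http://localhost:9200") as client:
        resp = await client.inference.update(
            inference_id="my-openai-completion",  # placeholder endpoint ID
            inference_config={
                "service_settings": {"api_key": "sk-new-..."},  # e.g. rotate a key
            },
        )
        print(resp)

asyncio.run(main())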